Epetra Package Browser (Single Doxygen Collection)  Development
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
test/BasicPerfTest_LL/cxx_main.cpp
Go to the documentation of this file.
1 //@HEADER
2 // ************************************************************************
3 //
4 // Epetra: Linear Algebra Services Package
5 // Copyright 2011 Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 //@HEADER
41 
42 
43 #define EPETRA_HAVE_JADMATRIX
44 #define EPETRA_VERY_SHORT_PERFTEST
45 #define EPETRA_HAVE_STATICPROFILE
46 #include "Epetra_Map.h"
47 #include "Epetra_LocalMap.h"
48 #include "Epetra_BlockMap.h"
49 #include "Epetra_Time.h"
50 #include "Epetra_CrsMatrix.h"
51 #include "Epetra_VbrMatrix.h"
52 #include "Epetra_Vector.h"
53 #include "Epetra_IntVector.h"
54 #include "Epetra_MultiVector.h"
57 #include "Epetra_Flops.h"
58 #ifdef EPETRA_MPI
59 #include "Epetra_MpiComm.h"
60 #include "mpi.h"
61 #else
62 #include "Epetra_SerialComm.h"
63 #endif
64 #include "../epetra_test_err.h"
65 #include "Epetra_Version.h"
66 #ifdef EPETRA_HAVE_JADMATRIX
67 #include "Epetra_JadMatrix.h"
68 #endif
69 
70 // prototypes
71 
72 void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints,
73  int * xoff, int * yoff,
74  const Epetra_Comm &comm, bool verbose, bool summary,
75  Epetra_Map *& map,
77  Epetra_Vector *& b,
78  Epetra_Vector *& bt,
79  Epetra_Vector *&xexact, bool StaticProfile, bool MakeLocalOnly);
80 
81 void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints,
82  int * xoff, int * yoff, int nrhs,
83  const Epetra_Comm &comm, bool verbose, bool summary,
84  Epetra_Map *& map,
86  Epetra_MultiVector *& b,
87  Epetra_MultiVector *& bt,
88  Epetra_MultiVector *&xexact, bool StaticProfile, bool MakeLocalOnly);
89 
90 void GenerateVbrProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints,
91  int * xoff, int * yoff,
92  int nsizes, int * sizes,
93  const Epetra_Comm &comm, bool verbose, bool summary,
94  Epetra_BlockMap *& map,
96  Epetra_Vector *& b,
97  Epetra_Vector *& bt,
98  Epetra_Vector *&xexact, bool StaticProfile, bool MakeLocalOnly);
99 
100 void GenerateVbrProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints,
101  int * xoff, int * yoff,
102  int nsizes, int * sizes, int nrhs,
103  const Epetra_Comm &comm, bool verbose, bool summary,
104  Epetra_BlockMap *& map,
105  Epetra_VbrMatrix *& A,
106  Epetra_MultiVector *& b,
107  Epetra_MultiVector *& bt,
108  Epetra_MultiVector *&xexact, bool StaticProfile, bool MakeLocalOnly);
109 
110 void GenerateMyGlobalElements(int numNodesX, int numNodesY, int numProcsX, int numProcs,
111  int myPID, long long * & myGlobalElements);
112 
114  Epetra_MultiVector * xexact, bool StaticProfile, bool verbose, bool summary);
115 #ifdef EPETRA_HAVE_JADMATRIX
117  Epetra_MultiVector * xexact, bool StaticProfile, bool verbose, bool summary);
118 #endif
121  bool StaticProfile, bool verbose, bool summary);
122 int main(int argc, char *argv[])
123 {
124  int ierr = 0;
125  double elapsed_time;
126  double total_flops;
127  double MFLOPs;
128 
129 
130 #ifdef EPETRA_MPI
131 
132  // Initialize MPI
133  MPI_Init(&argc,&argv);
134  Epetra_MpiComm comm( MPI_COMM_WORLD );
135 #else
136  Epetra_SerialComm comm;
137 #endif
138 
139  bool verbose = false;
140  bool summary = false;
141 
142  // Check if we should print verbose results to standard out
143  if (argc>6) if (argv[6][0]=='-' && argv[6][1]=='v') verbose = true;
144 
145  // Check if we should print verbose results to standard out
146  if (argc>6) if (argv[6][0]=='-' && argv[6][1]=='s') summary = true;
147 
148  if(argc < 6) {
149  cerr << "Usage: " << argv[0]
150  << " NumNodesX NumNodesY NumProcX NumProcY NumPoints [-v|-s]" << endl
151  << "where:" << endl
152  << "NumNodesX - Number of mesh nodes in X direction per processor" << endl
153  << "NumNodesY - Number of mesh nodes in Y direction per processor" << endl
154  << "NumProcX - Number of processors to use in X direction" << endl
155  << "NumProcY - Number of processors to use in Y direction" << endl
156  << "NumPoints - Number of points to use in stencil (5, 9 or 25 only)" << endl
157  << "-v|-s - (Optional) Run in verbose mode if -v present or summary mode if -s present" << endl
158  << " NOTES: NumProcX*NumProcY must equal the number of processors used to run the problem." << endl << endl
159  << " Serial example:" << endl
160  << argv[0] << " 16 12 1 1 25 -v" << endl
161  << " Run this program in verbose mode on 1 processor using a 16 X 12 grid with a 25 point stencil."<< endl <<endl
162  << " MPI example:" << endl
163  << "mpirun -np 32 " << argv[0] << " 10 12 4 8 9 -v" << endl
164  << " Run this program in verbose mode on 32 processors putting a 10 X 12 subgrid on each processor using 4 processors "<< endl
165  << " in the X direction and 8 in the Y direction. Total grid size is 40 points in X and 96 in Y with a 9 point stencil."<< endl
166  << endl;
167  return(1);
168 
169  }
170  //char tmp;
171  //if (comm.MyPID()==0) cout << "Press any key to continue..."<< endl;
172  //if (comm.MyPID()==0) cin >> tmp;
173  //comm.Barrier();
174 
175  comm.SetTracebackMode(0); // This should shut down any error traceback reporting
176  if (verbose && comm.MyPID()==0)
177  cout << Epetra_Version() << endl << endl;
178  if (summary && comm.MyPID()==0) {
179  if (comm.NumProc()==1)
180  cout << Epetra_Version() << endl << endl;
181  else
182  cout << endl << endl; // Print two blank line to keep output columns lined up
183  }
184 
185  if (verbose) cout << comm <<endl;
186 
187 
188  // Redefine verbose to only print on PE 0
189 
190  if (verbose && comm.MyPID()!=0) verbose = false;
191  if (summary && comm.MyPID()!=0) summary = false;
192 
193  int numNodesX = atoi(argv[1]);
194  int numNodesY = atoi(argv[2]);
195  int numProcsX = atoi(argv[3]);
196  int numProcsY = atoi(argv[4]);
197  int numPoints = atoi(argv[5]);
198 
199  if (verbose || (summary && comm.NumProc()==1)) {
200  cout << " Number of local nodes in X direction = " << numNodesX << endl
201  << " Number of local nodes in Y direction = " << numNodesY << endl
202  << " Number of global nodes in X direction = " << numNodesX*numProcsX << endl
203  << " Number of global nodes in Y direction = " << numNodesY*numProcsY << endl
204  << " Number of local nonzero entries = " << numNodesX*numNodesY*numPoints << endl
205  << " Number of global nonzero entries = " << numNodesX*numNodesY*numPoints*numProcsX*numProcsY << endl
206  << " Number of Processors in X direction = " << numProcsX << endl
207  << " Number of Processors in Y direction = " << numProcsY << endl
208  << " Number of Points in stencil = " << numPoints << endl << endl;
209  }
210  // Print blank line to keep output columns lined up
211  if (summary && comm.NumProc()>1)
212  cout << endl << endl << endl << endl << endl << endl << endl << endl<< endl << endl;
213 
214  if (numProcsX*numProcsY!=comm.NumProc()) {
215  cerr << "Number of processors = " << comm.NumProc() << endl
216  << " is not the product of " << numProcsX << " and " << numProcsY << endl << endl;
217  return(1);
218  }
219 
220  if (numPoints!=5 && numPoints!=9 && numPoints!=25) {
221  cerr << "Number of points specified = " << numPoints << endl
222  << " is not 5, 9, 25" << endl << endl;
223  return(1);
224  }
225 
226  if (numNodesX*numNodesY<=0) {
227  cerr << "Product of number of nodes is <= zero" << endl << endl;
228  return(1);
229  }
230 
231  Epetra_IntSerialDenseVector Xoff, XLoff, XUoff;
232  Epetra_IntSerialDenseVector Yoff, YLoff, YUoff;
233  if (numPoints==5) {
234 
235  // Generate a 5-point 2D Finite Difference matrix
236  Xoff.Size(5);
237  Yoff.Size(5);
238  Xoff[0] = -1; Xoff[1] = 1; Xoff[2] = 0; Xoff[3] = 0; Xoff[4] = 0;
239  Yoff[0] = 0; Yoff[1] = 0; Yoff[2] = 0; Yoff[3] = -1; Yoff[4] = 1;
240 
241  // Generate a 2-point 2D Lower triangular Finite Difference matrix
242  XLoff.Size(2);
243  YLoff.Size(2);
244  XLoff[0] = -1; XLoff[1] = 0;
245  YLoff[0] = 0; YLoff[1] = -1;
246 
247  // Generate a 3-point 2D upper triangular Finite Difference matrix
248  XUoff.Size(3);
249  YUoff.Size(3);
250  XUoff[0] = 0; XUoff[1] = 1; XUoff[2] = 0;
251  YUoff[0] = 0; YUoff[1] = 0; YUoff[2] = 1;
252  }
253  else if (numPoints==9) {
254  // Generate a 9-point 2D Finite Difference matrix
255  Xoff.Size(9);
256  Yoff.Size(9);
257  Xoff[0] = -1; Xoff[1] = 0; Xoff[2] = 1;
258  Yoff[0] = -1; Yoff[1] = -1; Yoff[2] = -1;
259  Xoff[3] = -1; Xoff[4] = 0; Xoff[5] = 1;
260  Yoff[3] = 0; Yoff[4] = 0; Yoff[5] = 0;
261  Xoff[6] = -1; Xoff[7] = 0; Xoff[8] = 1;
262  Yoff[6] = 1; Yoff[7] = 1; Yoff[8] = 1;
263 
264  // Generate a 5-point lower triangular 2D Finite Difference matrix
265  XLoff.Size(5);
266  YLoff.Size(5);
267  XLoff[0] = -1; XLoff[1] = 0; Xoff[2] = 1;
268  YLoff[0] = -1; YLoff[1] = -1; Yoff[2] = -1;
269  XLoff[3] = -1; XLoff[4] = 0;
270  YLoff[3] = 0; YLoff[4] = 0;
271 
272  // Generate a 4-point upper triangular 2D Finite Difference matrix
273  XUoff.Size(4);
274  YUoff.Size(4);
275  XUoff[0] = 1;
276  YUoff[0] = 0;
277  XUoff[1] = -1; XUoff[2] = 0; XUoff[3] = 1;
278  YUoff[1] = 1; YUoff[2] = 1; YUoff[3] = 1;
279 
280  }
281  else {
282  // Generate a 25-point 2D Finite Difference matrix
283  Xoff.Size(25);
284  Yoff.Size(25);
285  int xi = 0, yi = 0;
286  int xo = -2, yo = -2;
287  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
288  Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ;
289  xo = -2, yo++;
290  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
291  Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ;
292  xo = -2, yo++;
293  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
294  Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ;
295  xo = -2, yo++;
296  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
297  Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ;
298  xo = -2, yo++;
299  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
300  Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ;
301 
302  // Generate a 13-point lower triangular 2D Finite Difference matrix
303  XLoff.Size(13);
304  YLoff.Size(13);
305  xi = 0, yi = 0;
306  xo = -2, yo = -2;
307  XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++;
308  YLoff[yi++] = yo ; YLoff[yi++] = yo ; YLoff[yi++] = yo ; YLoff[yi++] = yo ; YLoff[yi++] = yo ;
309  xo = -2, yo++;
310  XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++;
311  YLoff[yi++] = yo ; YLoff[yi++] = yo ; YLoff[yi++] = yo ; YLoff[yi++] = yo ; YLoff[yi++] = yo ;
312  xo = -2, yo++;
313  XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++;
314  YLoff[yi++] = yo ; YLoff[yi++] = yo ; YLoff[yi++] = yo ;
315 
316  // Generate a 13-point upper triangular 2D Finite Difference matrix
317  XUoff.Size(13);
318  YUoff.Size(13);
319  xi = 0, yi = 0;
320  xo = 0, yo = 0;
321  XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++;
322  YUoff[yi++] = yo ; YUoff[yi++] = yo ; YUoff[yi++] = yo ;
323  xo = -2, yo++;
324  XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++;
325  YUoff[yi++] = yo ; YUoff[yi++] = yo ; YUoff[yi++] = yo ; YUoff[yi++] = yo ; YUoff[yi++] = yo ;
326  xo = -2, yo++;
327  XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++;
328  YUoff[yi++] = yo ; YUoff[yi++] = yo ; YUoff[yi++] = yo ; YUoff[yi++] = yo ; YUoff[yi++] = yo ;
329 
330  }
331 
332  Epetra_Map * map;
333  Epetra_Map * mapL;
334  Epetra_Map * mapU;
336  Epetra_CrsMatrix * L;
337  Epetra_CrsMatrix * U;
338  Epetra_MultiVector * b;
339  Epetra_MultiVector * bt;
340  Epetra_MultiVector * xexact;
341  Epetra_MultiVector * bL;
342  Epetra_MultiVector * btL;
343  Epetra_MultiVector * xexactL;
344  Epetra_MultiVector * bU;
345  Epetra_MultiVector * btU;
346  Epetra_MultiVector * xexactU;
347  Epetra_SerialDenseVector resvec(0);
348 
349  //Timings
350  Epetra_Flops flopcounter;
351  Epetra_Time timer(comm);
352 
353 #ifdef EPETRA_VERY_SHORT_PERFTEST
354  int jstop = 1;
355 #elif EPETRA_SHORT_PERFTEST
356  int jstop = 1;
357 #else
358  int jstop = 2;
359 #endif
360  for (int j=0; j<jstop; j++) {
361  for (int k=1; k<17; k++) {
362 #ifdef EPETRA_VERY_SHORT_PERFTEST
363  if (k<3 || (k%4==0 && k<9)) {
364 #elif EPETRA_SHORT_PERFTEST
365  if (k<6 || k%4==0) {
366 #else
367  if (k<7 || k%2==0) {
368 #endif
369  int nrhs=k;
370  if (verbose) cout << "\n*************** Results for " << nrhs << " RHS with ";
371 
372  bool StaticProfile = (j!=0);
373  if (verbose) {
374  if (StaticProfile) cout << " static profile\n";
375  else cout << " dynamic profile\n";
376  }
377  GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, numPoints,
378  Xoff.Values(), Yoff.Values(), nrhs, comm, verbose, summary,
379  map, A, b, bt, xexact, StaticProfile, false);
380 
381 
382 #ifdef EPETRA_HAVE_JADMATRIX
383 
384  timer.ResetStartTime();
385  Epetra_JadMatrix JA(*A);
386  elapsed_time = timer.ElapsedTime();
387  if (verbose) cout << "Time to create Jagged diagonal matrix = " << elapsed_time << endl;
388 
389  //cout << "A = " << *A << endl;
390  //cout << "JA = " << JA << endl;
391 
392  runJadMatrixTests(&JA, b, bt, xexact, StaticProfile, verbose, summary);
393 
394 #endif
395  runMatrixTests(A, b, bt, xexact, StaticProfile, verbose, summary);
396 
397  delete A;
398  delete b;
399  delete bt;
400  delete xexact;
401 
402  GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, XLoff.Length(),
403  XLoff.Values(), YLoff.Values(), nrhs, comm, verbose, summary,
404  mapL, L, bL, btL, xexactL, StaticProfile, true);
405 
406 
407  GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, XUoff.Length(),
408  XUoff.Values(), YUoff.Values(), nrhs, comm, verbose, summary,
409  mapU, U, bU, btU, xexactU, StaticProfile, true);
410 
411 
412  runLUMatrixTests(L, bL, btL, xexactL, U, bU, btU, xexactU, StaticProfile, verbose, summary);
413 
414  delete L;
415  delete bL;
416  delete btL;
417  delete xexactL;
418  delete mapL;
419 
420  delete U;
421  delete bU;
422  delete btU;
423  delete xexactU;
424  delete mapU;
425 
426  Epetra_MultiVector q(*map, nrhs);
427  Epetra_MultiVector z(q);
428  Epetra_MultiVector r(q);
429 
430  delete map;
431  q.SetFlopCounter(flopcounter);
432  z.SetFlopCounter(q);
433  r.SetFlopCounter(q);
434 
435  resvec.Resize(nrhs);
436 
437 
438  flopcounter.ResetFlops();
439  timer.ResetStartTime();
440 
441  //10 norms
442  for( int i = 0; i < 10; ++i )
443  q.Norm2( resvec.Values() );
444 
445  elapsed_time = timer.ElapsedTime();
446  total_flops = q.Flops();
447  MFLOPs = total_flops/elapsed_time/1000000.0;
448  if (verbose) cout << "\nTotal MFLOPs for 10 Norm2's= " << MFLOPs << endl;
449 
450  if (summary) {
451  if (comm.NumProc()==1) cout << "Norm2" << '\t';
452  cout << MFLOPs << endl;
453  }
454 
455  flopcounter.ResetFlops();
456  timer.ResetStartTime();
457 
458  //10 dot's
459  for( int i = 0; i < 10; ++i )
460  q.Dot(z, resvec.Values());
461 
462  elapsed_time = timer.ElapsedTime();
463  total_flops = q.Flops();
464  MFLOPs = total_flops/elapsed_time/1000000.0;
465  if (verbose) cout << "Total MFLOPs for 10 Dot's = " << MFLOPs << endl;
466 
467  if (summary) {
468  if (comm.NumProc()==1) cout << "DotProd" << '\t';
469  cout << MFLOPs << endl;
470  }
471 
472  flopcounter.ResetFlops();
473  timer.ResetStartTime();
474 
475  //10 dot's
476  for( int i = 0; i < 10; ++i )
477  q.Update(1.0, z, 1.0, r, 0.0);
478 
479  elapsed_time = timer.ElapsedTime();
480  total_flops = q.Flops();
481  MFLOPs = total_flops/elapsed_time/1000000.0;
482  if (verbose) cout << "Total MFLOPs for 10 Updates= " << MFLOPs << endl;
483 
484  if (summary) {
485  if (comm.NumProc()==1) cout << "Update" << '\t';
486  cout << MFLOPs << endl;
487  }
488  }
489  }
490  }
491 #ifdef EPETRA_MPI
492  MPI_Finalize() ;
493 #endif
494 
495 return ierr ;
496 }
497 
498 // Constructs a 2D PDE finite difference matrix using the list of x and y offsets.
499 //
500 // nx (In) - number of grid points in x direction
501 // ny (In) - number of grid points in y direction
502 // The total number of equations will be nx*ny ordered such that the x direction changes
503 // most rapidly:
504 // First equation is at point (0,0)
505 // Second at (1,0)
506 // ...
507 // nx equation at (nx-1,0)
508 // nx+1st equation at (0,1)
509 
510 // numPoints (In) - number of points in finite difference stencil
511 // xoff (In) - stencil offsets in x direction (of length numPoints)
512 // yoff (In) - stencil offsets in y direction (of length numPoints)
513 // A standard 5-point finite difference stencil would be described as:
514 // numPoints = 5
515 // xoff = [-1, 1, 0, 0, 0]
516 // yoff = [ 0, 0, 0, -1, 1]
517 
518 // nrhs - Number of rhs to generate. (First interface produces vectors, so nrhs is not needed
519 
520 // comm (In) - an Epetra_Comm object describing the parallel machine (numProcs and my proc ID)
521 // map (Out) - Epetra_Map describing distribution of matrix and vectors/multivectors
522 // A (Out) - Epetra_CrsMatrix constructed for nx by ny grid using prescribed stencil
523 // Off-diagonal values are random between 0 and 1. If diagonal is part of stencil,
524 // diagonal will be slightly diag dominant.
525 // b (Out) - Generated RHS. Values satisfy b = A*xexact
526 // bt (Out) - Generated RHS. Values satisfy b = A'*xexact
527 // xexact (Out) - Generated exact solution to Ax = b and b' = A'xexact
528 
529 // Note: Caller of this function is responsible for deleting all output objects.
530 
531 void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints,
532  int * xoff, int * yoff,
533  const Epetra_Comm &comm, bool verbose, bool summary,
534  Epetra_Map *& map,
535  Epetra_CrsMatrix *& A,
536  Epetra_Vector *& b,
537  Epetra_Vector *& bt,
538  Epetra_Vector *&xexact, bool StaticProfile, bool MakeLocalOnly) {
539 
540  Epetra_MultiVector * b1, * bt1, * xexact1;
541 
542  GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, numPoints,
543  xoff, yoff, 1, comm, verbose, summary,
544  map, A, b1, bt1, xexact1, StaticProfile, MakeLocalOnly);
545 
546  b = dynamic_cast<Epetra_Vector *>(b1);
547  bt = dynamic_cast<Epetra_Vector *>(bt1);
548  xexact = dynamic_cast<Epetra_Vector *>(xexact1);
549 
550  return;
551 }
552 
553 void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints,
554  int * xoff, int * yoff, int nrhs,
555  const Epetra_Comm &comm, bool verbose, bool summary,
556  Epetra_Map *& map,
557  Epetra_CrsMatrix *& A,
558  Epetra_MultiVector *& b,
559  Epetra_MultiVector *& bt,
560  Epetra_MultiVector *&xexact, bool StaticProfile, bool MakeLocalOnly) {
561 
562  Epetra_Time timer(comm);
563  // Determine my global IDs
564  long long * myGlobalElements;
565  GenerateMyGlobalElements(numNodesX, numNodesY, numProcsX, numProcsY, comm.MyPID(), myGlobalElements);
566 
567  int numMyEquations = numNodesX*numNodesY;
568 
569  map = new Epetra_Map((long long)-1, numMyEquations, myGlobalElements, 0, comm); // Create map with 2D block partitioning.
570  delete [] myGlobalElements;
571 
572  long long numGlobalEquations = map->NumGlobalElements64();
573 
574  int profile = 0; if (StaticProfile) profile = numPoints;
575 
576 #ifdef EPETRA_HAVE_STATICPROFILE
577 
578  if (MakeLocalOnly)
579  A = new Epetra_CrsMatrix(Copy, *map, *map, profile, StaticProfile); // Construct matrix with rowmap=colmap
580  else
581  A = new Epetra_CrsMatrix(Copy, *map, profile, StaticProfile); // Construct matrix
582 
583 #else
584 
585  if (MakeLocalOnly)
586  A = new Epetra_CrsMatrix(Copy, *map, *map, profile); // Construct matrix with rowmap=colmap
587  else
588  A = new Epetra_CrsMatrix(Copy, *map, profile); // Construct matrix
589 
590 #endif
591 
592  long long * indices = new long long[numPoints];
593  double * values = new double[numPoints];
594 
595  double dnumPoints = (double) numPoints;
596  int nx = numNodesX*numProcsX;
597 
598  for (int i=0; i<numMyEquations; i++) {
599 
600  long long rowID = map->GID64(i);
601  int numIndices = 0;
602 
603  for (int j=0; j<numPoints; j++) {
604  long long colID = rowID + xoff[j] + nx*yoff[j]; // Compute column ID based on stencil offsets
605  if (colID>-1 && colID<numGlobalEquations) {
606  indices[numIndices] = colID;
607  double value = - ((double) rand())/ ((double) RAND_MAX);
608  if (colID==rowID)
609  values[numIndices++] = dnumPoints - value; // Make diagonal dominant
610  else
611  values[numIndices++] = value;
612  }
613  }
614  //cout << "Building row " << rowID << endl;
615  A->InsertGlobalValues(rowID, numIndices, values, indices);
616  }
617 
618  delete [] indices;
619  delete [] values;
620  double insertTime = timer.ElapsedTime();
621  timer.ResetStartTime();
622  A->FillComplete(false);
623  double fillCompleteTime = timer.ElapsedTime();
624 
625  if (verbose)
626  cout << "Time to insert matrix values = " << insertTime << endl
627  << "Time to complete fill = " << fillCompleteTime << endl;
628  if (summary) {
629  if (comm.NumProc()==1) cout << "InsertTime" << '\t';
630  cout << insertTime << endl;
631  if (comm.NumProc()==1) cout << "FillCompleteTime" << '\t';
632  cout << fillCompleteTime << endl;
633  }
634 
635  if (nrhs<=1) {
636  b = new Epetra_Vector(*map);
637  bt = new Epetra_Vector(*map);
638  xexact = new Epetra_Vector(*map);
639  }
640  else {
641  b = new Epetra_MultiVector(*map, nrhs);
642  bt = new Epetra_MultiVector(*map, nrhs);
643  xexact = new Epetra_MultiVector(*map, nrhs);
644  }
645 
646  xexact->Random(); // Fill xexact with random values
647 
648  A->Multiply(false, *xexact, *b);
649  A->Multiply(true, *xexact, *bt);
650 
651  return;
652 }
653 
654 
655 // Constructs a 2D PDE finite difference matrix using the list of x and y offsets.
656 //
657 // nx (In) - number of grid points in x direction
658 // ny (In) - number of grid points in y direction
659 // The total number of equations will be nx*ny ordered such that the x direction changes
660 // most rapidly:
661 // First equation is at point (0,0)
662 // Second at (1,0)
663 // ...
664 // nx equation at (nx-1,0)
665 // nx+1st equation at (0,1)
666 
667 // numPoints (In) - number of points in finite difference stencil
668 // xoff (In) - stencil offsets in x direction (of length numPoints)
669 // yoff (In) - stencil offsets in y direction (of length numPoints)
670 // A standard 5-point finite difference stencil would be described as:
671 // numPoints = 5
672 // xoff = [-1, 1, 0, 0, 0]
673 // yoff = [ 0, 0, 0, -1, 1]
674 
675 // nsizes (In) - Length of element size list used to create variable block map and matrix
676 // sizes (In) - integer list of element sizes of length nsizes
677 // The map associated with this VbrMatrix will be created by cycling through the sizes list.
678 // For example, if nsize = 3 and sizes = [ 2, 4, 3], the block map will have elementsizes
679 // of 2, 4, 3, 2, 4, 3, ...
680 
681 // nrhs - Number of rhs to generate. (First interface produces vectors, so nrhs is not needed
682 
683 // comm (In) - an Epetra_Comm object describing the parallel machine (numProcs and my proc ID)
684 // map (Out) - Epetra_Map describing distribution of matrix and vectors/multivectors
685 // A (Out) - Epetra_VbrMatrix constructed for nx by ny grid using prescribed stencil
686 // Off-diagonal values are random between 0 and 1. If diagonal is part of stencil,
687 // diagonal will be slightly diag dominant.
688 // b (Out) - Generated RHS. Values satisfy b = A*xexact
689 // bt (Out) - Generated RHS. Values satisfy b = A'*xexact
690 // xexact (Out) - Generated exact solution to Ax = b and b' = A'xexact
691 
692 // Note: Caller of this function is responsible for deleting all output objects.
693 
694 void GenerateVbrProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints,
695  int * xoff, int * yoff,
696  int nsizes, int * sizes,
697  const Epetra_Comm &comm, bool verbose, bool summary,
698  Epetra_BlockMap *& map,
699  Epetra_VbrMatrix *& A,
700  Epetra_Vector *& b,
701  Epetra_Vector *& bt,
702  Epetra_Vector *&xexact, bool StaticProfile, bool MakeLocalOnly) {
703 
704  Epetra_MultiVector * b1, * bt1, * xexact1;
705 
706  GenerateVbrProblem(numNodesX, numNodesY, numProcsX, numProcsY, numPoints,
707  xoff, yoff, nsizes, sizes,
708  1, comm, verbose, summary, map, A, b1, bt1, xexact1, StaticProfile, MakeLocalOnly);
709 
710  b = dynamic_cast<Epetra_Vector *>(b1);
711  bt = dynamic_cast<Epetra_Vector *>(bt1);
712  xexact = dynamic_cast<Epetra_Vector *>(xexact1);
713 
714  return;
715 }
716 
717 void GenerateVbrProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints,
718  int * xoff, int * yoff,
719  int nsizes, int * sizes, int nrhs,
720  const Epetra_Comm &comm, bool verbose, bool summary,
721  Epetra_BlockMap *& map,
722  Epetra_VbrMatrix *& A,
723  Epetra_MultiVector *& b,
724  Epetra_MultiVector *& bt,
725  Epetra_MultiVector *&xexact, bool StaticProfile, bool MakeLocalOnly) {
726 
727  int i;
728 
729  // Determine my global IDs
730  long long * myGlobalElements;
731  GenerateMyGlobalElements(numNodesX, numNodesY, numProcsX, numProcsY, comm.MyPID(), myGlobalElements);
732 
733  int numMyElements = numNodesX*numNodesY;
734 
735  Epetra_Map ptMap((long long)-1, numMyElements, myGlobalElements, 0, comm); // Create map with 2D block partitioning.
736  delete [] myGlobalElements;
737 
738  Epetra_IntVector elementSizes(ptMap); // This vector will have the list of element sizes
739  for (i=0; i<numMyElements; i++)
740  elementSizes[i] = sizes[ptMap.GID64(i)%nsizes]; // cycle through sizes array
741 
742  map = new Epetra_BlockMap((long long)-1, numMyElements, ptMap.MyGlobalElements64(), elementSizes.Values(),
743  ptMap.IndexBase64(), ptMap.Comm());
744 
745 
746 // FIXME: Won't compile until Epetra_VbrMatrix is modified.
747 #if 0
748  int profile = 0; if (StaticProfile) profile = numPoints;
749  int j;
750  long long numGlobalEquations = ptMap.NumGlobalElements64();
751 
752  if (MakeLocalOnly)
753  A = new Epetra_VbrMatrix(Copy, *map, *map, profile); // Construct matrix rowmap=colmap
754  else
755  A = new Epetra_VbrMatrix(Copy, *map, profile); // Construct matrix
756 
757  long long * indices = new long long[numPoints];
758 
759  // This section of code creates a vector of random values that will be used to create
760  // light-weight dense matrices to pass into the VbrMatrix construction process.
761 
762  int maxElementSize = 0;
763  for (i=0; i< nsizes; i++) maxElementSize = EPETRA_MAX(maxElementSize, sizes[i]);
764 
765  Epetra_LocalMap lmap((long long)maxElementSize*maxElementSize, ptMap.IndexBase(), ptMap.Comm());
766  Epetra_Vector randvec(lmap);
767  randvec.Random();
768  randvec.Scale(-1.0); // Make value negative
769  int nx = numNodesX*numProcsX;
770 
771 
772  for (i=0; i<numMyElements; i++) {
773  long long rowID = map->GID64(i);
774  int numIndices = 0;
775  int rowDim = sizes[rowID%nsizes];
776  for (j=0; j<numPoints; j++) {
777  long long colID = rowID + xoff[j] + nx*yoff[j]; // Compute column ID based on stencil offsets
778  if (colID>-1 && colID<numGlobalEquations)
779  indices[numIndices++] = colID;
780  }
781 
782  A->BeginInsertGlobalValues(rowID, numIndices, indices);
783 
784  for (j=0; j < numIndices; j++) {
785  int colDim = sizes[indices[j]%nsizes];
786  A->SubmitBlockEntry(&(randvec[0]), rowDim, rowDim, colDim);
787  }
788  A->EndSubmitEntries();
789  }
790 
791  delete [] indices;
792 
793  A->FillComplete();
794 
795  // Compute the InvRowSums of the matrix rows
796  Epetra_Vector invRowSums(A->RowMap());
797  Epetra_Vector rowSums(A->RowMap());
798  A->InvRowSums(invRowSums);
799  rowSums.Reciprocal(invRowSums);
800 
801  // Jam the row sum values into the diagonal of the Vbr matrix (to make it diag dominant)
802  int numBlockDiagonalEntries;
803  int * rowColDims;
804  int * diagoffsets = map->FirstPointInElementList();
805  A->BeginExtractBlockDiagonalView(numBlockDiagonalEntries, rowColDims);
806  for (i=0; i< numBlockDiagonalEntries; i++) {
807  double * diagVals;
808  int diagLDA;
809  A->ExtractBlockDiagonalEntryView(diagVals, diagLDA);
810  int rowDim = map->ElementSize(i);
811  for (j=0; j<rowDim; j++) diagVals[j+j*diagLDA] = rowSums[diagoffsets[i]+j];
812  }
813 
814  if (nrhs<=1) {
815  b = new Epetra_Vector(*map);
816  bt = new Epetra_Vector(*map);
817  xexact = new Epetra_Vector(*map);
818  }
819  else {
820  b = new Epetra_MultiVector(*map, nrhs);
821  bt = new Epetra_MultiVector(*map, nrhs);
822  xexact = new Epetra_MultiVector(*map, nrhs);
823  }
824 
825  xexact->Random(); // Fill xexact with random values
826 
827  A->Multiply(false, *xexact, *b);
828  A->Multiply(true, *xexact, *bt);
829 
830 #endif // EPETRA_NO_32BIT_GLOBAL_INDICES
831 
832  return;
833 }
834 
835 void GenerateMyGlobalElements(int numNodesX, int numNodesY, int numProcsX, int numProcs,
836  int myPID, long long * & myGlobalElements) {
837 
838  myGlobalElements = new long long[numNodesX*numNodesY];
839  int myProcX = myPID%numProcsX;
840  int myProcY = myPID/numProcsX;
841  int curGID = myProcY*(numProcsX*numNodesX)*numNodesY+myProcX*numNodesX;
842  for (int j=0; j<numNodesY; j++) {
843  for (int i=0; i<numNodesX; i++) {
844  myGlobalElements[j*numNodesX+i] = curGID+i;
845  }
846  curGID+=numNodesX*numProcsX;
847  }
848  //for (int i=0; i<numNodesX*numNodesY; i++) cout << "MYPID " << myPID <<" GID "<< myGlobalElements[i] << endl;
849 
850  return;
851 }
852 
854  Epetra_MultiVector * xexact, bool StaticProfile, bool verbose, bool summary) {
855 
856  Epetra_MultiVector z(*b);
857  Epetra_MultiVector r(*b);
859 
860  //Timings
861  Epetra_Flops flopcounter;
862  A->SetFlopCounter(flopcounter);
863  Epetra_Time timer(A->Comm());
864  std::string statdyn = "dynamic";
865  if (StaticProfile) statdyn = "static ";
866 
867  for (int j=0; j<4; j++) { // j = 0/2 is notrans, j = 1/3 is trans
868 
869  bool TransA = (j==1 || j==3);
870  std::string contig = "without";
871  if (j>1) contig = "with ";
872 
873 #ifdef EPETRA_SHORT_PERFTEST
874  int kstart = 1;
875 #else
876  int kstart = 0;
877 #endif
878  for (int k=kstart; k<2; k++) { // Loop over old multiply vs. new multiply
879 
880  std::string oldnew = "old";
881  if (k>0) oldnew = "new";
882 
883  if (j==2) A->OptimizeStorage();
884 
885  flopcounter.ResetFlops();
886  timer.ResetStartTime();
887 
888  if (k==0) {
889  //10 matvecs
890 #ifndef EPETRA_SHORT_PERFTEST
891  for( int i = 0; i < 10; ++i )
892  A->Multiply1(TransA, *xexact, z); // Compute z = A*xexact or z = A'*xexact using old Multiply method
893 #endif
894  }
895  else {
896  //10 matvecs
897  for( int i = 0; i < 10; ++i )
898  A->Multiply(TransA, *xexact, z); // Compute z = A*xexact or z = A'*xexact
899  }
900 
901  double elapsed_time = timer.ElapsedTime();
902  double total_flops = A->Flops();
903 
904  // Compute residual
905  if (TransA)
906  r.Update(-1.0, z, 1.0, *bt, 0.0); // r = bt - z
907  else
908  r.Update(-1.0, z, 1.0, *b, 0.0); // r = b - z
909 
910  r.Norm2(resvec.Values());
911 
912  if (verbose) cout << "ResNorm = " << resvec.NormInf() << ": ";
913  double MFLOPs = total_flops/elapsed_time/1000000.0;
914  if (verbose) cout << "Total MFLOPs for 10 " << oldnew << " MatVec's with " << statdyn << " Profile (Trans = " << TransA
915  << ") and " << contig << " optimized storage = " << MFLOPs << " (" << elapsed_time << " s)" <<endl;
916  if (summary) {
917  if (A->Comm().NumProc()==1) {
918  if (TransA) cout << "TransMv" << statdyn<< "Prof" << contig << "OptStor" << '\t';
919  else cout << "NoTransMv" << statdyn << "Prof" << contig << "OptStor" << '\t';
920  }
921  cout << MFLOPs << endl;
922  }
923  }
924  }
925  return;
926 }
927 #ifdef EPETRA_HAVE_JADMATRIX
929  Epetra_MultiVector * xexact, bool StaticProfile, bool verbose, bool summary) {
930 
931  Epetra_MultiVector z(*b);
932  Epetra_MultiVector r(*b);
934 
935  //Timings
936  Epetra_Flops flopcounter;
937  A->SetFlopCounter(flopcounter);
938  Epetra_Time timer(A->Comm());
939 
940  for (int j=0; j<2; j++) { // j = 0 is notrans, j = 1 is trans
941 
942  bool TransA = (j==1);
943  A->SetUseTranspose(TransA);
944  flopcounter.ResetFlops();
945  timer.ResetStartTime();
946 
947  //10 matvecs
948  for( int i = 0; i < 10; ++i )
949  A->Apply(*xexact, z); // Compute z = A*xexact or z = A'*xexact
950 
951  double elapsed_time = timer.ElapsedTime();
952  double total_flops = A->Flops();
953 
954  // Compute residual
955  if (TransA)
956  r.Update(-1.0, z, 1.0, *bt, 0.0); // r = bt - z
957  else
958  r.Update(-1.0, z, 1.0, *b, 0.0); // r = b - z
959 
960  r.Norm2(resvec.Values());
961 
962  if (verbose) cout << "ResNorm = " << resvec.NormInf() << ": ";
963  double MFLOPs = total_flops/elapsed_time/1000000.0;
964  if (verbose) cout << "Total MFLOPs for 10 " << " Jagged Diagonal MatVec's with (Trans = " << TransA
965  << ") " << MFLOPs << " (" << elapsed_time << " s)" <<endl;
966  if (summary) {
967  if (A->Comm().NumProc()==1) {
968  if (TransA) cout << "TransMv" << '\t';
969  else cout << "NoTransMv" << '\t';
970  }
971  cout << MFLOPs << endl;
972  }
973  }
974  return;
975 }
976 #endif
977 //=========================================================================================
980  bool StaticProfile, bool verbose, bool summary) {
981 
982  if (L->NoDiagonal()) {
983  bL->Update(1.0, *xexactL, 1.0); // Add contribution of a unit diagonal to bL
984  btL->Update(1.0, *xexactL, 1.0); // Add contribution of a unit diagonal to btL
985  }
986  if (U->NoDiagonal()) {
987  bU->Update(1.0, *xexactU, 1.0); // Add contribution of a unit diagonal to bU
988  btU->Update(1.0, *xexactU, 1.0); // Add contribution of a unit diagonal to btU
989  }
990 
991  Epetra_MultiVector z(*bL);
992  Epetra_MultiVector r(*bL);
993  Epetra_SerialDenseVector resvec(bL->NumVectors());
994 
995  //Timings
996  Epetra_Flops flopcounter;
997  L->SetFlopCounter(flopcounter);
998  U->SetFlopCounter(flopcounter);
999  Epetra_Time timer(L->Comm());
1000  std::string statdyn = "dynamic";
1001  if (StaticProfile) statdyn = "static ";
1002 
1003  for (int j=0; j<4; j++) { // j = 0/2 is notrans, j = 1/3 is trans
1004 
1005  bool TransA = (j==1 || j==3);
1006  std::string contig = "without";
1007  if (j>1) contig = "with ";
1008 
1009  if (j==2) {
1010  L->OptimizeStorage();
1011  U->OptimizeStorage();
1012  }
1013 
1014  flopcounter.ResetFlops();
1015  timer.ResetStartTime();
1016 
1017  //10 lower solves
1018  bool Upper = false;
1019  bool UnitDiagonal = L->NoDiagonal(); // If no diagonal, then unit must be used
1020  Epetra_MultiVector * b = TransA ? btL : bL; // solve with the appropriate b vector
1021  for( int i = 0; i < 10; ++i )
1022  L->Solve(Upper, TransA, UnitDiagonal, *b, z); // Solve Lz = bL or L'z = bLt
1023 
1024  double elapsed_time = timer.ElapsedTime();
1025  double total_flops = L->Flops();
1026 
1027  // Compute residual
1028  r.Update(-1.0, z, 1.0, *xexactL, 0.0); // r = bt - z
1029  r.Norm2(resvec.Values());
1030 
1031  if (resvec.NormInf()>0.000001) {
1032  cout << "resvec = " << resvec << endl;
1033  cout << "z = " << z << endl;
1034  cout << "xexactL = " << *xexactL << endl;
1035  cout << "r = " << r << endl;
1036  }
1037 
1038  if (verbose) cout << "ResNorm = " << resvec.NormInf() << ": ";
1039  double MFLOPs = total_flops/elapsed_time/1000000.0;
1040  if (verbose) cout << "Total MFLOPs for 10 " << " Lower solves " << statdyn << " Profile (Trans = " << TransA
1041  << ") and " << contig << " opt storage = " << MFLOPs << " (" << elapsed_time << " s)" <<endl;
1042  if (summary) {
1043  if (L->Comm().NumProc()==1) {
1044  if (TransA) cout << "TransLSv" << statdyn<< "Prof" << contig << "OptStor" << '\t';
1045  else cout << "NoTransLSv" << statdyn << "Prof" << contig << "OptStor" << '\t';
1046  }
1047  cout << MFLOPs << endl;
1048  }
1049  flopcounter.ResetFlops();
1050  timer.ResetStartTime();
1051 
1052  //10 upper solves
1053  Upper = true;
1054  UnitDiagonal = U->NoDiagonal(); // If no diagonal, then unit must be used
1055  b = TransA ? btU : bU; // solve with the appropriate b vector
1056  for( int i = 0; i < 10; ++i )
1057  U->Solve(Upper, TransA, UnitDiagonal, *b, z); // Solve Lz = bL or L'z = bLt
1058 
1059  elapsed_time = timer.ElapsedTime();
1060  total_flops = U->Flops();
1061 
1062  // Compute residual
1063  r.Update(-1.0, z, 1.0, *xexactU, 0.0); // r = bt - z
1064  r.Norm2(resvec.Values());
1065 
1066  if (resvec.NormInf()>0.001) {
1067  cout << "U = " << *U << endl;
1068  //cout << "resvec = " << resvec << endl;
1069  cout << "z = " << z << endl;
1070  cout << "xexactU = " << *xexactU << endl;
1071  //cout << "r = " << r << endl;
1072  cout << "b = " << *b << endl;
1073  }
1074 
1075 
1076  if (verbose) cout << "ResNorm = " << resvec.NormInf() << ": ";
1077  MFLOPs = total_flops/elapsed_time/1000000.0;
1078  if (verbose) cout << "Total MFLOPs for 10 " << " Upper solves " << statdyn << " Profile (Trans = " << TransA
1079  << ") and " << contig << " opt storage = " << MFLOPs <<endl;
1080  if (summary) {
1081  if (L->Comm().NumProc()==1) {
1082  if (TransA) cout << "TransUSv" << statdyn<< "Prof" << contig << "OptStor" << '\t';
1083  else cout << "NoTransUSv" << statdyn << "Prof" << contig << "OptStor" << '\t';
1084  }
1085  cout << MFLOPs << endl;
1086  }
1087  }
1088  return;
1089 }
int ExtractBlockDiagonalEntryView(double *&Values, int &LDA) const
Extract a view of a block diagonal entry from the matrix.
Epetra_MultiVector: A class for constructing and using dense multi-vectors, vectors and matrices in p...
void SetFlopCounter(const Epetra_Flops &FlopCounter_in)
Set the internal Epetra_Flops() pointer.
int * FirstPointInElementList() const
Pointer to internal array containing a mapping between the local elements and the first local point n...
Epetra_Map: A class for partitioning vectors and matrices.
Definition: Epetra_Map.h:119
virtual int SetUseTranspose(bool use_transpose)
If set true, transpose of this operator will be applied.
int Random()
Set multi-vector values to random numbers.
int Multiply(bool TransA, const Epetra_MultiVector &X, Epetra_MultiVector &Y) const
Returns the result of a Epetra_VbrMatrix multiplied by a Epetra_MultiVector X in Y.
Epetra_IntSerialDenseVector: A class for constructing and using dense vectors.
int ElementSize() const
Returns the size of elements in the map; only valid if map has constant element size.
int Length() const
Returns length of vector.
int Multiply(bool TransA, const Epetra_Vector &x, Epetra_Vector &y) const
Returns the result of a Epetra_CrsMatrix multiplied by a Epetra_Vector x in y.
long long IndexBase64() const
Epetra_IntVector: A class for constructing and using dense integer vectors on a parallel computer...
int BeginExtractBlockDiagonalView(int &NumBlockDiagonalEntries, int *&RowColDims) const
Initiates a view of the block diagonal entries.
virtual const Epetra_Comm & Comm() const
Returns a pointer to the Epetra_Comm communicator associated with this matrix.
long long NumGlobalElements64() const
double ElapsedTime(void) const
Epetra_Time elapsed time function.
int Size(int Length_in)
Set length of a Epetra_IntSerialDenseVector object; init values to zero.
virtual int InsertGlobalValues(int GlobalRow, int NumEntries, const double *Values, const int *Indices)
Insert a list of elements in a given global row of the matrix.
static void SetTracebackMode(int TracebackModeValue)
Set the value of the Epetra_Object error traceback report mode.
void GenerateVbrProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, int *xoff, int *yoff, int nsizes, int *sizes, const Epetra_Comm &comm, bool verbose, bool summary, Epetra_BlockMap *&map, Epetra_VbrMatrix *&A, Epetra_Vector *&b, Epetra_Vector *&bt, Epetra_Vector *&xexact, bool StaticProfile, bool MakeLocalOnly)
Epetra_VbrMatrix: A class for the construction and use of real-valued double-precision variable block...
double NormInf() const
Compute Inf-norm of each vector in multi-vector.
Epetra_Vector: A class for constructing and using dense vectors on a parallel computer.
void runJadMatrixTests(Epetra_JadMatrix *A, Epetra_MultiVector *b, Epetra_MultiVector *bt, Epetra_MultiVector *xexact, bool StaticProfile, bool verbose, bool summary)
int MyPID() const
Return my process ID.
Epetra_MpiComm: The Epetra MPI Communication Class.
std::string Epetra_Version()
int * Values() const
Returns a pointer to an array containing the values of this vector.
int NumVectors() const
Returns the number of vectors in the multi-vector.
Epetra_JadMatrix: A class for constructing matrix objects optimized for common kernels.
virtual int MyPID() const =0
Return my process ID.
int FillComplete(bool OptimizeDataStorage=true)
Signal that data entry is complete. Perform transformations to local index space. ...
int Resize(int Length_in)
Resize a Epetra_SerialDenseVector object.
Epetra_Time: The Epetra Timing Class.
Definition: Epetra_Time.h:75
int Dot(const Epetra_MultiVector &A, double *Result) const
Computes dot product of each corresponding pair of vectors.
int IndexBase() const
Index base for this map.
Epetra_SerialDenseVector: A class for constructing and using dense vectors.
long long GID64(int LID) const
Epetra_Comm: The Epetra Communication Abstract Base Class.
Definition: Epetra_Comm.h:73
void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, int *xoff, int *yoff, const Epetra_Comm &comm, bool verbose, bool summary, Epetra_Map *&map, Epetra_CrsMatrix *&A, Epetra_Vector *&b, Epetra_Vector *&bt, Epetra_Vector *&xexact, bool StaticProfile, bool MakeLocalOnly)
virtual int Apply(const Epetra_MultiVector &X, Epetra_MultiVector &Y) const
Returns the result of a Epetra_RowMatrix applied to a Epetra_MultiVector X in Y.
int Solve(bool Upper, bool Trans, bool UnitDiagonal, const Epetra_Vector &x, Epetra_Vector &y) const
Returns the result of a local solve using the Epetra_CrsMatrix on a Epetra_Vector x in y...
int OptimizeStorage()
Make consecutive row index sections contiguous, minimize internal storage used for constructing graph...
bool NoDiagonal() const
If matrix has no diagonal entries in global index space, this query returns true, otherwise it return...
Epetra_BlockMap: A class for partitioning block element vectors and matrices.
const Epetra_BlockMap & RowMap() const
Returns the RowMap object as an Epetra_BlockMap (the Epetra_Map base class) needed for implementing E...
int EndSubmitEntries()
Completes processing of all data passed in for the current block row.
int NumProc() const
Returns total number of processes (always returns 1 for SerialComm).
const Epetra_Comm & Comm() const
Access function for Epetra_Comm communicator.
Epetra_SerialComm: The Epetra Serial Communication Class.
void ResetFlops()
Resets the number of floating point operations to zero for this multi-vector.
Definition: Epetra_Flops.h:77
Epetra_Flops: The Epetra Floating Point Operations Class.
Definition: Epetra_Flops.h:58
int Update(double ScalarA, const Epetra_MultiVector &A, double ScalarThis)
Update multi-vector values with scaled values of A, this = ScalarThis*this + ScalarA*A.
virtual int NumProc() const =0
Returns total number of processes.
int InvRowSums(Epetra_Vector &x) const
Computes the sum of absolute values of the rows of the Epetra_VbrMatrix, results returned in x...
Epetra_CrsMatrix: A class for constructing and using real-valued double-precision sparse compressed r...
int Norm2(double *Result) const
Compute 2-norm of each vector in multi-vector.
double Flops() const
Returns the number of floating point operations with this multi-vector.
int FillComplete()
Signal that data entry is complete, perform transformations to local index space. ...
int BeginInsertGlobalValues(int BlockRow, int NumBlockEntries, int *BlockIndices)
Initiate insertion of a list of elements in a given global row of the matrix, values are inserted via...
int SubmitBlockEntry(double *Values, int LDA, int NumRows, int NumCols)
Submit a block entry to the indicated block row and column specified in the Begin routine...
int main(int argc, char *argv[])
void runMatrixTests(Epetra_CrsMatrix *A, Epetra_MultiVector *b, Epetra_MultiVector *bt, Epetra_MultiVector *xexact, bool StaticProfile, bool verbose, bool summary)
void GenerateMyGlobalElements(int numNodesX, int numNodesY, int numProcsX, int numProcs, int myPID, int *&myGlobalElements)
double * Values() const
Returns pointer to the values in vector.
long long * MyGlobalElements64() const
int * Values()
Returns pointer to the values in vector.
void runLUMatrixTests(Epetra_CrsMatrix *L, Epetra_MultiVector *bL, Epetra_MultiVector *btL, Epetra_MultiVector *xexactL, Epetra_CrsMatrix *U, Epetra_MultiVector *bU, Epetra_MultiVector *btU, Epetra_MultiVector *xexactU, bool StaticProfile, bool verbose, bool summary)
void ResetStartTime(void)
Epetra_Time function to reset the start time for a timer object.
int Multiply1(bool TransA, const Epetra_Vector &x, Epetra_Vector &y) const
const Epetra_Comm & Comm() const
Returns a pointer to the Epetra_Comm communicator associated with this matrix.
#define EPETRA_MAX(x, y)
Epetra_LocalMap: A class for replicating vectors and matrices across multiple processors.