43 #define EPETRA_HAVE_JADMATRIX 
   44 #define EPETRA_VERY_SHORT_PERFTEST 
   45 #define EPETRA_HAVE_STATICPROFILE 
   64 #include "../epetra_test_err.h" 
   66 #ifdef EPETRA_HAVE_JADMATRIX 
   72 void GenerateCrsProblem(
int numNodesX, 
int numNodesY, 
int numProcsX, 
int numProcsY, 
int numPoints,
 
   73       int * xoff, 
int * yoff,
 
   74       const Epetra_Comm  &comm, 
bool verbose, 
bool summary,
 
   79       Epetra_Vector *&xexact, 
bool StaticProfile, 
bool MakeLocalOnly);
 
   81 void GenerateCrsProblem(
int numNodesX, 
int numNodesY, 
int numProcsX, 
int numProcsY, 
int numPoints,
 
   82       int * xoff, 
int * yoff, 
int nrhs,
 
   83       const Epetra_Comm  &comm, 
bool verbose, 
bool summary,
 
   90 void GenerateVbrProblem(
int numNodesX, 
int numNodesY, 
int numProcsX, 
int numProcsY, 
int numPoints,
 
   91       int * xoff, 
int * yoff,
 
   92       int nsizes, 
int * sizes,
 
   93       const Epetra_Comm  &comm, 
bool verbose, 
bool summary,
 
   98       Epetra_Vector *&xexact, 
bool StaticProfile, 
bool MakeLocalOnly);
 
  100 void GenerateVbrProblem(
int numNodesX, 
int numNodesY, 
int numProcsX, 
int numProcsY, 
int numPoints,
 
  101       int * xoff, 
int * yoff,
 
  102       int nsizes, 
int * sizes, 
int nrhs,
 
  103       const Epetra_Comm  &comm, 
bool verbose, 
bool summary,
 
  111             int myPID, 
long long * & myGlobalElements);
 
  115 #ifdef EPETRA_HAVE_JADMATRIX 
  121           bool StaticProfile, 
bool verbose, 
bool summary);
 
  122 int main(
int argc, 
char *argv[])
 
  133   MPI_Init(&argc,&argv);
 
  139   bool verbose = 
false;
 
  140   bool summary = 
false;
 
  143   if (argc>6) 
if (argv[6][0]==
'-' && argv[6][1]==
'v') verbose = 
true;
 
  146   if (argc>6) 
if (argv[6][0]==
'-' && argv[6][1]==
's') summary = 
true;
 
  149     cerr << 
"Usage: " << argv[0]
 
  150          << 
" NumNodesX NumNodesY NumProcX NumProcY NumPoints [-v|-s]" << endl
 
  152          << 
"NumNodesX         - Number of mesh nodes in X direction per processor" << endl
 
  153          << 
"NumNodesY         - Number of mesh nodes in Y direction per processor" << endl
 
  154          << 
"NumProcX          - Number of processors to use in X direction" << endl
 
  155          << 
"NumProcY          - Number of processors to use in Y direction" << endl
 
  156          << 
"NumPoints         - Number of points to use in stencil (5, 9 or 25 only)" << endl
 
  157          << 
"-v|-s             - (Optional) Run in verbose mode if -v present or summary mode if -s present" << endl
 
  158          << 
" NOTES: NumProcX*NumProcY must equal the number of processors used to run the problem." << endl << endl
 
  159    << 
" Serial example:" << endl
 
  160          << argv[0] << 
" 16 12 1 1 25 -v" << endl
 
  161    << 
" Run this program in verbose mode on 1 processor using a 16 X 12 grid with a 25 point stencil."<< endl <<endl
 
  162    << 
" MPI example:" << endl
 
  163          << 
"mpirun -np 32 " << argv[0] << 
" 10 12 4 8 9 -v" << endl
 
  164    << 
" Run this program in verbose mode on 32 processors putting a 10 X 12 subgrid on each processor using 4 processors "<< endl
 
  165    << 
" in the X direction and 8 in the Y direction.  Total grid size is 40 points in X and 96 in Y with a 9 point stencil."<< endl
 
  176   if (verbose && comm.
MyPID()==0)
 
  178   if (summary && comm.
MyPID()==0) {
 
  182       cout << endl << endl; 
 
  185   if (verbose) cout << comm <<endl;
 
  190   if (verbose && comm.
MyPID()!=0) verbose = 
false;
 
  191   if (summary && comm.
MyPID()!=0) summary = 
false;
 
  193   int numNodesX = atoi(argv[1]);
 
  194   int numNodesY = atoi(argv[2]);
 
  195   int numProcsX = atoi(argv[3]);
 
  196   int numProcsY = atoi(argv[4]);
 
  197   int numPoints = atoi(argv[5]);
 
  199   if (verbose || (summary && comm.
NumProc()==1)) {
 
  200     cout << 
" Number of local nodes in X direction  = " << numNodesX << endl
 
  201    << 
" Number of local nodes in Y direction  = " << numNodesY << endl
 
  202    << 
" Number of global nodes in X direction = " << numNodesX*numProcsX << endl
 
  203    << 
" Number of global nodes in Y direction = " << numNodesY*numProcsY << endl
 
  204    << 
" Number of local nonzero entries       = " << numNodesX*numNodesY*numPoints << endl
 
  205    << 
" Number of global nonzero entries      = " << numNodesX*numNodesY*numPoints*numProcsX*numProcsY << endl
 
  206    << 
" Number of Processors in X direction   = " << numProcsX << endl
 
  207    << 
" Number of Processors in Y direction   = " << numProcsY << endl
 
  208    << 
" Number of Points in stencil           = " << numPoints << endl << endl;
 
  211   if (summary && comm.
NumProc()>1)
 
  212     cout << endl << endl << endl << endl << endl << endl << endl << endl<< endl << endl;
 
  214   if (numProcsX*numProcsY!=comm.
NumProc()) {
 
  215     cerr << 
"Number of processors = " << comm.
NumProc() << endl
 
  216    << 
" is not the product of " << numProcsX << 
" and " << numProcsY << endl << endl;
 
  220   if (numPoints!=5 && numPoints!=9 && numPoints!=25) {
 
  221     cerr << 
"Number of points specified = " << numPoints << endl
 
  222    << 
" is not 5, 9, 25" << endl << endl;
 
  226   if (numNodesX*numNodesY<=0) {
 
  227     cerr << 
"Product of number of nodes is <= zero" << endl << endl;
 
  238     Xoff[0] = -1; Xoff[1] = 1; Xoff[2] = 0; Xoff[3] = 0;  Xoff[4] = 0;
 
  239     Yoff[0] = 0;  Yoff[1] = 0; Yoff[2] = 0; Yoff[3] = -1; Yoff[4] = 1;
 
  244     XLoff[0] = -1; XLoff[1] =  0;
 
  245     YLoff[0] =  0; YLoff[1] = -1;
 
  250     XUoff[0] =  0; XUoff[1] =  1; XUoff[2] = 0;
 
  251     YUoff[0] =  0; YUoff[1] =  0; YUoff[2] = 1;
 
  253   else if (numPoints==9) {
 
  257     Xoff[0] = -1;  Xoff[1] =  0; Xoff[2] =  1;
 
  258     Yoff[0] = -1;  Yoff[1] = -1; Yoff[2] = -1;
 
  259     Xoff[3] = -1;  Xoff[4] =  0; Xoff[5] =  1;
 
  260     Yoff[3] =  0;  Yoff[4] =  0; Yoff[5] =  0;
 
  261     Xoff[6] = -1;  Xoff[7] =  0; Xoff[8] =  1;
 
  262     Yoff[6] =  1;  Yoff[7] =  1; Yoff[8] =  1;
 
  267     XLoff[0] = -1;  XLoff[1] =  0; Xoff[2] =  1;
 
  268     YLoff[0] = -1;  YLoff[1] = -1; Yoff[2] = -1;
 
  269     XLoff[3] = -1;  XLoff[4] =  0;
 
  270     YLoff[3] =  0;  YLoff[4] =  0;
 
  277     XUoff[1] = -1;  XUoff[2] =  0; XUoff[3] =  1;
 
  278     YUoff[1] =  1;  YUoff[2] =  1; YUoff[3] =  1;
 
  286     int xo = -2, yo = -2;
 
  287     Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
 
  288     Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ;
 
  290     Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
 
  291     Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ;
 
  293     Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
 
  294     Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ;
 
  296     Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
 
  297     Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ;
 
  299     Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
 
  300     Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ;
 
  307     XLoff[xi++] = xo++;  XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++;
 
  308     YLoff[yi++] = yo  ;  YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; YLoff[yi++] = yo  ;
 
  310     XLoff[xi++] = xo++;  XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++;
 
  311     YLoff[yi++] = yo  ;  YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; YLoff[yi++] = yo  ;
 
  313     XLoff[xi++] = xo++;  XLoff[xi++] = xo++; XLoff[xi++] = xo++;
 
  314     YLoff[yi++] = yo  ;  YLoff[yi++] = yo  ; YLoff[yi++] = yo  ;
 
  321     XUoff[xi++] = xo++;  XUoff[xi++] = xo++; XUoff[xi++] = xo++;
 
  322     YUoff[yi++] = yo  ;  YUoff[yi++] = yo  ; YUoff[yi++] = yo  ;
 
  324     XUoff[xi++] = xo++;  XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++;
 
  325     YUoff[yi++] = yo  ;  YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; YUoff[yi++] = yo  ;
 
  327     XUoff[xi++] = xo++;  XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++;
 
  328     YUoff[yi++] = yo  ;  YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; YUoff[yi++] = yo  ;
 
  353 #ifdef EPETRA_VERY_SHORT_PERFTEST 
  355 #elif EPETRA_SHORT_PERFTEST 
  360   for (
int j=0; j<jstop; j++) {
 
  361     for (
int k=1; k<17; k++) {
 
  362 #ifdef EPETRA_VERY_SHORT_PERFTEST 
  363       if (k<3 || (k%4==0 && k<9)) {
 
  364 #elif EPETRA_SHORT_PERFTEST 
  370       if (verbose) cout << 
"\n*************** Results for " << nrhs << 
" RHS with ";
 
  372       bool StaticProfile = (j!=0);
 
  374         if (StaticProfile) cout << 
" static profile\n";
 
  375         else cout << 
" dynamic profile\n";
 
  378        Xoff.
Values(), Yoff.
Values(), nrhs, comm, verbose, summary,
 
  379        map, A, b, bt, xexact, StaticProfile, 
false);
 
  382 #ifdef EPETRA_HAVE_JADMATRIX 
  387       if (verbose) cout << 
"Time to create Jagged diagonal matrix = " << elapsed_time << endl;
 
  395       runMatrixTests(A, b, bt, xexact, StaticProfile, verbose, summary);
 
  403        XLoff.
Values(), YLoff.
Values(), nrhs, comm, verbose, summary,
 
  404        mapL, L, bL, btL, xexactL, StaticProfile, 
true);
 
  408        XUoff.
Values(), YUoff.
Values(), nrhs, comm, verbose, summary,
 
  409        mapU, U, bU, btU, xexactU, StaticProfile, 
true);
 
  412       runLUMatrixTests(L, bL, btL, xexactL, U, bU, btU, xexactU, StaticProfile, verbose, summary);
 
  442       for( 
int i = 0; i < 10; ++i )
 
  446       total_flops = q.
Flops();
 
  447       MFLOPs = total_flops/elapsed_time/1000000.0;
 
  448       if (verbose) cout << 
"\nTotal MFLOPs for 10 Norm2's= " << MFLOPs << endl;
 
  451   if (comm.NumProc()==1) cout << 
"Norm2" << 
'\t';
 
  452   cout << MFLOPs << endl;
 
  459       for( 
int i = 0; i < 10; ++i )
 
  463       total_flops = q.
Flops();
 
  464       MFLOPs = total_flops/elapsed_time/1000000.0;
 
  465       if (verbose) cout << 
"Total MFLOPs for 10 Dot's  = " << MFLOPs << endl;
 
  468   if (comm.NumProc()==1) cout << 
"DotProd" << 
'\t';
 
  469   cout << MFLOPs << endl;
 
  476       for( 
int i = 0; i < 10; ++i )
 
  477   q.
Update(1.0, z, 1.0, r, 0.0);
 
  480       total_flops = q.
Flops();
 
  481       MFLOPs = total_flops/elapsed_time/1000000.0;
 
  482       if (verbose) cout << 
"Total MFLOPs for 10 Updates= " << MFLOPs << endl;
 
  485   if (comm.NumProc()==1) cout << 
"Update" << 
'\t';
 
  486   cout << MFLOPs << endl;
 
  531 void GenerateCrsProblem(
int numNodesX, 
int numNodesY, 
int numProcsX, 
int numProcsY, 
int numPoints,
 
  532       int * xoff, 
int * yoff,
 
  533       const Epetra_Comm  &comm, 
bool verbose, 
bool summary,
 
  538       Epetra_Vector *&xexact, 
bool StaticProfile, 
bool MakeLocalOnly) {
 
  543          xoff, yoff, 1, comm, verbose, summary,
 
  544          map, A, b1, bt1, xexact1, StaticProfile, MakeLocalOnly);
 
  553 void GenerateCrsProblem(
int numNodesX, 
int numNodesY, 
int numProcsX, 
int numProcsY, 
int numPoints,
 
  554       int * xoff, 
int * yoff, 
int nrhs,
 
  555       const Epetra_Comm  &comm, 
bool verbose, 
bool summary,
 
  564   long long * myGlobalElements;
 
  567   int numMyEquations = numNodesX*numNodesY;
 
  569   map = 
new Epetra_Map((
long long)-1, numMyEquations, myGlobalElements, 0, comm); 
 
  570   delete [] myGlobalElements;
 
  574   int profile = 0; 
if (StaticProfile) profile = numPoints;
 
  576 #ifdef EPETRA_HAVE_STATICPROFILE 
  592   long long * indices = 
new long long[numPoints];
 
  593   double * values = 
new double[numPoints];
 
  595   double dnumPoints = (double) numPoints;
 
  596   int nx = numNodesX*numProcsX;
 
  598   for (
int i=0; i<numMyEquations; i++) {
 
  600     long long rowID = map->
GID64(i);
 
  603     for (
int j=0; j<numPoints; j++) {
 
  604       long long colID = rowID + xoff[j] + nx*yoff[j]; 
 
  605       if (colID>-1 && colID<numGlobalEquations) {
 
  606   indices[numIndices] = colID;
 
  607   double value = - ((double) rand())/ ((
double) RAND_MAX);
 
  609     values[numIndices++] = dnumPoints - value; 
 
  611     values[numIndices++] = value;
 
  626     cout << 
"Time to insert matrix values = " << insertTime << endl
 
  627    << 
"Time to complete fill        = " << fillCompleteTime << endl;
 
  629     if (comm.
NumProc()==1) cout << 
"InsertTime" << 
'\t';
 
  630     cout << insertTime << endl;
 
  631     if (comm.
NumProc()==1) cout << 
"FillCompleteTime" << 
'\t';
 
  632     cout << fillCompleteTime << endl;
 
  694 void GenerateVbrProblem(
int numNodesX, 
int numNodesY, 
int numProcsX, 
int numProcsY, 
int numPoints,
 
  695       int * xoff, 
int * yoff,
 
  696       int nsizes, 
int * sizes,
 
  697       const Epetra_Comm  &comm, 
bool verbose, 
bool summary,
 
  702       Epetra_Vector *&xexact, 
bool StaticProfile, 
bool MakeLocalOnly) {
 
  707          xoff, yoff, nsizes, sizes,
 
  708          1, comm, verbose, summary, map, A, b1, bt1, xexact1, StaticProfile, MakeLocalOnly);
 
  717 void GenerateVbrProblem(
int numNodesX, 
int numNodesY, 
int numProcsX, 
int numProcsY, 
int numPoints,
 
  718       int * xoff, 
int * yoff,
 
  719       int nsizes, 
int * sizes, 
int nrhs,
 
  720       const Epetra_Comm  &comm, 
bool verbose, 
bool summary,
 
  730   long long * myGlobalElements;
 
  733   int numMyElements = numNodesX*numNodesY;
 
  735   Epetra_Map ptMap((
long long)-1, numMyElements, myGlobalElements, 0, comm); 
 
  736   delete [] myGlobalElements;
 
  739   for (i=0; i<numMyElements; i++)
 
  740     elementSizes[i] = sizes[ptMap.
GID64(i)%nsizes]; 
 
  748   int profile = 0; 
if (StaticProfile) profile = numPoints;
 
  757   long long * indices = 
new long long[numPoints];
 
  762   int maxElementSize = 0;
 
  763   for (i=0; i< nsizes; i++) maxElementSize = 
EPETRA_MAX(maxElementSize, sizes[i]);
 
  769   int nx = numNodesX*numProcsX;
 
  772   for (i=0; i<numMyElements; i++) {
 
  773     long long rowID = map->
GID64(i);
 
  775     int rowDim = sizes[rowID%nsizes];
 
  776     for (j=0; j<numPoints; j++) {
 
  777       long long colID = rowID + xoff[j] + nx*yoff[j]; 
 
  778       if (colID>-1 && colID<numGlobalEquations)
 
  779   indices[numIndices++] = colID;
 
  784     for (j=0; j < numIndices; j++) {
 
  785       int colDim = sizes[indices[j]%nsizes];
 
  799   rowSums.Reciprocal(invRowSums);
 
  802   int numBlockDiagonalEntries;
 
  806   for (i=0; i< numBlockDiagonalEntries; i++) {
 
  811     for (j=0; j<rowDim; j++) diagVals[j+j*diagLDA] = rowSums[diagoffsets[i]+j];
 
  830 #endif // EPETRA_NO_32BIT_GLOBAL_INDICES 
  836             int myPID, 
long long * & myGlobalElements) {
 
  838   myGlobalElements = 
new long long[numNodesX*numNodesY];
 
  839   int myProcX = myPID%numProcsX;
 
  840   int myProcY = myPID/numProcsX;
 
  841   int curGID = myProcY*(numProcsX*numNodesX)*numNodesY+myProcX*numNodesX;
 
  842   for (
int j=0; j<numNodesY; j++) {
 
  843     for (
int i=0; i<numNodesX; i++) {
 
  844       myGlobalElements[j*numNodesX+i] = curGID+i;
 
  846     curGID+=numNodesX*numProcsX;
 
  864   std::string statdyn =        
"dynamic";
 
  865   if (StaticProfile) statdyn = 
"static ";
 
  867   for (
int j=0; j<4; j++) { 
 
  869     bool TransA = (j==1 || j==3);
 
  870     std::string contig = 
"without";
 
  871     if (j>1) contig =    
"with   ";
 
  873 #ifdef EPETRA_SHORT_PERFTEST 
  878     for (
int k=kstart; k<2; k++) { 
 
  880       std::string oldnew = 
"old";
 
  881       if (k>0) oldnew =    
"new";
 
  885       flopcounter.ResetFlops();
 
  890 #ifndef EPETRA_SHORT_PERFTEST 
  891   for( 
int i = 0; i < 10; ++i )
 
  897   for( 
int i = 0; i < 10; ++i )
 
  902       double total_flops = A->
Flops();
 
  906   r.Update(-1.0, z, 1.0, *bt, 0.0); 
 
  908   r.Update(-1.0, z, 1.0, *b, 0.0); 
 
  912       if (verbose) cout << 
"ResNorm = " << resvec.
NormInf() << 
": ";
 
  913       double MFLOPs = total_flops/elapsed_time/1000000.0;
 
  914       if (verbose) cout << 
"Total MFLOPs for 10 " << oldnew << 
" MatVec's with " << statdyn << 
" Profile (Trans = " << TransA
 
  915       << 
")  and " << contig << 
" optimized storage = " << MFLOPs << 
" (" << elapsed_time << 
" s)" <<endl;
 
  918     if (TransA) cout << 
"TransMv" << statdyn<< 
"Prof" << contig << 
"OptStor" << 
'\t';
 
  919     else cout << 
"NoTransMv" << statdyn << 
"Prof" << contig << 
"OptStor" << 
'\t';
 
  921   cout << MFLOPs << endl;
 
  927 #ifdef EPETRA_HAVE_JADMATRIX 
  940   for (
int j=0; j<2; j++) { 
 
  942     bool TransA = (j==1);
 
  944     flopcounter.ResetFlops();
 
  948     for( 
int i = 0; i < 10; ++i )
 
  949       A->
Apply(*xexact, z); 
 
  952     double total_flops = A->
Flops();
 
  956       r.Update(-1.0, z, 1.0, *bt, 0.0); 
 
  958       r.Update(-1.0, z, 1.0, *b, 0.0); 
 
  962     if (verbose) cout << 
"ResNorm = " << resvec.
NormInf() << 
": ";
 
  963     double MFLOPs = total_flops/elapsed_time/1000000.0;
 
  964     if (verbose) cout << 
"Total MFLOPs for 10 " << 
" Jagged Diagonal MatVec's with (Trans = " << TransA
 
  965           << 
") " << MFLOPs << 
" (" << elapsed_time << 
" s)" <<endl;
 
  968   if (TransA) cout << 
"TransMv" << 
'\t';
 
  969   else cout << 
"NoTransMv" << 
'\t';
 
  971       cout << MFLOPs << endl;
 
  980           bool StaticProfile, 
bool verbose, 
bool summary) {
 
  983     bL->
Update(1.0, *xexactL, 1.0); 
 
  984     btL->
Update(1.0, *xexactL, 1.0); 
 
  987     bU->
Update(1.0, *xexactU, 1.0); 
 
  988     btU->
Update(1.0, *xexactU, 1.0); 
 
 1000   std::string statdyn =        
"dynamic";
 
 1001   if (StaticProfile) statdyn = 
"static ";
 
 1003   for (
int j=0; j<4; j++) { 
 
 1005     bool TransA = (j==1 || j==3);
 
 1006     std::string contig = 
"without";
 
 1007     if (j>1) contig =    
"with   ";
 
 1014     flopcounter.ResetFlops();
 
 1021     for( 
int i = 0; i < 10; ++i )
 
 1022       L->
Solve(Upper, TransA, UnitDiagonal, *b, z); 
 
 1025     double total_flops = L->
Flops();
 
 1028     r.Update(-1.0, z, 1.0, *xexactL, 0.0); 
 
 1029     r.Norm2(resvec.
Values());
 
 1031     if (resvec.
NormInf()>0.000001) {
 
 1032       cout << 
"resvec = " << resvec << endl;
 
 1033       cout << 
"z = " << z << endl;
 
 1034       cout << 
"xexactL = " << *xexactL << endl;
 
 1035       cout << 
"r = " << r << endl;
 
 1038     if (verbose) cout << 
"ResNorm = " << resvec.
NormInf() << 
": ";
 
 1039     double MFLOPs = total_flops/elapsed_time/1000000.0;
 
 1040     if (verbose) cout << 
"Total MFLOPs for 10 " << 
" Lower solves " << statdyn << 
" Profile (Trans = " << TransA
 
 1041           << 
")  and " << contig << 
" opt storage = " << MFLOPs << 
" (" << elapsed_time << 
" s)" <<endl;
 
 1044   if (TransA) cout << 
"TransLSv" << statdyn<< 
"Prof" << contig << 
"OptStor" << 
'\t';
 
 1045   else cout << 
"NoTransLSv" << statdyn << 
"Prof" << contig << 
"OptStor" << 
'\t';
 
 1047       cout << MFLOPs << endl;
 
 1049     flopcounter.ResetFlops();
 
 1055     b = TransA ? btU : bU;  
 
 1056     for( 
int i = 0; i < 10; ++i )
 
 1057       U->
Solve(Upper, TransA, UnitDiagonal, *b, z); 
 
 1060     total_flops = U->
Flops();
 
 1063     r.Update(-1.0, z, 1.0, *xexactU, 0.0); 
 
 1064     r.Norm2(resvec.
Values());
 
 1067       cout << 
"U = " << *U << endl;
 
 1069       cout << 
"z = " << z << endl;
 
 1070       cout << 
"xexactU = " << *xexactU << endl;
 
 1072       cout << 
"b = " << *b << endl;
 
 1076     if (verbose) cout << 
"ResNorm = " << resvec.
NormInf() << 
": ";
 
 1077     MFLOPs = total_flops/elapsed_time/1000000.0;
 
 1078     if (verbose) cout << 
"Total MFLOPs for 10 " << 
" Upper solves " << statdyn << 
" Profile (Trans = " << TransA
 
 1079           << 
")  and " << contig << 
" opt storage = " << MFLOPs <<endl;
 
 1082   if (TransA) cout << 
"TransUSv" << statdyn<< 
"Prof" << contig << 
"OptStor" << 
'\t';
 
 1083   else cout << 
"NoTransUSv" << statdyn << 
"Prof" << contig << 
"OptStor" << 
'\t';
 
 1085       cout << MFLOPs << endl;
 
int ExtractBlockDiagonalEntryView(double *&Values, int &LDA) const 
Extract a view of a block diagonal entry from the matrix. 
 
Epetra_MultiVector: A class for constructing and using dense multi-vectors, vectors and matrices in p...
 
void SetFlopCounter(const Epetra_Flops &FlopCounter_in)
Set the internal Epetra_Flops() pointer. 
 
int * FirstPointInElementList() const 
Pointer to internal array containing a mapping between the local elements and the first local point n...
 
Epetra_Map: A class for partitioning vectors and matrices. 
 
virtual int SetUseTranspose(bool use_transpose)
If set true, transpose of this operator will be applied. 
 
int Random()
Set multi-vector values to random numbers. 
 
int Multiply(bool TransA, const Epetra_MultiVector &X, Epetra_MultiVector &Y) const 
Returns the result of a Epetra_VbrMatrix multiplied by a Epetra_MultiVector X in Y. 
 
Epetra_IntSerialDenseVector: A class for constructing and using dense vectors. 
 
int ElementSize() const 
Returns the size of elements in the map; only valid if map has constant element size. 
 
int Length() const 
Returns length of vector. 
 
int Multiply(bool TransA, const Epetra_Vector &x, Epetra_Vector &y) const 
Returns the result of a Epetra_CrsMatrix multiplied by a Epetra_Vector x in y. 
 
long long IndexBase64() const 
 
Epetra_IntVector: A class for constructing and using dense integer vectors on a parallel computer...
 
int BeginExtractBlockDiagonalView(int &NumBlockDiagonalEntries, int *&RowColDims) const 
Initiates a view of the block diagonal entries. 
 
virtual const Epetra_Comm & Comm() const 
Returns a pointer to the Epetra_Comm communicator associated with this matrix. 
 
long long NumGlobalElements64() const 
 
double ElapsedTime(void) const 
Epetra_Time elapsed time function. 
 
int Size(int Length_in)
Set length of a Epetra_IntSerialDenseVector object; init values to zero. 
 
virtual int InsertGlobalValues(int GlobalRow, int NumEntries, const double *Values, const int *Indices)
Insert a list of elements in a given global row of the matrix. 
 
static void SetTracebackMode(int TracebackModeValue)
Set the value of the Epetra_Object error traceback report mode. 
 
void GenerateVbrProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, int *xoff, int *yoff, int nsizes, int *sizes, const Epetra_Comm &comm, bool verbose, bool summary, Epetra_BlockMap *&map, Epetra_VbrMatrix *&A, Epetra_Vector *&b, Epetra_Vector *&bt, Epetra_Vector *&xexact, bool StaticProfile, bool MakeLocalOnly)
 
Epetra_VbrMatrix: A class for the construction and use of real-valued double-precision variable block...
 
double NormInf() const 
Compute Inf-norm of each vector in multi-vector. 
 
Epetra_Vector: A class for constructing and using dense vectors on a parallel computer. 
 
void runJadMatrixTests(Epetra_JadMatrix *A, Epetra_MultiVector *b, Epetra_MultiVector *bt, Epetra_MultiVector *xexact, bool StaticProfile, bool verbose, bool summary)
 
int MyPID() const 
Return my process ID. 
 
Epetra_MpiComm: The Epetra MPI Communication Class. 
 
std::string Epetra_Version()
 
int * Values() const 
Returns a pointer to an array containing the values of this vector. 
 
int NumVectors() const 
Returns the number of vectors in the multi-vector. 
 
Epetra_JadMatrix: A class for constructing matrix objects optimized for common kernels. 
 
virtual int MyPID() const =0
Return my process ID. 
 
int FillComplete(bool OptimizeDataStorage=true)
Signal that data entry is complete. Perform transformations to local index space. ...
 
int Resize(int Length_in)
Resize a Epetra_SerialDenseVector object. 
 
Epetra_Time: The Epetra Timing Class. 
 
int Dot(const Epetra_MultiVector &A, double *Result) const 
Computes dot product of each corresponding pair of vectors. 
 
int IndexBase() const 
Index base for this map. 
 
Epetra_SerialDenseVector: A class for constructing and using dense vectors. 
 
long long GID64(int LID) const 
 
Epetra_Comm: The Epetra Communication Abstract Base Class. 
 
void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, int *xoff, int *yoff, const Epetra_Comm &comm, bool verbose, bool summary, Epetra_Map *&map, Epetra_CrsMatrix *&A, Epetra_Vector *&b, Epetra_Vector *&bt, Epetra_Vector *&xexact, bool StaticProfile, bool MakeLocalOnly)
 
virtual int Apply(const Epetra_MultiVector &X, Epetra_MultiVector &Y) const 
Returns the result of a Epetra_RowMatrix applied to a Epetra_MultiVector X in Y. 
 
int Solve(bool Upper, bool Trans, bool UnitDiagonal, const Epetra_Vector &x, Epetra_Vector &y) const 
Returns the result of a local solve using the Epetra_CrsMatrix on a Epetra_Vector x in y...
 
int OptimizeStorage()
Make consecutive row index sections contiguous, minimize internal storage used for constructing graph...
 
bool NoDiagonal() const 
If matrix has no diagonal entries in global index space, this query returns true, otherwise it return...
 
Epetra_BlockMap: A class for partitioning block element vectors and matrices. 
 
const Epetra_BlockMap & RowMap() const 
Returns the RowMap object as an Epetra_BlockMap (the Epetra_Map base class) needed for implementing E...
 
int EndSubmitEntries()
Completes processing of all data passed in for the current block row. 
 
int NumProc() const 
Returns total number of processes (always returns 1 for SerialComm). 
 
const Epetra_Comm & Comm() const 
Access function for Epetra_Comm communicator. 
 
Epetra_SerialComm: The Epetra Serial Communication Class. 
 
void ResetFlops()
Resets the number of floating point operations to zero for this multi-vector. 
 
Epetra_Flops: The Epetra Floating Point Operations Class. 
 
int Update(double ScalarA, const Epetra_MultiVector &A, double ScalarThis)
Update multi-vector values with scaled values of A, this = ScalarThis*this + ScalarA*A. 
 
virtual int NumProc() const =0
Returns total number of processes. 
 
int InvRowSums(Epetra_Vector &x) const 
Computes the sum of absolute values of the rows of the Epetra_VbrMatrix, results returned in x...
 
Epetra_CrsMatrix: A class for constructing and using real-valued double-precision sparse compressed r...
 
int Norm2(double *Result) const 
Compute 2-norm of each vector in multi-vector. 
 
double Flops() const 
Returns the number of floating point operations with this multi-vector. 
 
int FillComplete()
Signal that data entry is complete, perform transformations to local index space. ...
 
int BeginInsertGlobalValues(int BlockRow, int NumBlockEntries, int *BlockIndices)
Initiate insertion of a list of elements in a given global row of the matrix, values are inserted via...
 
int SubmitBlockEntry(double *Values, int LDA, int NumRows, int NumCols)
Submit a block entry to the indicated block row and column specified in the Begin routine...
 
int main(int argc, char *argv[])
 
void runMatrixTests(Epetra_CrsMatrix *A, Epetra_MultiVector *b, Epetra_MultiVector *bt, Epetra_MultiVector *xexact, bool StaticProfile, bool verbose, bool summary)
 
void GenerateMyGlobalElements(int numNodesX, int numNodesY, int numProcsX, int numProcs, int myPID, int *&myGlobalElements)
 
double * Values() const 
Returns pointer to the values in vector. 
 
long long * MyGlobalElements64() const 
 
int * Values()
Returns pointer to the values in vector. 
 
void runLUMatrixTests(Epetra_CrsMatrix *L, Epetra_MultiVector *bL, Epetra_MultiVector *btL, Epetra_MultiVector *xexactL, Epetra_CrsMatrix *U, Epetra_MultiVector *bU, Epetra_MultiVector *btU, Epetra_MultiVector *xexactU, bool StaticProfile, bool verbose, bool summary)
 
void ResetStartTime(void)
Epetra_Time function to reset the start time for a timer object. 
 
int Multiply1(bool TransA, const Epetra_Vector &x, Epetra_Vector &y) const 
 
const Epetra_Comm & Comm() const 
Returns a pointer to the Epetra_Comm communicator associated with this matrix. 
 
Epetra_LocalMap: A class for replicating vectors and matrices across multiple processors.