void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY,
                        int numPoints, int * xoff, int * yoff,
                        /* ... comm, map, matrix, and right-hand-side parameters not shown ... */
                        Epetra_Vector *&xexact, bool StaticProfile, bool MakeLocalOnly);
void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY,
                        int numPoints, int * xoff, int * yoff, int nrhs,
                        /* ... comm, map, matrix, and multivector parameters not shown ... */ );

void GenerateMyGlobalElements(int numNodesX, int numNodesY, int numProcsX, int numProcsY,
                              int myPID, int * & myGlobalElements);
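// The GenerateCrsProblem overloads build the distributed test problem (a sparse matrix with a
// numPoints-entry stencil per row, plus exact-solution and right-hand-side data), one for a
// single Epetra_Vector right-hand side and one for an nrhs-column multivector.
// GenerateMyGlobalElements computes the list of global row IDs owned by a given process.
// The parameter lists above are abbreviated where this listing omits lines.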
int main(int argc, char *argv[]) {

  const string EpetraBenchmarkTest = "Epetra Benchmark Test Version 0.2 08/30/2007";

  double global_dimension;
  double global_nonzero_count;
  MPI_Init(&argc,&argv);
  // (the Epetra_MpiComm / Epetra_SerialComm communicator "comm", used throughout below,
  //  is constructed on lines not shown in this listing)
  bool printflops = true, printmflops = true, printtime = true;
  if (argc>5) if (argv[5][0]=='-' && argv[5][1]=='f') {printmflops = false; printtime = false;}
  bool mflopsonly = false;
  if (argc>5) if (argv[5][0]=='-' && argv[5][1]=='m') {printflops = false; printtime = false;}
  bool timeonly = false;
  if (argc>5) if (argv[5][0]=='-' && argv[5][1]=='t') {printflops = false; printmflops = false;}

  bool makeheader = false;
  if (argc>6) if (argv[6][0]=='-' && argv[6][1]=='h') makeheader = true;
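  // Output selection: "-f" prints only FLOP counts, "-m" only MFLOP/s, "-t" only times;
  // the default (-a or no flag) prints all three.  "-h" additionally requests the table
  // header, typically on the first of a series of runs.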
  // Usage message (the enclosing argument-count check and the exit that follows are not
  // shown in this listing).
  cerr << "Usage: " << argv[0]
       << " NumNodesX NumNodesY NumProcX NumProcY [-a|-f|-m|-t [-h]]" << endl
       << "NumNodesX   - Number of mesh nodes in X direction per processor" << endl
       << "NumNodesY   - Number of mesh nodes in Y direction per processor" << endl
       << "NumProcX    - Number of processors to use in X direction" << endl
       << "NumProcY    - Number of processors to use in Y direction" << endl
       << "-a|-f|-m|-t - Type of information to print: a=all, f=FLOPS, m=MFLOP/s, t=time (sec). Default is -a." << endl
       << "-h          - (Optional) Print output table header if -h present (typically done on first call)." << endl
       << " NOTES: NumProcX*NumProcY must equal the number of processors used to run the problem." << endl << endl
       << " Serial example:" << endl << endl
       << argv[0] << " 200 300 1 1 -m" << endl
       << " Run this program on 1 processor using a 200 by 300 grid, printing only MFLOP/s information without a header." << endl << endl
       << " MPI example:" << endl << endl
       << "mpirun -np 32 " << argv[0] << " 250 200 4 8 -a -h" << endl
       << " Run this program on 32 processors putting a 250 by 200 subgrid on each processor using 4 processors " << endl
       << " in the X direction and 8 in the Y direction. Total grid size is 1000 points in X and 1600 in Y for a total of 1.6M equations." << endl
       << " Print all information. Print header." << endl;
  if (makeheader && comm.MyPID()==0)
    cout << EpetraBenchmarkTest << endl;

  if (makeheader) cout << comm << endl;

  // Only the root process keeps makeheader set, so the summary header below is printed once.
  if (makeheader && comm.MyPID()!=0) makeheader = false;
  int numNodesX = atoi(argv[1]);
  int numNodesY = atoi(argv[2]);
  int numProcsX = atoi(argv[3]);
  int numProcsY = atoi(argv[4]);
  // numPoints, the stencil size, is set on lines not shown here; the 5 x 5 stencil built
  // below implies numPoints = 25.
  cout << " Number of local nodes in X direction = " << numNodesX << endl
       << " Number of local nodes in Y direction = " << numNodesY << endl
       << " Number of global nodes in X direction = " << numNodesX*numProcsX << endl
       << " Number of global nodes in Y direction = " << numNodesY*numProcsY << endl
       << " Number of local nonzero entries = " << numNodesX*numNodesY*numPoints << endl
       << " Number of global nonzero entries = " << numNodesX*numNodesY*numPoints*numProcsX*numProcsY << endl
       << " Number of Processors in X direction = " << numProcsX << endl
       << " Number of Processors in Y direction = " << numProcsY << endl
       << " Number of Points in stencil = " << numPoints << endl << endl;
  cout << " Timing the following:" << endl
       << " SpMV  - Sparse matrix vector product using Epetra_CrsMatrix class" << endl
       << " SpMM2 - Sparse matrix times 2-column multivector using Epetra_CrsMatrix class" << endl
       << " SpMM4 - Sparse matrix times 4-column multivector using Epetra_CrsMatrix class" << endl
       << " SpMM8 - Sparse matrix times 8-column multivector using Epetra_CrsMatrix class" << endl
       << " 2-norm of an Epetra_MultiVector" << endl
       << " Dot-product of 2 Epetra_MultiVectors" << endl
       << " AXPY of 2 Epetra_MultiVectors" << endl << endl;
  if (numProcsX*numProcsY!=comm.NumProc()) {
    cerr << "Number of processors = " << comm.NumProc() << endl
         << " is not the product of " << numProcsX << " and " << numProcsY << endl << endl;
    // ... (error exit not shown)
  }
  if (numNodesX*numNodesY<=0) {
    cerr << "Product of number of nodes is <= zero" << endl << endl;
    // ... (error exit not shown)
  }
  int xo = -2, yo = -2;
  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ;

  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ;

  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ;

  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ;

  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ;
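  // The five pairs of lines above define a 25-point (5 x 5) stencil: each pair fills the X
  // offsets -2..2 for one fixed Y offset.  The declarations of Xoff, Yoff, xi, and yi, and
  // the statements that reset xo and advance yo between rows, are on lines not shown.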
  global_dimension = (double) (numNodesX*numNodesY);
  global_dimension *= (double) (numProcsX*numProcsY);
  global_nonzero_count = global_dimension * (double) numPoints;
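  // Worked example matching the MPI case in the usage message: a 250 x 200 subgrid on each
  // of 4 x 8 = 32 processes gives global_dimension = 250*200*32 = 1.6e6 rows, and with a
  // 25-point stencil global_nonzero_count = 1.6e6*25 = 4.0e7.  The assembled matrix stores
  // slightly fewer entries, since stencil points falling outside the domain are skipped.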
  const int maxtimings = 7;
  const int maxtrials = 10;
  double results[maxtimings][3];
  for (int k=0; k<4; k++) {
    if (k>0) nrhs = nrhs*2;

    GenerateCrsProblem(/* ... leading arguments not shown ... */
                       map, A, b, bt, xexact, true, false);

    // ... (lines not shown)
    A->OptimizeStorage();
    for (int i = 0; i < maxtrials; ++i)
      A->Multiply(false, *xexact, z);      // z = A * xexact

    double elapsed_time = timer.ElapsedTime();
    double total_flops = 2.0*global_nonzero_count *((double) maxtrials);

    r.Update(-1.0, z, 1.0, *b, 0.0);       // r = b - z; this residual should be near zero

    double diff = resvec.NormInf();        // resvec is computed from r on lines not shown
    if (diff>1.0e-8 && comm.MyPID()==0)
      cerr << "Warning: Residual vector unusually large = " << diff << endl;

    double MFLOPs = total_flops/elapsed_time/1000000.0;

    results[ntimings][0] = total_flops;
    results[ntimings][1] = elapsed_time;
    results[ntimings++][2] = MFLOPs;
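    // Flop accounting for the timed sparse kernel: one multiply and one add per stored
    // matrix entry, i.e. 2 * global_nonzero_count flops per product, accumulated over
    // maxtrials repetitions before conversion to MFLOP/s.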
  if (ntimings+3>maxtimings)
    cerr << "Variable maxtimings = " << maxtimings << " must be at least " << ntimings+3 << endl;
  timer.ResetStartTime();

  for (int i = 0; i < maxtrials; ++i) {
    // ... (timed kernel not shown; from the output header this is the multivector 2-norm)
  }

  elapsed_time = timer.ElapsedTime();
  total_flops = 2.0*global_dimension *((double) maxtrials);
  MFLOPs = total_flops/elapsed_time/1000000.0;
  results[ntimings][0] = total_flops;
  results[ntimings][1] = elapsed_time;
  results[ntimings++][2] = MFLOPs;
  timer.ResetStartTime();

  for (int i = 0; i < maxtrials; ++i) {
    // ... (timed kernel not shown; from the output header this is the multivector dot product)
  }

  elapsed_time = timer.ElapsedTime();
  total_flops = 2.0*global_dimension *((double) maxtrials);
  MFLOPs = total_flops/elapsed_time/1000000.0;
  results[ntimings][0] = total_flops;
  results[ntimings][1] = elapsed_time;
  results[ntimings++][2] = MFLOPs;
  timer.ResetStartTime();

  for (int i = 0; i < maxtrials; ++i)
    q.Update(1.0, z, 1.0, r, 0.0);   // AXPY-style update: q = z + r

  elapsed_time = timer.ElapsedTime();
  total_flops = 2.0*global_dimension *((double) maxtrials);
  MFLOPs = total_flops/elapsed_time/1000000.0;
  results[ntimings][0] = total_flops;
  results[ntimings][1] = elapsed_time;
  results[ntimings++][2] = MFLOPs;
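  // The three vector kernels above (2-norm, dot product, and the AXPY-style update) are
  // each credited 2*global_dimension flops per trial, one multiply and one add per vector
  // entry, mirroring the accounting used for the sparse kernels.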
  // The header line below is presumably guarded by makeheader (true only on rank 0);
  // the guard itself is on a line not shown.
  cout << "Metric_\t\t_Procs_\t__SpMV__\t_SpMM2__\t_SpMM4__\t_SpMM8__\t__NORM___\t__DOT___\t__AXPY__" << endl;

  if (comm.MyPID()==0) {
    cout.setf(std::ios::scientific);

    for (int j=0; j<3; j++) {
      bool doloop = false;   // declaration assumed here; the original line is not shown
      if (j==0 && printflops)       { cout << "FLOPS\t";   doloop = true; }
      else if (j==1 && printtime)   { cout << "Time(s)\t"; doloop = true; }
      else if (j==2 && printmflops) { cout << "MFLOP/s\t"; doloop = true; }

      // (the row below is printed only when doloop is true; the guard is not shown)
      cout << "\t" << comm.NumProc();
      for (int i=0; i<maxtimings; i++)
        cout << "\t" << results[i][j];

// ... (remainder of main not shown)

void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY,
                        int numPoints, int * xoff, int * yoff,
                        /* ... comm, map, matrix, and right-hand-side parameters not shown ... */
                        Epetra_Vector *&xexact, bool StaticProfile, bool MakeLocalOnly) {

  // Single-right-hand-side version: it forwards to the multivector version below
  // (presumably with nrhs = 1) and returns the single-column results through the
  // Epetra_Vector arguments; the leading arguments of the call are not shown.
  GenerateCrsProblem(/* ... */
                     map, A, b1, bt1, xexact1, StaticProfile, MakeLocalOnly);

void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY,
                        int numPoints, int * xoff, int * yoff, int nrhs,
                        /* ... comm, map, matrix, and multivector parameters not shown ... */ ) {

  // Each process owns a numNodesX x numNodesY block of the mesh; its global row IDs are
  // produced by GenerateMyGlobalElements (the call that fills myGlobalElements is on
  // lines not shown).
  int * myGlobalElements;

  int numMyEquations = numNodesX*numNodesY;

  map = new Epetra_Map(-1, numMyEquations, myGlobalElements, 0, comm);
  delete [] myGlobalElements;

  int profile = 0; if (StaticProfile) profile = numPoints;

#ifdef EPETRA_HAVE_STATICPROFILE
  // ... (Epetra_CrsMatrix construction not shown; when StaticProfile is true, 'profile'
  //      is the number of entries preallocated per row)
  int * indices = new int[numPoints];
  double * values = new double[numPoints];

  double dnumPoints = (double) numPoints;
  int nx = numNodesX*numProcsX;   // global number of mesh nodes in the X direction

  for (int i=0; i<numMyEquations; i++) {

    int rowID = map->GID(i);
    int numIndices = 0;           // declaration assumed here; the original line is not shown

    for (int j=0; j<numPoints; j++) {
      int colID = rowID + xoff[j] + nx*yoff[j];
      if (colID>-1 && colID<numGlobalEquations) {
        indices[numIndices] = colID;
        double value = - ((double) rand())/ ((double) RAND_MAX);
        if (colID==rowID)                              // branch condition assumed; not shown
          values[numIndices++] = dnumPoints - value;   // diagonal entry
        else
          values[numIndices++] = value;                // off-diagonal entry
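        // Note: value lies in [-1, 0], so the diagonal entry numPoints - value is at least
        // numPoints, while each of the at most numPoints-1 off-diagonal entries lies in
        // [-1, 0]; the generated rows are therefore strongly diagonally dominant.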

void GenerateMyGlobalElements(int numNodesX, int numNodesY, int numProcsX, int numProcsY,
                              int myPID, int * & myGlobalElements) {

  myGlobalElements = new int[numNodesX*numNodesY];
  int myProcX = myPID%numProcsX;
  int myProcY = myPID/numProcsX;
  int curGID = myProcY*(numProcsX*numNodesX)*numNodesY+myProcX*numNodesX;
  for (int j=0; j<numNodesY; j++) {
    for (int i=0; i<numNodesX; i++) {
      myGlobalElements[j*numNodesX+i] = curGID+i;
    }
    curGID+=numNodesX*numProcsX;
  }
}
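
// Worked example of the numbering above (illustrative values, not from the source): with
// numNodesX = numNodesY = 2 and numProcsX = numProcsY = 2, the global 4 x 4 grid is numbered
// 0..15 row by row across the full X extent, so
//   rank 0 owns {0, 1, 4, 5},    rank 1 owns {2, 3, 6, 7},
//   rank 2 owns {8, 9, 12, 13},  rank 3 owns {10, 11, 14, 15}.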