59 #include "../epetra_test_err.h"
62 int main(
int argc,
char *argv[])
64 int ierr = 0, i, j, forierr = 0;
71 MPI_Init(&argc,&argv);
83 if (argc>1)
if (argv[1][0]==
'-' && argv[1][1]==
'v') verbose =
true;
90 int MyPID = Comm.
MyPID();
93 if (verbose && Comm.
MyPID()==0)
97 cout <<
"Processor " << MyPID <<
" of " << NumProc <<
" is alive." << endl;
99 bool verbose1 = verbose;
102 if (verbose && Comm.
MyPID()!=0) verbose =
false;
104 int NumMyEquations = 10000;
106 long long NumGlobalEquations = (
long long)(NumMyEquations) * NumProc;
107 long long NumGlobalVariables = 2 * NumGlobalEquations+1;
111 Epetra_Map RowMap(NumGlobalEquations, 0LL, Comm);
112 Epetra_Map XMap(NumGlobalVariables, 0LL, Comm);
116 long long * MyGlobalElements =
new long long[RowMap.
NumMyElements()];
120 long long * XGlobalElements =
new long long[XMap.
NumMyElements()];
124 long long * YGlobalElements =
new long long[YMap.
NumMyElements()];
146 long long ATAssemblyNumMyElements = 2*MyGlobalElements[NumMyEquations-1] + 2 - 2*MyGlobalElements[0] + 1;
147 long long * ATAssemblyGlobalElements =
new long long[ATAssemblyNumMyElements];
149 for (i=0; i<ATAssemblyNumMyElements; i++) ATAssemblyGlobalElements[i] = 2*MyGlobalElements[0] + i;
150 Epetra_Map ATAssemblyMap((
long long)-1, ATAssemblyNumMyElements, ATAssemblyGlobalElements, 0LL, Comm);
169 double *Values =
new double[3];
170 long long *Indices =
new long long[3];
181 for (i=0; i<NumMyEquations; i++)
188 Indices[0] = 2*MyGlobalElements[i]+1;
189 Indices[1] = 2*MyGlobalElements[i]+2;
190 Indices[2] = 2*MyGlobalElements[i];
192 forierr += !(A.
InsertGlobalValues(MyGlobalElements[i], NumEntries, Values, Indices)==0);
194 forierr += !(ATAssembly.
InsertGlobalValues(Indices[j],1, &(Values[j]), &(MyGlobalElements[i]))>=0);
209 if (verbose1 && NumGlobalEquations<20) {
210 if (verbose) cout <<
"\n\n Matrix A\n" << endl;
212 if (verbose) cout <<
" \n\n Matrix A Transpose\n" << endl;
224 Values[0] = 1.0/16.0;
226 Values[2] = 1.0/16.0;
229 for (i=0; i<NumMyEquations; i++)
231 if (MyGlobalElements[i] == 0) {
232 Indices[0] = MyGlobalElements[i];
233 Indices[1] = MyGlobalElements[i]+1;
238 Indices[0] = MyGlobalElements[i]-1;
239 Indices[1] = MyGlobalElements[i];
240 Indices[2] = MyGlobalElements[i]+1;
244 if (MyGlobalElements[i] == NumGlobalEquations-1) NumEntries--;
245 forierr += !(B.
InsertGlobalValues(MyGlobalElements[i], NumEntries, Values+Valstart, Indices)==0);
251 if (verbose && NumGlobalEquations<20) cout <<
"\n\nMatrix B \n" << endl;
252 if (verbose1 && NumGlobalEquations<20) cout << B << endl;
259 for (i=0; i<ntrials; i++) {
263 double total_flops = B.
Flops();
266 double MFLOPs = total_flops/elapsed_time/1000000.0;
268 if (verbose) cout <<
"\n\nTotal MFLOPs for B*Y = " << MFLOPs << endl<< endl;
269 if (verbose && NumGlobalEquations<20) cout <<
"\n\nVector Z = B*Y \n";
270 if (verbose1 && NumGlobalEquations<20) cout << BY << endl;
276 for (i=0; i<ntrials; i++) {
280 total_flops = A.
Flops();
282 MFLOPs = total_flops/elapsed_time/1000000.0;
284 if (verbose) cout <<
"\n\nTotal MFLOPs for A^T*Y using A and trans=true = " << MFLOPs << endl<< endl;
285 if (verbose && NumGlobalEquations<20) cout <<
"\n\nVector Z = AT*Y \n";
286 if (verbose1 && NumGlobalEquations<20) cout << X << endl;
293 total_flops = A.
Flops();
294 MFLOPs = total_flops/elapsed_time/1000000.0;
297 resid.
Update(1.0, BY, -1.0, AATY, 0.0);
299 resid.
Norm2(&residual);
301 if (verbose) cout <<
"\n\nTotal MFLOPs for A*X using A and trans=false = " << MFLOPs << endl<< endl;
302 if (verbose) cout <<
"Residual = " << residual << endl<< endl;
303 if (verbose && NumGlobalEquations<20) cout <<
"\n\nVector Z = A*ATY \n";
304 if (verbose1 && NumGlobalEquations<20) cout << AATY << endl;
310 for (i=0; i<ntrials; i++) {
314 total_flops = AT.
Flops();
316 MFLOPs = total_flops/elapsed_time/1000000.0;
318 if (verbose) cout <<
"\n\nTotal MFLOPs for A^T*Y using AT and trans=false = " << MFLOPs << endl<< endl;
319 if (verbose && NumGlobalEquations<20) cout <<
"\n\nVector Z = AT*Y \n";
320 if (verbose1 && NumGlobalEquations<20) cout << X << endl;
325 for (i=0; i<ntrials; i++) {
329 total_flops = AT.
Flops();
330 MFLOPs = total_flops/elapsed_time/1000000.0;
332 resid.
Update(1.0, BY, -1.0, AATY, 0.0);
333 resid.
Norm2(&residual);
335 if (verbose) cout <<
"\n\nTotal MFLOPs for A*X using AT and trans=true = " << MFLOPs << endl<< endl;
336 if (verbose) cout <<
"Residual = " << residual << endl<< endl;
337 if (verbose && NumGlobalEquations<20) cout <<
"\n\nVector Z = A*ATY \n";
338 if (verbose1 && NumGlobalEquations<20) cout <<AATY << endl;
346 for (i=0; i<NumMyEquations; i++)
349 forierr += !(AL.
InsertGlobalValues(MyGlobalElements[i], NumEntries, Values, Indices)==0);
360 for (i=0; i<ntrials; i++) {
364 total_flops = A.
Flops();
366 MFLOPs = total_flops/elapsed_time/1000000.0;
370 if (verbose) cout <<
"\n\nTotal MFLOPs for Y=A*X using global distributed Y = " << MFLOPs << endl<< endl;
371 if (verbose && NumGlobalEquations<20) cout <<
"\n\nVector Y = A*X using distributed Y \n";
372 if (verbose1 && NumGlobalEquations<20) cout << Y << endl;
373 if (verbose && NumGlobalEquations<20) cout <<
"\n\nA using dist Y range map\n";
374 if (verbose1 && NumGlobalEquations<20) cout << A << endl;
377 for (i=0; i<ntrials; i++) {
381 total_flops = AL.
Flops();
383 MFLOPs = total_flops/elapsed_time/1000000.0;
385 if (verbose) cout <<
"\n\nTotal MFLOPs for Y=A*X using Local replicated Y = " << MFLOPs << endl<< endl;
386 if (verbose && NumGlobalEquations<20) cout <<
"\n\nVector YL = AL*X using local replicated Y \n";
387 if (verbose1 && NumGlobalEquations<20) cout << ALX << endl;
388 if (verbose && NumGlobalEquations<20) cout <<
"\n\nA using local Y range map\n";
389 if (verbose1 && NumGlobalEquations<20) cout << AL << endl;
394 if (verbose && NumGlobalEquations<20) cout <<
"\n\nVector YL = imported from distributed Y \n";
395 if (verbose1 && NumGlobalEquations<20) cout << YL << endl;
398 if (verbose) cout <<
"Residual = " << residual << endl<< endl;
406 for (i=0; i<ntrials; i++) {
410 total_flops = A.
Flops();
412 MFLOPs = total_flops/elapsed_time/1000000.0;
416 if (verbose) cout <<
"\n\nTotal MFLOPs for X=A^TY using global distributed Y = " << MFLOPs << endl<< endl;
417 if (verbose && NumGlobalEquations<20) cout <<
"\n\nVector X using distributed Y \n";
418 if (verbose1 && NumGlobalEquations<20) cout << Y << endl;
419 if (verbose && NumGlobalEquations<20) cout <<
"\n\nA using dist Y range map\n";
420 if (verbose1 && NumGlobalEquations<20) cout << A << endl;
425 for (i=0; i<ntrials; i++) {
429 total_flops = AL.
Flops();
431 MFLOPs = total_flops/elapsed_time/1000000.0;
433 if (verbose) cout <<
"\n\nTotal MFLOPs for X1=A^T*Y using Local replicated Y = " << MFLOPs << endl<< endl;
434 if (verbose && NumGlobalEquations<20) cout <<
"\n\nVector X1 using local replicated Y \n";
435 if (verbose1 && NumGlobalEquations<20) cout << X1 << endl;
439 if (verbose) cout <<
"Residual = " << residual << endl<< endl;
447 for (i=0; i<NumMyEquations; i++)
450 forierr += !(AL.
InsertGlobalValues(MyGlobalElements[i], NumEntries, Values, Indices)==0);
461 for (i=0; i<ntrials; i++) {
465 total_flops = A.
Flops();
467 MFLOPs = total_flops/elapsed_time/1000000.0;
471 if (verbose) cout <<
"\n\nTotal MFLOPs for Y=A*X using global distributed X = " << MFLOPs << endl<< endl;
472 if (verbose && NumGlobalEquations<20) cout <<
"\n\nVector Y = A*X using distributed X \n";
473 if (verbose1 && NumGlobalEquations<20) cout << Y << endl;
480 if (verbose && NumGlobalEquations<20) cout <<
"\n\nVector XL = imported from distributed X \n";
481 if (verbose1 && NumGlobalEquations<20) cout << XL << endl;
484 for (i=0; i<ntrials; i++) {
488 total_flops = AL.
Flops();
490 MFLOPs = total_flops/elapsed_time/1000000.0;
492 if (verbose) cout <<
"\n\nTotal MFLOPs for Y1=A*XL using Local replicated X = " << MFLOPs << endl<< endl;
493 if (verbose && NumGlobalEquations<20) cout <<
"\n\nVector Y1 = AL*XL using local replicated X \n";
494 if (verbose1 && NumGlobalEquations<20) cout << Y1 << endl;
500 if (verbose) cout <<
"Residual = " << residual << endl<< endl;
507 for (i=0; i<ntrials; i++) {
511 total_flops = A.
Flops();
513 MFLOPs = total_flops/elapsed_time/1000000.0;
516 if (verbose) cout <<
"\n\nTotal MFLOPs for X=A^TY using global distributed X = " << MFLOPs << endl<< endl;
517 if (verbose && NumGlobalEquations<20) cout <<
"\n\nVector X using distributed X \n";
518 if (verbose1 && NumGlobalEquations<20) cout << X << endl;
523 for (i=0; i<ntrials; i++) {
527 total_flops = AL.
Flops();
529 MFLOPs = total_flops/elapsed_time/1000000.0;
531 if (verbose) cout <<
"\n\nTotal MFLOPs for XL=A^T*Y1 using Local replicated X = " << MFLOPs << endl<< endl;
532 if (verbose && NumGlobalEquations<20) cout <<
"\n\nVector XL using local replicated X \n";
533 if (verbose1 && NumGlobalEquations<20) cout << XL << endl;
539 if (verbose) cout <<
"Residual = " << residual << endl<< endl;
544 delete [] MyGlobalElements;
545 delete [] XGlobalElements;
546 delete [] YGlobalElements;
547 delete [] ATAssemblyGlobalElements;
void SetFlopCounter(const Epetra_Flops &FlopCounter_in)
Set the internal Epetra_Flops() pointer.
Epetra_Map: A class for partitioning vectors and matrices.
int Random()
Set multi-vector values to random numbers.
int Multiply(bool TransA, const Epetra_Vector &x, Epetra_Vector &y) const
Returns the result of a Epetra_CrsMatrix multiplied by a Epetra_Vector x in y.
#define EPETRA_TEST_ERR(a, b)
int MyGlobalElements(int *MyGlobalElementList) const
Puts list of global elements on this processor into the user-provided array.
double ElapsedTime(void) const
Epetra_Time elapsed time function.
virtual int InsertGlobalValues(int GlobalRow, int NumEntries, const double *Values, const int *Indices)
Insert a list of elements in a given global row of the matrix.
static void SetTracebackMode(int TracebackModeValue)
Set the value of the Epetra_Object error traceback report mode.
Epetra_Export: This class builds an export object for efficient exporting of off-processor elements...
Epetra_Vector: A class for constructing and using dense vectors on a parallel computer.
int MyPID() const
Return my process ID.
Epetra_MpiComm: The Epetra MPI Communication Class.
std::string Epetra_Version()
Epetra_Import: This class builds an import object for efficient importing of off-processor elements...
int FillComplete(bool OptimizeDataStorage=true)
Signal that data entry is complete. Perform transformations to local index space. ...
Epetra_Time: The Epetra Timing Class.
int NumMyElements() const
Number of elements on the calling processor.
int Export(const Epetra_SrcDistObject &A, const Epetra_Import &Importer, Epetra_CombineMode CombineMode, const Epetra_OffsetIndex *Indexor=0)
Exports an Epetra_DistObject using the Epetra_Import object.
int ExtractGlobalRowCopy(int GlobalRow, int Length, int &NumEntries, double *Values, int *Indices) const
Returns a copy of the specified global row in user-provided arrays.
int NumProc() const
Returns total number of processes (always returns 1 for SerialComm).
Epetra_SerialComm: The Epetra Serial Communication Class.
void ResetFlops()
Resets the number of floating point operations to zero for this multi-vector.
Epetra_Flops: The Epetra Floating Point Operations Class.
int Update(double ScalarA, const Epetra_MultiVector &A, double ScalarThis)
Update multi-vector values with scaled values of A, this = ScalarThis*this + ScalarA*A.
Epetra_CrsMatrix: A class for constructing and using real-valued double-precision sparse compressed r...
int Norm2(double *Result) const
Compute 2-norm of each vector in multi-vector.
double Flops() const
Returns the number of floating point operations with this multi-vector.
int main(int argc, char *argv[])
void ResetStartTime(void)
Epetra_Time function to reset the start time for a timer object.
int Import(const Epetra_SrcDistObject &A, const Epetra_Import &Importer, Epetra_CombineMode CombineMode, const Epetra_OffsetIndex *Indexor=0)
Imports an Epetra_DistObject using the Epetra_Import object.
Epetra_LocalMap: A class for replicating vectors and matrices across multiple processors.