40 #include "paz_aztec.h"
44 #include "Petra_Comm.h"
45 #include "Petra_Map.h"
46 #include "Petra_Time.h"
47 #include "Petra_BlockMap.h"
48 #include "Petra_RDP_MultiVector.h"
49 #include "Petra_RDP_Vector.h"
50 #include "Petra_RDP_DVBR_Matrix.h"
51 #include "Petra_RDP_CRS_Matrix.h"
52 #include "Ifpack_ILUK_Graph.h"
53 #include "Ifpack_RDP_CRS_RILUK.h"
55 #define perror(str) { fprintf(stderr,"%s\n",str); exit(-1); }
56 #define perror1(str,ierr) { fprintf(stderr,"%s %d\n",str,ierr); exit(-1); }
57 #define double_quote '"'
61 Ifpack_RDP_CRS_RILUK *M,
64 double *residual,
double & FLOPS,
bool verbose);
66 int main(
int argc,
char *argv[])
79 double *xguess, *b, *bt, *xexact, *xsolve;
80 int n_nonzeros, n_blk_nonzeros, ierr;
82 int numLocalEquations;
84 int numGlobalEquations, numGlobalBlocks;
86 int *blockSizes, *numNzBlks, *blkColInds;
88 int N_external, N_blk_eqns;
89 int blk_row, *blk_col_inds;
90 int row, *col_inds, numEntries;
100 int has_global_indices, option;
103 int *proc_config =
new int[PAZ_PROC_SIZE];
107 MPI_Init(&argc,&argv);
113 Petra_Comm& Comm = *
new Petra_Comm(MPI_COMM_WORLD);
115 Petra_Comm& Comm = *
new Petra_Comm();
118 printf(
"proc %d of %d is alive\n",
119 Comm.MyPID(),Comm.NumProc());
121 int MyPID = Comm.MyPID();
124 if (MyPID==0) verbose =
true;
130 PAZ_set_proc_config(proc_config,MPI_COMM_WORLD);
132 PAZ_set_proc_config(proc_config,0);
139 perror(
"error: enter name of data and partition file on command line, followed by levelfill and shift value") ;
141 if(argc != 5)
perror(
"error: enter name of data file on command line, followed by levelfill and shift value") ;
151 read_hb(argv[1], &numGlobalEquations, &n_nonzeros,
152 &val_msr, &bindx_msr, &xguess, &b, &bt, &xexact);
154 create_vbr(argv[2], proc_config, &numGlobalEquations, &numGlobalBlocks,
155 &n_nonzeros, &n_blk_nonzeros, &N_update, &update,
156 bindx_msr, val_msr, &val, &indx,
157 &rpntr, &cpntr, &bpntr, &bindx);
159 if(proc_config[PAZ_node] == 0)
161 free ((
void *) val_msr);
162 free ((
void *) bindx_msr);
163 free ((
void *) cpntr);
165 matrix_type = PAZ_VBR_MATRIX;
170 &n_nonzeros, &n_blk_nonzeros,
172 &val, &indx, &rpntr, &cpntr, &bpntr, &bindx,
173 &xguess, &b, &bt, &xexact);
177 read_hb(argv[1], &numGlobalEquations, &n_nonzeros,
178 &val, &bindx, &xguess, &b, &bt, &xexact);
183 &update, &val, &bindx, &xguess, &b, &bt, &xexact);
186 for (i = 0; i<N_update; i++)
187 if (val[i] == 0.0 ) printf(
"Zero diagonal at row %d\n",i);
189 matrix_type = PAZ_MSR_MATRIX;
193 numLocalEquations = rpntr[N_update];
195 numLocalEquations = N_update;
204 numLocalBlocks = N_update;
206 blockSizes =
new int[numLocalBlocks];
207 for (i=0; i<numLocalBlocks; i++)
208 blockSizes[i] = rpntr[i+1]-rpntr[i];
212 numNzBlks =
new int[numLocalBlocks];
213 for (i=0; i<numLocalBlocks; i++)
214 numNzBlks[i] = bpntr[i+1] - bpntr[i];
220 Petra_BlockMap& map = *
new Petra_BlockMap(numGlobalEquations, numLocalEquations,
221 update, indexBase, Comm, numGlobalBlocks, numLocalBlocks,
224 Petra_RDP_DVBR_Matrix&
A = *
new Petra_RDP_DVBR_Matrix(map);
226 if ((ierr=A.allocate(numNzBlks, blkColInds)))
227 perror1(
"Error in DVBR_Matrix_allocate:",ierr);
231 for (blk_row=0; blk_row<numLocalBlocks; blk_row++)
233 row_vals = val + indx[bpntr[blk_row]];
234 blk_col_inds = bindx + bpntr[blk_row];
235 if ((ierr=A.putBlockRow(update[blk_row], numNzBlks[blk_row], row_vals,
238 printf(
"Row %d:",update[row]);
239 perror1(
"Error putting block row:",ierr);
243 if ((ierr=A.FillComplete()))
244 perror1(
"Error in DVBR_Matrix_FillComplete:",ierr);
249 numNz =
new int[numLocalEquations];
250 for (i=0; i<numLocalEquations; i++) numNz[i] = bindx[i+1] - bindx[i];
253 ColInds = bindx+numLocalEquations+1;
255 Petra_Map& map = *
new Petra_Map(numGlobalEquations, numLocalEquations,
259 Petra_Time & FillTimer = *
new Petra_Time(Comm);
260 Petra_RDP_CRS_Matrix& A = *
new Petra_RDP_CRS_Matrix(
Copy, map, numNz);
264 for (row=0; row<numLocalEquations; row++)
266 row_vals = val + bindx[row];
267 col_inds = bindx + bindx[row];
268 numEntries = bindx[row+1] - bindx[row];
269 if ((ierr = A.InsertGlobalValues(update[row], numEntries, row_vals, col_inds)))
271 printf(
"Row %d:",update[row]);
272 perror1(
"Error putting row:",ierr);
274 if ((ierr=(A.InsertGlobalValues(update[row], 1, val+row, update+row)<0)))
275 perror1(
"Error putting diagonal",ierr);
278 double FillTime = FillTimer.ElapsedTime();
279 if ((ierr=A.FillComplete()))
280 perror1(
"Error in Petra_RDP_Vector_fillComplete",ierr);
281 double FillCompleteTime = FillTimer.ElapsedTime() - FillTime;
282 if (Comm.MyPID()==0) {
283 cout <<
"\n\n****************************************************" << endl;
284 cout <<
"\n\nMatrix construction time (sec) = " << FillTime<< endl;
285 cout <<
" Matrix FillComplete time (sec) = " << FillCompleteTime << endl;
286 cout <<
" Total construction time (sec) = " << FillTime+FillCompleteTime << endl<< endl;
298 double **allx =
new double*[nrhs];
299 double *xexact2 =
new double[numLocalEquations];
300 for (i = 0;i<numLocalEquations; i++) xexact2[i] = 2.0 * xexact[i];
301 allx[0] = xexact; allx[1] = xexact2;
303 double **allb =
new double*[nrhs];
304 double *b2 =
new double[numLocalEquations];
305 for (i = 0;i<numLocalEquations; i++) b2[i] = 2.0 * b[i];
306 allb[0] = b; allb[1] = b2;
308 double **allbt =
new double*[nrhs];
309 double *bt2 =
new double[numLocalEquations];
310 for (i = 0;i<numLocalEquations; i++) bt2[i] = 2.0 * bt[i];
311 allbt[0] = bt; allbt[1] = bt2;
313 Petra_RDP_MultiVector& xtrue = *
new Petra_RDP_MultiVector(
Copy, map, allx, nrhs);
314 Petra_RDP_MultiVector& bexact = *
new Petra_RDP_MultiVector(
Copy, map, allb, nrhs);
315 Petra_RDP_MultiVector& btexact = *
new Petra_RDP_MultiVector(
Copy, map, allbt, nrhs);
316 Petra_RDP_MultiVector& bcomp = *
new Petra_RDP_MultiVector(map, nrhs);
317 Petra_RDP_MultiVector& xcomp = *
new Petra_RDP_MultiVector(map, nrhs);
318 Petra_RDP_MultiVector& resid = *
new Petra_RDP_MultiVector(map, nrhs);
322 Petra_RDP_Vector& xtrue = *
new Petra_RDP_Vector(
Copy, map, xexact);
323 Petra_RDP_Vector& bexact = *
new Petra_RDP_Vector(
Copy, map, b);
324 Petra_RDP_Vector& btexact = *
new Petra_RDP_Vector(
Copy, map, bt);
325 Petra_RDP_Vector& bcomp = *
new Petra_RDP_Vector(map);
326 Petra_RDP_Vector& xcomp = *
new Petra_RDP_Vector(map);
327 Petra_RDP_Vector& resid = *
new Petra_RDP_Vector(map);
335 double elapsed_time, total_flops, MFLOPs;
336 Petra_Time & timer = *
new Petra_Time(Comm);
338 int LevelFill = atoi(argv[argc-3]);
339 if (verbose) cout <<
"Using Level Fill = " << LevelFill << endl;
340 double ShiftValue = atof(argv[argc-2]);
341 if (verbose) cout <<
"Using Diagonal Shift Value of = " << ShiftValue << endl;
342 int NumThreads = atoi(argv[argc-1]);
343 if (verbose) cout <<
"Using " << NumThreads <<
" Threads " << endl;
345 Ifpack_RDP_CRS_RILUK * ILUK = 0;
346 Ifpack_ILUK_Graph * ILUK_Graph = 0;
348 elapsed_time = timer.ElapsedTime();
349 ILUK_Graph =
new Ifpack_ILUK_Graph(A.Graph(), LevelFill);
350 assert(ILUK_Graph->ConstructFilledGraph()==0);
351 assert(ILUK_Graph->ComputeLevels(NumThreads)==0);
352 elapsed_time = timer.ElapsedTime() - elapsed_time;
353 if (verbose) cout <<
"Time to construct ILUK graph = " << elapsed_time << endl;
357 elapsed_time = timer.ElapsedTime();
358 ILUK =
new Ifpack_RDP_CRS_RILUK(A, *ILUK_Graph);
359 ILUK->SetShiftValue(ShiftValue);
360 assert(ILUK->InitValues()==0);
361 assert(ILUK->Factor()==0);
362 elapsed_time = timer.ElapsedTime() - elapsed_time;
363 total_flops = ILUK->Flops();
364 MFLOPs = total_flops/elapsed_time/1000000.0;
365 if (verbose) cout <<
"Time to compute preconditioner values = "
366 << elapsed_time << endl
367 <<
"MFLOPS for Factorization = " << MFLOPs << endl;
371 double Tolerance = 1.0E-14;
372 double * residual =
new double[nrhs];
375 elapsed_time = timer.ElapsedTime();
377 BiCGSTAB(A, xcomp, bexact, ILUK, Maxiter, Tolerance, residual, total_flops, verbose);
379 elapsed_time = timer.ElapsedTime() - elapsed_time;
380 MFLOPs = total_flops/elapsed_time/1000000.0;
381 if (verbose) cout <<
"Time to compute solution = "
382 << elapsed_time << endl
383 <<
"Number of operations in solve = " << total_flops << endl
384 <<
"MFLOPS for Solve = " << MFLOPs<< endl << endl;
390 for (ii=0; ii<2; ii++) {
391 bool TransA = (ii==1);
393 elapsed_time = timer.ElapsedTime();
394 for (iii=0; iii<NumMVs; iii++)
395 if ((ierr=A.Multiply(TransA, xcomp, bcomp)))
perror1(
"Error in matvec",ierr);
396 elapsed_time = timer.ElapsedTime() - elapsed_time;
397 total_flops = A.Flops();
398 MFLOPs = total_flops/elapsed_time/1000000.0;
399 if (Comm.MyPID()==0) {
401 cout <<
"\n\n****************************************************" << endl;
402 cout <<
"\n\nResults for transpose multiply with standard storage" << endl;
405 cout <<
"\n\nMatrix Fill cost = " << (FillTime+FillCompleteTime)/elapsed_time*NumMVs
406 <<
" Matrix-Multiplies " << endl;
407 cout <<
"\n\n*******************************************************" << endl;
408 cout <<
"\n\nResults for no transpose multiply with standard storage" << endl;
411 cout <<
"\n\nMatrix Fill cost = " << (FillTime+FillCompleteTime)/elapsed_time*NumMVs
412 <<
" Matrix-Multiplies " << endl;
413 cout <<
"\n\nTotal FLOPS for standard Storage (" <<NumMVs<<
" Multiplies) = "
414 << total_flops<< endl;
415 cout <<
" Total time for standard Storage = " << elapsed_time<< endl;
416 cout <<
" Total MFLOPs for standard matrix multiply = " << MFLOPs << endl<< endl;
422 if ((ierr=resid.Update(-1.0, btexact, 1.0, bcomp, 0.0)))
perror1(
"Error in Update",ierr);}
424 if ((ierr=resid.Update(-1.0, bexact, 1.0, bcomp, 0.0)))
perror1(
"Error in Update",ierr);}
426 if ((ierr = resid.Norm2(residual)))
perror1(
"Error in Norm2",ierr);
429 for (i=0; i< nrhs; i++) printf(
"Residual[%d] = %22.16g\n",i,residual[i]);
435 if ((ierr=A.OptimizeStorage()))
perror1(
"Error in Petra_RDP_CRS_Matrix: OptimizeStorage",ierr);
437 for (ii=0; ii<2; ii++) {
438 bool TransA = (ii==1);
440 elapsed_time = timer.ElapsedTime();
441 for (iii=0; iii<NumMVs; iii++)
442 if ((ierr=A.Multiply(TransA, xcomp, bcomp)))
443 perror1(
"Error in Multiply",ierr);
444 elapsed_time = timer.ElapsedTime() - elapsed_time;
445 total_flops = A.Flops();
446 MFLOPs = total_flops/elapsed_time/1000000.0;
447 if (Comm.MyPID()==0) {
448 cout <<
"\n\n****************************************************" << endl;
449 if (TransA) cout <<
"\n\nResults for transpose multiply with optimized storage" << endl;
450 else cout <<
"\n\nResults for no transpose multiply with optimized storage"<< endl;
451 cout <<
"\n\nTotal FLOPS for optimized storage (" <<NumMVs<<
" Multiplies) = "
452 << total_flops<< endl;
453 cout <<
" Total time for optimized Storage = " << elapsed_time<< endl;
454 cout <<
" Total MFLOPs for optimized matrix multiply = " << MFLOPs << endl<< endl;
458 if ((ierr=resid.Update(-1.0, btexact, 1.0, bcomp, 0.0)))
perror1(
"Error in Update",ierr);}
460 if ((ierr=resid.Update(-1.0, bexact, 1.0, bcomp, 0.0)))
perror1(
"Error in Update",ierr); }
462 if ((ierr = resid.Norm2(residual)))
perror1(
"Error in Norm2",ierr);
465 for (i=0; i< nrhs; i++) printf(
"Residual[%d] = %22.16g\n",i,residual[i]);
469 free ((
void *) xguess);
472 free ((
void *) xexact);
474 free ((
void *) bindx);
475 free ((
void *) update);
478 free ((
void *) indx);
479 free ((
void *) rpntr);
480 free ((
void *) bpntr);
481 delete [] blockSizes;
517 Ifpack_RDP_CRS_RILUK *M,
520 double *residual,
double & FLOPS,
bool verbose) {
523 Petra_RDP_Vector& phat = *
new Petra_RDP_Vector(x); phat.PutScalar(0.0);
524 Petra_RDP_Vector& p = *
new Petra_RDP_Vector(x); p.PutScalar(0.0);
525 Petra_RDP_Vector& shat = *
new Petra_RDP_Vector(x); shat.PutScalar(0.0);
526 Petra_RDP_Vector& s = *
new Petra_RDP_Vector(x); s.PutScalar(0.0);
527 Petra_RDP_Vector& r = *
new Petra_RDP_Vector(x); r.PutScalar(0.0);
528 Petra_RDP_Vector& rtilde = *
new Petra_RDP_Vector(x); rtilde.Random();
529 Petra_RDP_Vector& v = *
new Petra_RDP_Vector(x); v.PutScalar(0.0);
532 A.Multiply(
false, x, r);
534 r.Update(1.0, b, -1.0);
536 double r_norm, b_norm, scaled_r_norm, rhon, rhonm1 = 1.0;
537 double alpha = 1.0, omega = 1.0, sigma;
538 double omega_num, omega_den;
541 scaled_r_norm = r_norm/b_norm;
544 if (verbose) cout <<
"Initial residual = " << r_norm
545 <<
" Scaled residual = " << scaled_r_norm << endl;
548 for (
int i=0; i<Maxiter; i++) {
550 double beta = (rhon/rhonm1) * (alpha/omega);
557 double dtemp = - beta*omega;
559 p.Update(1.0, r, dtemp, v, beta);
563 M->LevelSolve(
false, p, phat);
564 A.Multiply(
false, phat, v);
567 rtilde.Dot(v,&sigma);
574 s.Update(-alpha, v, 1.0, r, 0.0);
578 M->LevelSolve(
false, s, shat);
579 A.Multiply(
false, shat, r);
581 r.Dot(s, &omega_num);
582 r.Dot(r, &omega_den);
583 omega = omega_num / omega_den;
588 x.Update(alpha, phat, omega, shat, 1.0);
589 r.Update(1.0, s, -omega);
592 scaled_r_norm = r_norm/b_norm;
595 if (verbose) cout <<
"Iter "<< i <<
" residual = " << r_norm
596 <<
" Scaled residual = " << scaled_r_norm << endl;
598 if (scaled_r_norm < Tolerance)
break;
601 FLOPS = phat.Flops()+p.Flops()+shat.Flops()+s.Flops()+r.Flops()+rtilde.Flops()+
602 v.Flops()+A.Flops()+x.Flops()+b.Flops();
603 if (M!=0) FLOPS += M->Flops();
#define perror1(str, ierr)
void read_hb(char *data_file, int *N_global, int *n_nonzeros, double **val, int **bindx, double **x, double **b, double **bt, double **xexact)
int main(int argc, char *argv[])
void distrib_msr_matrix(int *proc_config, int *N_global, int *n_nonzeros, int *N_update, int **update, double **val, int **bindx, double **x, double **b, double **bt, double **xexact)
void BiCGSTAB(Epetra_CrsMatrix &A, Epetra_Vector &x, Epetra_Vector &b, Ifpack_CrsRiluk *M, int Maxiter, double Tolerance, double *residual, bool verbose)
void create_vbr(char *partition_file, int *proc_config, int *N_global, int *N_blk_global, int *n_nonzeros, int *n_blk_nonzeros, int *N_update, int **update, int *bindx_msr, double *val_msr, double **val, int **indx, int **rpntr, int **cpntr, int **bpntr, int **bindx)
void distrib_vbr_matrix(int *proc_config, int *N_global, int *N_blk_global, int *n_nonzeros, int *n_blk_nonzeros, int *N_update, int **update, double **val, int **indx, int **rpntr, int **cpntr, int **bpntr, int **bindx, double **x, double **b, double **bt, double **xexact)