40 #include "paz_aztec.h" 
   44 #include "Petra_Comm.h" 
   45 #include "Petra_Map.h" 
   46 #include "Petra_Time.h" 
   47 #include "Petra_BlockMap.h" 
   48 #include "Petra_RDP_MultiVector.h" 
   49 #include "Petra_RDP_Vector.h" 
   50 #include "Petra_RDP_DVBR_Matrix.h" 
   51 #include "Petra_RDP_CRS_Matrix.h" 
   52 #include "Ifpack_ILUK_Graph.h" 
   53 #include "Ifpack_RDP_CRS_RILUK.h" 
   55 #define perror(str) { fprintf(stderr,"%s\n",str);   exit(-1); } 
   56 #define perror1(str,ierr) { fprintf(stderr,"%s %d\n",str,ierr);   exit(-1); } 
   57 #define double_quote '"' 
   61               Ifpack_RDP_CRS_RILUK *M,
 
   64               double *residual, 
double & FLOPS, 
bool verbose);
 
   66 int main(
int argc, 
char *argv[])
 
   79   double *xguess, *b, *bt, *xexact, *xsolve;
 
   80   int    n_nonzeros, n_blk_nonzeros, ierr;
 
   82   int    numLocalEquations;
 
   84   int    numGlobalEquations, numGlobalBlocks; 
 
   86   int    *blockSizes, *numNzBlks, *blkColInds;
 
   88   int    N_external, N_blk_eqns;
 
   89   int    blk_row, *blk_col_inds;
 
   90   int    row,     *col_inds, numEntries;
 
  100   int has_global_indices, option;
 
  103   int *proc_config = 
new int[PAZ_PROC_SIZE];
 
  107   MPI_Init(&argc,&argv);
 
  113   Petra_Comm& Comm = *
new Petra_Comm(MPI_COMM_WORLD);
 
  115   Petra_Comm& Comm = *
new Petra_Comm();
 
  118   printf(
"proc %d of %d is alive\n",
 
  119       Comm.MyPID(),Comm.NumProc());
 
  121   int MyPID = Comm.MyPID();
 
  124   if (MyPID==0) verbose = 
true;
 
  130   PAZ_set_proc_config(proc_config,MPI_COMM_WORLD);
 
  132   PAZ_set_proc_config(proc_config,0);
 
  139     perror(
"error: enter name of data and partition file on command line, followed by levelfill and shift value") ;
 
  141   if(argc != 5) 
perror(
"error: enter name of data file on command line, followed by levelfill and shift value") ;
 
  151     read_hb(argv[1], &numGlobalEquations, &n_nonzeros,
 
  152             &val_msr,  &bindx_msr, &xguess, &b, &bt, &xexact);
 
  154   create_vbr(argv[2], proc_config, &numGlobalEquations, &numGlobalBlocks,
 
  155              &n_nonzeros, &n_blk_nonzeros, &N_update, &update,
 
  156              bindx_msr, val_msr, &val, &indx,
 
  157              &rpntr, &cpntr, &bpntr, &bindx);
 
  159   if(proc_config[PAZ_node] == 0)
 
  161       free ((
void *) val_msr);
 
  162       free ((
void *) bindx_msr);
 
  163       free ((
void *) cpntr);
 
  165     matrix_type = PAZ_VBR_MATRIX;
 
  170                       &n_nonzeros, &n_blk_nonzeros,
 
  172                       &val, &indx, &rpntr, &cpntr, &bpntr, &bindx,
 
  173                       &xguess, &b, &bt, &xexact);
 
  177     read_hb(argv[1], &numGlobalEquations, &n_nonzeros,
 
  178              &val,  &bindx, &xguess, &b, &bt, &xexact);
 
  183                   &update, &val, &bindx, &xguess, &b, &bt, &xexact);
 
  186   for (i = 0; i<N_update; i++)
 
  187     if (val[i] == 0.0 ) printf(
"Zero diagonal at row %d\n",i);
 
  189     matrix_type = PAZ_MSR_MATRIX;
 
  193   numLocalEquations = rpntr[N_update];
 
  195   numLocalEquations = N_update;
 
  204   numLocalBlocks = N_update;
 
  206   blockSizes = 
new int[numLocalBlocks];
 
  207   for (i=0; i<numLocalBlocks; i++)
 
  208     blockSizes[i] = rpntr[i+1]-rpntr[i];
 
  212   numNzBlks = 
new int[numLocalBlocks];
 
  213   for (i=0; i<numLocalBlocks; i++)
 
  214     numNzBlks[i] = bpntr[i+1] - bpntr[i];
 
  220   Petra_BlockMap& map = *
new Petra_BlockMap(numGlobalEquations, numLocalEquations,
 
  221                         update, indexBase, Comm, numGlobalBlocks, numLocalBlocks,
 
  224   Petra_RDP_DVBR_Matrix& 
A = *
new Petra_RDP_DVBR_Matrix(map);
 
  226   if ((ierr=A.allocate(numNzBlks, blkColInds)))
 
  227      perror1(
"Error in DVBR_Matrix_allocate:",ierr);
 
  231   for (blk_row=0; blk_row<numLocalBlocks; blk_row++)
 
  233       row_vals = val + indx[bpntr[blk_row]];
 
  234       blk_col_inds = bindx + bpntr[blk_row];
 
  235       if ((ierr=A.putBlockRow(update[blk_row], numNzBlks[blk_row], row_vals,
 
  238          printf(
"Row %d:",update[row]);
 
  239          perror1(
"Error putting block row:",ierr);
 
  243   if ((ierr=A.FillComplete()))
 
  244       perror1(
"Error in DVBR_Matrix_FillComplete:",ierr);
 
  249   numNz = 
new int[numLocalEquations];
 
  250   for (i=0; i<numLocalEquations; i++) numNz[i] = bindx[i+1] - bindx[i];
 
  253   ColInds = bindx+numLocalEquations+1;
 
  255   Petra_Map& map = *
new Petra_Map(numGlobalEquations, numLocalEquations,
 
  259   Petra_Time & FillTimer = *
new Petra_Time(Comm);
 
  260   Petra_RDP_CRS_Matrix& A = *
new Petra_RDP_CRS_Matrix(
Copy, map, numNz);
 
  264   for (row=0; row<numLocalEquations; row++)
 
  266       row_vals = val + bindx[row];
 
  267       col_inds = bindx + bindx[row];
 
  268       numEntries = bindx[row+1] - bindx[row];
 
  269      if ((ierr = A.InsertGlobalValues(update[row], numEntries, row_vals, col_inds)))
 
  271          printf(
"Row %d:",update[row]);
 
  272          perror1(
"Error putting row:",ierr);
 
  274      if ((ierr=(A.InsertGlobalValues(update[row], 1, val+row, update+row)<0)))
 
  275        perror1(
"Error putting  diagonal",ierr);
 
  278    double FillTime = FillTimer.ElapsedTime();
 
  279     if ((ierr=A.FillComplete()))
 
  280     perror1(
"Error in Petra_RDP_Vector_fillComplete",ierr);
 
  281    double FillCompleteTime = FillTimer.ElapsedTime() - FillTime;
 
  282    if (Comm.MyPID()==0) {
 
  283      cout << 
"\n\n****************************************************" << endl;
 
  284      cout << 
"\n\nMatrix construction time (sec) = " << FillTime<< endl;
 
  285      cout << 
"    Matrix FillComplete time (sec) = " << FillCompleteTime << endl;
 
  286      cout << 
"    Total construction time (sec) = " << FillTime+FillCompleteTime << endl<< endl;
 
  298   double **allx = 
new double*[nrhs];
 
  299   double *xexact2 = 
new double[numLocalEquations];
 
  300   for (i = 0;i<numLocalEquations; i++) xexact2[i] = 2.0 * xexact[i];
 
  301   allx[0] = xexact; allx[1] = xexact2;
 
  303   double **allb = 
new double*[nrhs];
 
  304   double *b2 = 
new double[numLocalEquations];
 
  305   for (i = 0;i<numLocalEquations; i++) b2[i] = 2.0 * b[i];
 
  306   allb[0] = b; allb[1] = b2;
 
  308   double **allbt = 
new double*[nrhs];
 
  309   double *bt2 = 
new double[numLocalEquations];
 
  310   for (i = 0;i<numLocalEquations; i++) bt2[i] = 2.0 * bt[i];
 
  311   allbt[0] = bt; allbt[1] = bt2;
 
  313   Petra_RDP_MultiVector& xtrue = *
new Petra_RDP_MultiVector(
Copy, map, allx, nrhs);
 
  314   Petra_RDP_MultiVector& bexact = *
new Petra_RDP_MultiVector(
Copy, map, allb, nrhs);
 
  315   Petra_RDP_MultiVector& btexact = *
new Petra_RDP_MultiVector(
Copy, map, allbt, nrhs);
 
  316   Petra_RDP_MultiVector& bcomp = *
new Petra_RDP_MultiVector(map, nrhs);
 
  317   Petra_RDP_MultiVector& xcomp = *
new Petra_RDP_MultiVector(map, nrhs);
 
  318   Petra_RDP_MultiVector& resid = *
new Petra_RDP_MultiVector(map, nrhs);
 
  322   Petra_RDP_Vector& xtrue = *
new Petra_RDP_Vector(
Copy, map, xexact);
 
  323   Petra_RDP_Vector& bexact = *
new Petra_RDP_Vector(
Copy, map, b);
 
  324   Petra_RDP_Vector& btexact = *
new Petra_RDP_Vector(
Copy, map, bt);
 
  325   Petra_RDP_Vector& bcomp = *
new Petra_RDP_Vector(map);
 
  326   Petra_RDP_Vector& xcomp = *
new Petra_RDP_Vector(map);
 
  327   Petra_RDP_Vector& resid = *
new Petra_RDP_Vector(map);
 
  335   double elapsed_time, total_flops, MFLOPs;
 
  336   Petra_Time & timer = *
new Petra_Time(Comm);
 
  338   int LevelFill = atoi(argv[argc-3]);
 
  339   if (verbose) cout << 
"Using Level Fill = " << LevelFill << endl;
 
  340   double ShiftValue = atof(argv[argc-2]);
 
  341   if (verbose) cout << 
"Using Diagonal Shift Value of = " << ShiftValue << endl;
 
  342   int NumThreads = atoi(argv[argc-1]);
 
  343   if (verbose) cout << 
"Using " << NumThreads << 
" Threads "  << endl;
 
  345   Ifpack_RDP_CRS_RILUK * ILUK = 0;
 
  346   Ifpack_ILUK_Graph * ILUK_Graph = 0;
 
  348     elapsed_time = timer.ElapsedTime();
 
  349     ILUK_Graph = 
new Ifpack_ILUK_Graph(A.Graph(), LevelFill);
 
  350     assert(ILUK_Graph->ConstructFilledGraph()==0);
 
  351     assert(ILUK_Graph->ComputeLevels(NumThreads)==0);
 
  352     elapsed_time = timer.ElapsedTime() - elapsed_time;
 
  353     if (verbose) cout << 
"Time to construct ILUK graph = " << elapsed_time << endl;
 
  357     elapsed_time = timer.ElapsedTime();
 
  358     ILUK = 
new Ifpack_RDP_CRS_RILUK(A, *ILUK_Graph);
 
  359     ILUK->SetShiftValue(ShiftValue);
 
  360     assert(ILUK->InitValues()==0);
 
  361     assert(ILUK->Factor()==0);
 
  362     elapsed_time = timer.ElapsedTime() - elapsed_time;
 
  363     total_flops = ILUK->Flops();
 
  364     MFLOPs = total_flops/elapsed_time/1000000.0;
 
  365     if (verbose) cout << 
"Time to compute preconditioner values = " 
  366                       << elapsed_time << endl
 
  367                       << 
"MFLOPS for Factorization = " << MFLOPs << endl;
 
  371   double Tolerance = 1.0E-14;
 
  372   double * residual = 
new double[nrhs];
 
  375   elapsed_time = timer.ElapsedTime();
 
  377   BiCGSTAB(A, xcomp, bexact, ILUK, Maxiter, Tolerance, residual, total_flops, verbose);
 
  379   elapsed_time = timer.ElapsedTime() - elapsed_time;
 
  380   MFLOPs = total_flops/elapsed_time/1000000.0;
 
  381   if (verbose) cout << 
"Time to compute solution = " 
  382                     << elapsed_time << endl
 
  383                     << 
"Number of operations in solve = " << total_flops << endl
 
  384                     << 
"MFLOPS for Solve = " << MFLOPs<< endl << endl;
 
  390   for (ii=0; ii<2; ii++) {
 
  391     bool TransA = (ii==1);
 
  393     elapsed_time = timer.ElapsedTime();
 
  394     for (iii=0; iii<NumMVs; iii++)
 
  395       if ((ierr=A.Multiply(TransA, xcomp, bcomp))) 
perror1(
"Error in matvec",ierr);
 
  396     elapsed_time = timer.ElapsedTime() - elapsed_time;
 
  397     total_flops = A.Flops();
 
  398     MFLOPs = total_flops/elapsed_time/1000000.0;
 
  399     if (Comm.MyPID()==0) {
 
  401         cout << 
"\n\n****************************************************" << endl;
 
  402         cout << 
"\n\nResults for transpose multiply with standard storage" << endl;
 
  405         cout << 
"\n\nMatrix Fill cost = " << (FillTime+FillCompleteTime)/elapsed_time*NumMVs
 
  406              << 
" Matrix-Multiplies " << endl;
 
  407         cout << 
"\n\n*******************************************************" << endl;
 
  408         cout << 
"\n\nResults for no transpose multiply with standard storage" << endl;
 
  411       cout << 
"\n\nMatrix Fill cost = " << (FillTime+FillCompleteTime)/elapsed_time*NumMVs
 
  412            << 
" Matrix-Multiplies " << endl;
 
  413       cout << 
"\n\nTotal FLOPS for standard Storage (" <<NumMVs<< 
" Multiplies) = " 
  414            << total_flops<< endl;
 
  415       cout << 
"    Total time for standard Storage = " << elapsed_time<< endl;
 
  416       cout << 
"    Total MFLOPs for standard matrix multiply = " << MFLOPs << endl<< endl;
 
  422       if ((ierr=resid.Update(-1.0, btexact, 1.0, bcomp, 0.0))) 
perror1(
"Error in Update",ierr);}
 
  424       if ((ierr=resid.Update(-1.0, bexact, 1.0, bcomp, 0.0))) 
perror1(
"Error in Update",ierr);}
 
  426     if ((ierr = resid.Norm2(residual))) 
perror1(
"Error in Norm2",ierr);
 
  429       for (i=0; i< nrhs; i++) printf(
"Residual[%d]    = %22.16g\n",i,residual[i]);
 
  435   if ((ierr=A.OptimizeStorage())) 
perror1(
"Error in Petra_RDP_CRS_Matrix: OptimizeStorage",ierr);
 
  437   for (ii=0; ii<2; ii++) {
 
  438     bool TransA = (ii==1);
 
  440     elapsed_time = timer.ElapsedTime();
 
  441     for (iii=0; iii<NumMVs; iii++)
 
  442       if ((ierr=A.Multiply(TransA, xcomp, bcomp)))
 
  443         perror1(
"Error in Multiply",ierr);
 
  444     elapsed_time = timer.ElapsedTime() - elapsed_time;
 
  445     total_flops = A.Flops();
 
  446     MFLOPs = total_flops/elapsed_time/1000000.0;
 
  447     if (Comm.MyPID()==0) {
 
  448       cout << 
"\n\n****************************************************" << endl;
 
  449       if (TransA) cout << 
"\n\nResults for transpose multiply with optimized storage" << endl;
 
  450       else cout << 
"\n\nResults for no transpose multiply with optimized storage"<< endl;
 
  451       cout << 
"\n\nTotal FLOPS for optimized storage (" <<NumMVs<< 
" Multiplies) = " 
  452            << total_flops<< endl;
 
  453       cout << 
"    Total time for optimized Storage = " << elapsed_time<< endl;
 
  454       cout << 
"    Total MFLOPs for optimized matrix multiply = " << MFLOPs << endl<< endl;
 
  458       if ((ierr=resid.Update(-1.0, btexact, 1.0, bcomp, 0.0))) 
perror1(
"Error in Update",ierr);}
 
  460       if ((ierr=resid.Update(-1.0, bexact, 1.0, bcomp, 0.0))) 
perror1(
"Error in Update",ierr); }
 
  462     if ((ierr = resid.Norm2(residual))) 
perror1(
"Error in Norm2",ierr);
 
  465       for (i=0; i< nrhs; i++) printf(
"Residual[%d]    = %22.16g\n",i,residual[i]);
 
  469   free ((
void *) xguess);
 
  472   free ((
void *) xexact);
 
  474   free ((
void *) bindx);
 
  475   free ((
void *) update);
 
  478   free ((
void *) indx);
 
  479   free ((
void *) rpntr);
 
  480   free ((
void *) bpntr);
 
  481   delete [] blockSizes;
 
  517               Ifpack_RDP_CRS_RILUK *M,
 
  520               double *residual, 
double & FLOPS, 
bool verbose) {
 
  523   Petra_RDP_Vector& phat = *
new Petra_RDP_Vector(x); phat.PutScalar(0.0);
 
  524   Petra_RDP_Vector& p = *
new Petra_RDP_Vector(x); p.PutScalar(0.0);
 
  525   Petra_RDP_Vector& shat = *
new Petra_RDP_Vector(x); shat.PutScalar(0.0);
 
  526   Petra_RDP_Vector& s = *
new Petra_RDP_Vector(x); s.PutScalar(0.0);
 
  527   Petra_RDP_Vector& r = *
new Petra_RDP_Vector(x); r.PutScalar(0.0);
 
  528   Petra_RDP_Vector& rtilde = *
new Petra_RDP_Vector(x); rtilde.Random();
 
  529   Petra_RDP_Vector& v = *
new Petra_RDP_Vector(x); v.PutScalar(0.0);
 
  532   A.Multiply(
false, x, r); 
 
  534   r.Update(1.0, b, -1.0); 
 
  536   double r_norm, b_norm, scaled_r_norm, rhon, rhonm1 = 1.0;
 
  537   double alpha = 1.0, omega = 1.0, sigma;
 
  538   double omega_num, omega_den;
 
  541   scaled_r_norm = r_norm/b_norm;
 
  544   if (verbose) cout << 
"Initial residual = " << r_norm
 
  545                     << 
" Scaled residual = " << scaled_r_norm << endl;
 
  548   for (
int i=0; i<Maxiter; i++) { 
 
  550     double beta = (rhon/rhonm1) * (alpha/omega);
 
  557     double dtemp = - beta*omega;
 
  559     p.Update(1.0, r, dtemp, v, beta);
 
  563       M->LevelSolve(
false, p, phat);
 
  564     A.Multiply(
false, phat, v);
 
  567     rtilde.Dot(v,&sigma);
 
  574     s.Update(-alpha, v, 1.0, r, 0.0);
 
  578       M->LevelSolve(
false, s, shat);
 
  579     A.Multiply(
false, shat, r);
 
  581     r.Dot(s, &omega_num);
 
  582     r.Dot(r, &omega_den);
 
  583     omega = omega_num / omega_den;
 
  588     x.Update(alpha, phat, omega, shat, 1.0);
 
  589     r.Update(1.0, s, -omega);
 
  592     scaled_r_norm = r_norm/b_norm;
 
  595     if (verbose) cout << 
"Iter "<< i << 
" residual = " << r_norm
 
  596                       << 
" Scaled residual = " << scaled_r_norm << endl;
 
  598     if (scaled_r_norm < Tolerance) 
break;
 
  601   FLOPS = phat.Flops()+p.Flops()+shat.Flops()+s.Flops()+r.Flops()+rtilde.Flops()+
 
  602           v.Flops()+A.Flops()+x.Flops()+b.Flops();
 
  603   if (M!=0) FLOPS += M->Flops();
 
#define perror1(str, ierr)
void read_hb(char *data_file, int *N_global, int *n_nonzeros, double **val, int **bindx, double **x, double **b, double **bt, double **xexact)
int main(int argc, char *argv[])
void distrib_msr_matrix(int *proc_config, int *N_global, int *n_nonzeros, int *N_update, int **update, double **val, int **bindx, double **x, double **b, double **bt, double **xexact)
void BiCGSTAB(Epetra_CrsMatrix &A, Epetra_Vector &x, Epetra_Vector &b, Ifpack_CrsRiluk *M, int Maxiter, double Tolerance, double *residual, bool verbose)
void create_vbr(char *partition_file, int *proc_config, int *N_global, int *N_blk_global, int *n_nonzeros, int *n_blk_nonzeros, int *N_update, int **update, int *bindx_msr, double *val_msr, double **val, int **indx, int **rpntr, int **cpntr, int **bpntr, int **bindx)
void distrib_vbr_matrix(int *proc_config, int *N_global, int *N_blk_global, int *n_nonzeros, int *n_blk_nonzeros, int *N_update, int **update, double **val, int **indx, int **rpntr, int **cpntr, int **bpntr, int **bindx, double **x, double **b, double **bt, double **xexact)