Anasazi  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
BlockPCGSolver.cpp
1 // @HEADER
2 // *****************************************************************************
3 // Anasazi: Block Eigensolvers Package
4 //
5 // Copyright 2004 NTESS and the Anasazi contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 // This software is a result of the research described in the report
11 //
12 // "A comparison of algorithms for modal analysis in the absence
13 // of a sparse direct method", P. Arbenz, R. Lehoucq, and U. Hetmaniuk,
14 // Sandia National Laboratories, Technical report SAND2003-1028J.
15 //
16 // It is based on the Epetra, AztecOO, and ML packages defined in the Trilinos
17 // framework ( http://trilinos.org/ ).
18 
19 #include "BlockPCGSolver.h"
20 #include <stdexcept>
21 #include <Teuchos_Assert.hpp>
22 
23 
24 BlockPCGSolver::BlockPCGSolver(const Epetra_Comm &_Comm, const Epetra_Operator *KK,
25  double _tol, int _iMax, int _verb)
26  : MyComm(_Comm),
27  callBLAS(),
28  callLAPACK(),
29  callFortran(),
30  K(KK),
31  Prec(0),
32  vectorPCG(0),
33  tolCG(_tol),
34  iterMax(_iMax),
35  verbose(_verb),
36  workSpace(0),
37  lWorkSpace(0),
38  numSolve(0),
39  maxIter(0),
40  sumIter(0),
41  minIter(10000)
42  {
43 
44 }
45 
46 
47 BlockPCGSolver::BlockPCGSolver(const Epetra_Comm &_Comm, const Epetra_Operator *KK,
48  Epetra_Operator *PP,
49  double _tol, int _iMax, int _verb)
50  : MyComm(_Comm),
51  callBLAS(),
52  callLAPACK(),
53  callFortran(),
54  K(KK),
55  Prec(PP),
56  vectorPCG(0),
57  tolCG(_tol),
58  iterMax(_iMax),
59  verbose(_verb),
60  workSpace(0),
61  lWorkSpace(0),
62  numSolve(0),
63  maxIter(0),
64  sumIter(0),
65  minIter(10000)
66  {
67 
68 }
69 
70 
71 BlockPCGSolver::~BlockPCGSolver() {
72 
73  if (vectorPCG) {
74  delete vectorPCG;
75  vectorPCG = 0;
76  }
77 
78  if (workSpace) {
79  delete[] workSpace;
80  workSpace = 0;
81  lWorkSpace = 0;
82  }
83 
84 }
85 
86 
87 void BlockPCGSolver::setPreconditioner(Epetra_Operator *PP) {
88 
89  Prec = PP;
90 
91 }
92 
93 
94 int BlockPCGSolver::Apply(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const {
95 
96  return K->Apply(X, Y);
97 
98 }
99 
100 
101 int BlockPCGSolver::ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const {
102 
103  int xcol = X.NumVectors();
104  int info = 0;
105 
106  if (Y.NumVectors() < xcol)
107  return -1;
108 
109 // // Use AztecOO's PCG for one right-hand side
110 // if (xcol == 1) {
111 //
112 // // Define the AztecOO object
113 // if (vectorPCG == 0) {
114 // vectorPCG = new AztecOO();
115 //
116 // // Cast away the constness for AztecOO
117 // Epetra_Operator *KK = const_cast<Epetra_Operator*>(K);
118 // if (dynamic_cast<Epetra_RowMatrix*>(KK) == 0)
119 // vectorPCG->SetUserOperator(KK);
120 // else
121 // vectorPCG->SetUserMatrix(dynamic_cast<Epetra_RowMatrix*>(KK));
122 //
123 // vectorPCG->SetAztecOption(AZ_max_iter, iterMax);
124 // //vectorPCG->SetAztecOption(AZ_kspace, iterMax);
125 // vectorPCG->SetAztecOption(AZ_output, AZ_all);
126 // if (verbose < 3)
127 // vectorPCG->SetAztecOption(AZ_output, AZ_last);
128 // if (verbose < 2)
129 // vectorPCG->SetAztecOption(AZ_output, AZ_none);
130 //
131 // vectorPCG->SetAztecOption(AZ_solver, AZ_cg);
132 //
133 // ////////////////////////////////////////////////
134 // //if (K->HasNormInf()) {
135 // // vectorPCG->SetAztecOption(AZ_precond, AZ_Neumann);
136 // // vectorPCG->SetAztecOption(AZ_poly_ord, 3);
137 // //}
138 // ////////////////////////////////////////////////
139 //
140 // if (Prec)
141 // vectorPCG->SetPrecOperator(Prec);
142 //
143 // }
144 //
145 // double *valX = X.Values();
146 // double *valY = Y.Values();
147 //
148 // int xrow = X.MyLength();
149 //
150 // bool allocated = false;
151 // if (valX == valY) {
152 // valX = new double[xrow];
153 // allocated = true;
154 // // Copy valY into valX
155 // memcpy(valX, valY, xrow*sizeof(double));
156 // }
157 //
158 // Epetra_MultiVector rhs(View, X.Map(), valX, xrow, xcol);
159 // vectorPCG->SetRHS(&rhs);
160 //
161 // Y.PutScalar(0.0);
162 // vectorPCG->SetLHS(&Y);
163 //
164 // vectorPCG->Iterate(iterMax, tolCG);
165 //
166 // numSolve += xcol;
167 //
168 // int iter = vectorPCG->NumIters();
169 // maxIter = (iter > maxIter) ? iter : maxIter;
170 // minIter = (iter < minIter) ? iter : minIter;
171 // sumIter += iter;
172 //
173 // if (allocated == true)
174 // delete[] valX;
175 //
176 // return info;
177 //
178 // } // if (xcol == 1)
179 
180  // Use block PCG for multiple right-hand sides
181  info = (xcol == 1) ? Solve(X, Y) : Solve(X, Y, xcol);
182 
183  return info;
184 
185 }
186 
187 
188 int BlockPCGSolver::Solve(const Epetra_MultiVector &X, Epetra_MultiVector &Y) const {
189 
190  int info = 0;
191  int localVerbose = verbose*(MyComm.MyPID() == 0);
192 
193  int xr = X.MyLength();
194 
195  int wSize = 3*xr;
196 
197  if (lWorkSpace < wSize) {
198  if (workSpace)
199  delete[] workSpace;
200  workSpace = new (nothrow) double[wSize];
201  if (workSpace == 0) {
202  info = -1;
203  return info;
204  }
205  lWorkSpace = wSize;
206  } // if (lWorkSpace < wSize)
207 
208  double *pointer = workSpace;
209 
210  Epetra_Vector r(View, X.Map(), pointer);
211  pointer = pointer + xr;
212 
213  Epetra_Vector p(View, X.Map(), pointer);
214  pointer = pointer + xr;
215 
216  // Note: Kp and z uses the same memory space
217  Epetra_Vector Kp(View, X.Map(), pointer);
218  Epetra_Vector z(View, X.Map(), pointer);
219 
220  double tmp;
221  double initNorm = 0.0, rNorm = 0.0, newRZ = 0.0, oldRZ = 0.0, alpha = 0.0;
222  double tolSquare = tolCG*tolCG;
223 
224  memcpy(r.Values(), X.Values(), xr*sizeof(double));
225  tmp = callBLAS.DOT(xr, r.Values(), r.Values());
226  MyComm.SumAll(&tmp, &initNorm, 1);
227 
228  Y.PutScalar(0.0);
229 
230  if (localVerbose > 1) {
231  cout << endl;
232  cout << " --- PCG Iterations --- " << endl;
233  }
234 
235  int iter;
236  for (iter = 1; iter <= iterMax; ++iter) {
237 
238  if (Prec) {
239  Prec->ApplyInverse(r, z);
240  }
241  else {
242  memcpy(z.Values(), r.Values(), xr*sizeof(double));
243  }
244 
245  if (iter == 1) {
246  tmp = callBLAS.DOT(xr, r.Values(), z.Values());
247  MyComm.SumAll(&tmp, &newRZ, 1);
248  memcpy(p.Values(), z.Values(), xr*sizeof(double));
249  }
250  else {
251  oldRZ = newRZ;
252  tmp = callBLAS.DOT(xr, r.Values(), z.Values());
253  MyComm.SumAll(&tmp, &newRZ, 1);
254  p.Update(1.0, z, newRZ/oldRZ);
255  }
256 
257  K->Apply(p, Kp);
258 
259  tmp = callBLAS.DOT(xr, p.Values(), Kp.Values());
260  MyComm.SumAll(&tmp, &alpha, 1);
261  alpha = newRZ/alpha;
262 
263  if (alpha <= 0.0) {
264  if (MyComm.MyPID() == 0) {
265  cerr << endl << endl;
266  cerr.precision(4);
267  cerr.setf(ios::scientific, ios::floatfield);
268  cerr << " !!! Non-positive value for p^TKp (" << alpha << ") !!!";
269  cerr << endl << endl;
270  }
271  assert(alpha > 0.0);
272  }
273 
274  callBLAS.AXPY(xr, alpha, p.Values(), Y.Values());
275 
276  alpha *= -1.0;
277  callBLAS.AXPY(xr, alpha, Kp.Values(), r.Values());
278 
279  // Check convergence
280  tmp = callBLAS.DOT(xr, r.Values(), r.Values());
281  MyComm.SumAll(&tmp, &rNorm, 1);
282 
283  if (localVerbose > 1) {
284  cout << " Iter. " << iter;
285  cout.precision(4);
286  cout.setf(ios::scientific, ios::floatfield);
287  cout << " Residual reduction " << sqrt(rNorm/initNorm) << endl;
288  }
289 
290  if (rNorm <= tolSquare*initNorm)
291  break;
292 
293  } // for (iter = 1; iter <= iterMax; ++iter)
294 
295  if (localVerbose == 1) {
296  cout << endl;
297  cout << " --- End of PCG solve ---" << endl;
298  cout << " Iter. " << iter;
299  cout.precision(4);
300  cout.setf(ios::scientific, ios::floatfield);
301  cout << " Residual reduction " << sqrt(rNorm/initNorm) << endl;
302  cout << endl;
303  }
304 
305  if (localVerbose > 1) {
306  cout << endl;
307  }
308 
309  numSolve += 1;
310 
311  minIter = (iter < minIter) ? iter : minIter;
312  maxIter = (iter > maxIter) ? iter : maxIter;
313  sumIter += iter;
314 
315  return info;
316 
317 }
318 
319 
320 int BlockPCGSolver::Solve(const Epetra_MultiVector &X, Epetra_MultiVector &Y, int blkSize) const {
321 
322  int xrow = X.MyLength();
323  int xcol = X.NumVectors();
324  int ycol = Y.NumVectors();
325 
326  int info = 0;
327  int localVerbose = verbose*(MyComm.MyPID() == 0);
328 
329  // Machine epsilon to check singularities
330  double eps = 0.0;
331  callLAPACK.LAMCH('E', eps);
332 
333  double *valX = X.Values();
334 
335  int NB = 3 + callFortran.LAENV(1, "dsytrd", "u", blkSize, -1, -1, -1, 6, 1);
336  int lworkD = (blkSize > NB) ? blkSize*blkSize : NB*blkSize;
337 
338  int wSize = 4*blkSize*xrow + 3*blkSize + 2*blkSize*blkSize + lworkD;
339 
340  bool useY = true;
341  if (ycol % blkSize != 0) {
342  // Allocate an extra block to store the solutions
343  wSize += blkSize*xrow;
344  useY = false;
345  }
346 
347  if (lWorkSpace < wSize) {
348  delete[] workSpace;
349  workSpace = new (nothrow) double[wSize];
350  if (workSpace == 0) {
351  info = -1;
352  return info;
353  }
354  lWorkSpace = wSize;
355  } // if (lWorkSpace < wSize)
356 
357  double *pointer = workSpace;
358 
359  // Array to store the matrix PtKP
360  double *PtKP = pointer;
361  pointer = pointer + blkSize*blkSize;
362 
363  // Array to store coefficient matrices
364  double *coeff = pointer;
365  pointer = pointer + blkSize*blkSize;
366 
367  // Workspace array
368  double *workD = pointer;
369  pointer = pointer + lworkD;
370 
371  // Array to store the eigenvalues of P^t K P
372  double *da = pointer;
373  pointer = pointer + blkSize;
374 
375  // Array to store the norms of right hand sides
376  double *initNorm = pointer;
377  pointer = pointer + blkSize;
378 
379  // Array to store the norms of residuals
380  double *resNorm = pointer;
381  pointer = pointer + blkSize;
382 
383  // Array to store the residuals
384  double *valR = pointer;
385  pointer = pointer + xrow*blkSize;
386  Epetra_MultiVector R(View, X.Map(), valR, xrow, blkSize);
387 
388  // Array to store the preconditioned residuals
389  double *valZ = pointer;
390  pointer = pointer + xrow*blkSize;
391  Epetra_MultiVector Z(View, X.Map(), valZ, xrow, blkSize);
392 
393  // Array to store the search directions
394  double *valP = pointer;
395  pointer = pointer + xrow*blkSize;
396  Epetra_MultiVector P(View, X.Map(), valP, xrow, blkSize);
397 
398  // Array to store the image of the search directions
399  double *valKP = pointer;
400  pointer = pointer + xrow*blkSize;
401  Epetra_MultiVector KP(View, X.Map(), valKP, xrow, blkSize);
402 
403  // Pointer to store the solutions
404  double *valSOL = (useY == true) ? Y.Values() : pointer;
405 
406  int iRHS;
407  for (iRHS = 0; iRHS < xcol; iRHS += blkSize) {
408 
409  int numVec = (iRHS + blkSize < xcol) ? blkSize : xcol - iRHS;
410 
411  // Set the initial residuals to the right hand sides
412  if (numVec < blkSize) {
413  R.Random();
414  }
415  memcpy(valR, valX + iRHS*xrow, numVec*xrow*sizeof(double));
416 
417  // Set the initial guess to zero
418  valSOL = (useY == true) ? Y.Values() + iRHS*xrow : valSOL;
419  Epetra_MultiVector SOL(View, X.Map(), valSOL, xrow, blkSize);
420  SOL.PutScalar(0.0);
421 
422  int ii;
423  int iter;
424  int nFound;
425 
426  R.Norm2(initNorm);
427 
428  if (localVerbose > 1) {
429  cout << endl;
430  cout << " Vectors " << iRHS << " to " << iRHS + numVec - 1 << endl;
431  if (localVerbose > 2) {
432  fprintf(stderr,"\n");
433  for (ii = 0; ii < numVec; ++ii) {
434  cout << " ... Initial Residual Norm " << ii << " = " << initNorm[ii] << endl;
435  }
436  cout << endl;
437  }
438  }
439 
440  // Iteration loop
441  for (iter = 1; iter <= iterMax; ++iter) {
442 
443  // Apply the preconditioner
444  if (Prec)
445  Prec->ApplyInverse(R, Z);
446  else
447  Z = R;
448 
449  // Define the new search directions
450  if (iter == 1) {
451  P = Z;
452  }
453  else {
454  // Compute P^t K Z
455  callBLAS.GEMM('T', 'N', blkSize, blkSize, xrow, 1.0, KP.Values(), xrow, Z.Values(), xrow,
456  0.0, workD, blkSize);
457  MyComm.SumAll(workD, coeff, blkSize*blkSize);
458 
459  // Compute the coefficient (P^t K P)^{-1} P^t K Z
460  callBLAS.GEMM('T', 'N', blkSize, blkSize, blkSize, 1.0, PtKP, blkSize, coeff, blkSize,
461  0.0, workD, blkSize);
462  for (ii = 0; ii < blkSize; ++ii)
463  callFortran.SCAL_INCX(blkSize, da[ii], workD + ii, blkSize);
464  callBLAS.GEMM('N', 'N', blkSize, blkSize, blkSize, 1.0, PtKP, blkSize, workD, blkSize,
465  0.0, coeff, blkSize);
466 
467  // Update the search directions
468  // Note: Use KP as a workspace
469  memcpy(KP.Values(), P.Values(), xrow*blkSize*sizeof(double));
470  callBLAS.GEMM('N', 'N', xrow, blkSize, blkSize, 1.0, KP.Values(), xrow, coeff, blkSize,
471  0.0, P.Values(), xrow);
472 
473  P.Update(1.0, Z, -1.0);
474 
475  } // if (iter == 1)
476 
477  K->Apply(P, KP);
478 
479  // Compute P^t K P
480  callBLAS.GEMM('T', 'N', blkSize, blkSize, xrow, 1.0, P.Values(), xrow, KP.Values(), xrow,
481  0.0, workD, blkSize);
482  MyComm.SumAll(workD, PtKP, blkSize*blkSize);
483 
484  // Eigenvalue decomposition of P^t K P
485  callFortran.SYEV('V', 'U', blkSize, PtKP, blkSize, da, workD, lworkD, &info);
486  if (info) {
487  // Break the loop as spectral decomposition failed
488  break;
489  } // if (info)
490 
491  // Compute the pseudo-inverse of the eigenvalues
492  for (ii = 0; ii < blkSize; ++ii) {
493  // FIXME (mfh 14 Jan 2011) Is this the right exception to
494  // throw? I'm just replacing an exit(-1) with an exception,
495  // as per Trilinos coding standards.
496  TEUCHOS_TEST_FOR_EXCEPTION(da[ii] < 0.0, std::runtime_error, "Negative "
497  "eigenvalue for P^T K P: da[" << ii << "] = "
498  << da[ii] << ".");
499  da[ii] = (da[ii] == 0.0) ? 0.0 : 1.0/da[ii];
500  } // for (ii = 0; ii < blkSize; ++ii)
501 
502  // Compute P^t R
503  callBLAS.GEMM('T', 'N', blkSize, blkSize, xrow, 1.0, P.Values(), xrow, R.Values(), xrow,
504  0.0, workD, blkSize);
505  MyComm.SumAll(workD, coeff, blkSize*blkSize);
506 
507  // Compute the coefficient (P^t K P)^{-1} P^t R
508  callBLAS.GEMM('T', 'N', blkSize, blkSize, blkSize, 1.0, PtKP, blkSize, coeff, blkSize,
509  0.0, workD, blkSize);
510  for (ii = 0; ii < blkSize; ++ii)
511  callFortran.SCAL_INCX(blkSize, da[ii], workD + ii, blkSize);
512  callBLAS.GEMM('N', 'N', blkSize, blkSize, blkSize, 1.0, PtKP, blkSize, workD, blkSize,
513  0.0, coeff, blkSize);
514 
515  // Update the solutions
516  callBLAS.GEMM('N', 'N', xrow, blkSize, blkSize, 1.0, P.Values(), xrow, coeff, blkSize,
517  1.0, valSOL, xrow);
518 
519  // Update the residuals
520  callBLAS.GEMM('N', 'N', xrow, blkSize, blkSize, -1.0, KP.Values(), xrow, coeff, blkSize,
521  1.0, R.Values(), xrow);
522 
523  // Check convergence
524  R.Norm2(resNorm);
525  nFound = 0;
526  for (ii = 0; ii < numVec; ++ii) {
527  if (resNorm[ii] <= tolCG*initNorm[ii])
528  nFound += 1;
529  }
530 
531  if (localVerbose > 1) {
532  cout << " Vectors " << iRHS << " to " << iRHS + numVec - 1;
533  cout << " -- Iteration " << iter << " -- " << nFound << " converged vectors\n";
534  if (localVerbose > 2) {
535  cout << endl;
536  for (ii = 0; ii < numVec; ++ii) {
537  cout << " ... ";
538  cout.width(5);
539  cout << ii << " ... Residual = ";
540  cout.precision(2);
541  cout.setf(ios::scientific, ios::floatfield);
542  cout << resNorm[ii] << " ... Right Hand Side = " << initNorm[ii] << endl;
543  }
544  cout << endl;
545  }
546  }
547 
548  if (nFound == numVec) {
549  break;
550  }
551 
552  } // for (iter = 1; iter <= maxIter; ++iter)
553 
554  if (useY == false) {
555  // Copy the solutions back into Y
556  memcpy(Y.Values() + xrow*iRHS, valSOL, numVec*xrow*sizeof(double));
557  }
558 
559  numSolve += nFound;
560 
561  if (nFound == numVec) {
562  minIter = (iter < minIter) ? iter : minIter;
563  maxIter = (iter > maxIter) ? iter : maxIter;
564  sumIter += iter;
565  }
566 
567  } // for (iRHS = 0; iRHS < xcol; iRHS += blkSize)
568 
569  return info;
570 
571 }
572 
573 
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)