Anasazi  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
JDPCG.cpp
1 // @HEADER
2 // *****************************************************************************
3 // Anasazi: Block Eigensolvers Package
4 //
5 // Copyright 2004 NTESS and the Anasazi contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 //**************************************************************************
11 //
12 // NOTICE
13 //
14 // This software is a result of the research described in the cout
15 //
16 // " A comparison of algorithms for modal analysis in the absence
17 // of a sparse direct method", P. Arbenz, R. Lehoucq, and U. Hetmaniuk,
18 // Sandia National Laboratories, Technical cout SAND2003-1028J.
19 //
20 // It is based on the Epetra, AztecOO, and ML packages defined in the Trilinos
21 // framework ( http://software.sandia.gov/trilinos/ ).
22 //
23 // The distribution of this software follows also the rules defined in Trilinos.
24 // This notice shall be marked on any reproduction of this software, in whole or
25 // in part.
26 //
27 // This program is distributed in the hope that it will be useful, but
28 // WITHOUT ANY WARRANTY; without even the implied warranty of
29 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
30 //
31 // Code Authors: U. Hetmaniuk (ulhetma@sandia.gov), R. Lehoucq (rblehou@sandia.gov)
32 //
33 //**************************************************************************
34 
35 #include "JDPCG.h"
36 #include <stdexcept>
37 #include <Teuchos_Assert.hpp>
38 
39 
40 JDPCG::JDPCG(const Epetra_Comm &_Comm, const Epetra_Operator *KK,
41  const Epetra_Operator *MM, const Epetra_Operator *PP, int _blk, int _numBlk,
42  double _tol, int _maxIterES, int _maxIterLS, int _verb, double *_weight) :
43  myVerify(_Comm),
44  callBLAS(),
45  callFortran(),
46  callLAPACK(),
47  modalTool(_Comm),
48  mySort(),
49  MyComm(_Comm),
50  K(KK),
51  M(MM),
52  Prec(PP),
53  MyWatch(_Comm),
54  tolEigenSolve(_tol),
55  maxIterEigenSolve(_maxIterES),
56  maxIterLinearSolve(_maxIterLS),
57  blockSize(_blk),
58  numBlock(_numBlk),
59  normWeight(_weight),
60  verbose(_verb),
61  historyCount(0),
62  resHistory(0),
63  maxSpaceSize(0),
64  sumSpaceSize(0),
65  spaceSizeHistory(0),
66  maxIterPCG(0),
67  sumIterPCG(0),
68  iterPCGHistory(0),
69  memRequested(0.0),
70  highMem(0.0),
71  massOp(0),
72  numCorrectionPrec(0),
73  numCorrectionSolve(0),
74  numPCGmassOp(0),
75  numPCGstifOp(0),
76  numRestart(0),
77  outerIter(0),
78  precOp(0),
79  residual(0),
80  stifOp(0),
81  timeBuildQtMPMQ(0.0),
82  timeCorrectionPrec(0.0),
83  timeCorrectionSolve(0.0),
84  timeLocalProj(0.0),
85  timeLocalSolve(0.0),
86  timeLocalUpdate(0.0),
87  timeMassOp(0.0),
88  timeNorm(0.0),
89  timeOrtho(0.0),
90  timeOuterLoop(0.0),
91  timePCGEigCheck(0.0),
92  timePCGLoop(0.0),
93  timePCGOpMult(0.0),
94  timePCGPrec(0.0),
95  timePostProce(0.0),
96  timePrecOp(0.0),
97  timeResidual(0.0),
98  timeRestart(0.0),
99  timeStifOp(0.0)
100  {
101 
102 }
103 
104 
105 JDPCG::~JDPCG() {
106 
107  if (resHistory) {
108  delete[] resHistory;
109  resHistory = 0;
110  }
111 
112  if (spaceSizeHistory) {
113  delete[] spaceSizeHistory;
114  spaceSizeHistory = 0;
115  }
116 
117  if (iterPCGHistory) {
118  delete[] iterPCGHistory;
119  iterPCGHistory = 0;
120  }
121 
122 }
123 
124 
125 int JDPCG::jacobiPreconditioner(const Epetra_MultiVector &B, Epetra_MultiVector &PrecB,
126  const Epetra_MultiVector *U, const Epetra_MultiVector *V,
127  double *invQtMPMQ, int ldQtMPMQ, double *PMQ, double *work, double *WS) {
128 
129  // This routine applies the "projected" preconditioner to the vectors B and stores
130  // the result in the vectors PrecB.
131  //
132  // PrecB = (I - P^{-1}MQ ( Q^tMP^{-1}MQ )^{-1} Q^t M) P^{-1} * B
133  //
134  // where P is the preconditioner.
135  //
136  // B = Input vectors
137  // PrecB = Preconditioned vectors
138  // V = converged eigenvectors
139  // U = tentative eigenvectors to be corrected.
140  //
141  // Note: Q = [V, U]
142  //
143  // invQtMPMQ = Factor form of the matrix Q^t*M*P^{-1}*M*Q (from POTRF)
144  // ldQtMPMQ = Leading dimension for invQtMPMQ
145  //
146  // PMQ = Array to store the column vectors P^{-1}*M*Q
147  // Assumption: PMQ has the same distribution than B over the processors
148  //
149  // work = Workspace array of size 2 * (# of columns in Q) * (# of columns in B)
150  //
151  // WS = Workspace array of size (# of rows in B) * (# of columns in B)
152 
153  int info = 0;
154 
155  int bC = B.NumVectors();
156  int bR = B.MyLength();
157 
158  // Compute P^{-1} B
159  timeCorrectionPrec -= MyWatch.WallTime();
160  if (Prec) {
161  Prec->ApplyInverse(B, PrecB);
162  }
163  else {
164  memcpy(PrecB.Values(), B.Values(), bR*bC*sizeof(double));
165  }
166  timeCorrectionPrec += MyWatch.WallTime();
167 
168  int uC = U->NumVectors();
169  int vC = (V) ? V->NumVectors() : 0;
170 
171  int sizeQ = uC + vC;
172 
173  // Compute Q^t M P^{-t} B
174  callBLAS.GEMM('T', 'N', sizeQ, bC, bR, 1.0, PMQ, bR, B.Values(), bR,
175  0.0, work + sizeQ*bC, sizeQ);
176  MyComm.SumAll(work + sizeQ*bC, work, sizeQ*bC);
177 
178  memcpy(work + sizeQ*bC, work, sizeQ*bC*sizeof(double));
179 
180  // Compute ( Q^t M P^{-1} M Q )^{-1} Q^t M P^{-t} B
181  callLAPACK.POTRS('U', sizeQ, bC, invQtMPMQ, ldQtMPMQ, work, sizeQ, &info);
182 
183  // Subtract P^{-1} M Q ( Q^t M P^{-1} M Q )^{-1} Q^t M P^{-t} B from P^{-t} B
184  callBLAS.GEMM('N', 'N', bR, bC, sizeQ, -1.0, PMQ, bR, work, sizeQ, 1.0, PrecB.Values(), bR);
185 
186  Epetra_MultiVector MPB(View, B.Map(), WS, bR, bC);
187  M->Apply(PrecB, MPB);
188 
189  // Compute Q^t M PrecB
190  callBLAS.GEMM('T', 'N', uC, bC, bR, 1.0, U->Values(), bR, MPB.Values(), bR,
191  0.0, work + vC + sizeQ*bC, sizeQ);
192  if (V) {
193  callBLAS.GEMM('T', 'N', vC, bC, bR, 1.0, V->Values(), bR, MPB.Values(), bR,
194  0.0, work + sizeQ*bC, sizeQ);
195  }
196  MyComm.SumAll(work + sizeQ*bC, work, sizeQ*bC);
197 
198  // Test for second projection
199  double *Mnorm = new double[bC];
200  MPB.Dot(PrecB, Mnorm);
201 
202  bool doSecond = false;
203  for (int j = 0; j < bC; ++j) {
204  double tolOrtho = 1.0e-28*Mnorm[j];
205  for (int i = 0; i < sizeQ; ++i) {
206  double tmp = work[i + j*sizeQ];
207  if (tmp*tmp > tolOrtho) {
208  doSecond = true;
209  break;
210  }
211  }
212  if (doSecond == true) {
213  // Compute ( Q^t M P^{-1} M Q )^{-1} Q^t M PrecB
214  callLAPACK.POTRS('U', sizeQ, bC, invQtMPMQ, ldQtMPMQ, work, sizeQ, &info);
215  // Subtract P^{-1} M Q ( Q^t M P^{-1} M Q )^{-1} Q^t M PrecB from PrecB
216  callBLAS.GEMM('N', 'N', bR, bC, sizeQ, -1.0, PMQ, bR, work, sizeQ, 1.0, PrecB.Values(), bR);
217  break;
218  }
219  }
220  delete[] Mnorm;
221 
222  numCorrectionPrec += bC;
223 
224  return info;
225 
226 }
227 
228 
229 int JDPCG::jacobiPCG(Epetra_MultiVector &Rlin, Epetra_MultiVector &Y,
230  const Epetra_MultiVector *U, const Epetra_MultiVector *V,
231  double eta, double tolCG, int iterMax,
232  double *invQtMPMQ, int ldQtMPMQ, double *PMQ,
233  double *workPrec, double *workPCG,
234  const Epetra_Vector *vectWeight, const Epetra_MultiVector *orthoVec) {
235 
236  // This routine applies a block PCG algorithm to solve the equation
237  //
238  // (I - MQ*Q^t) * ( K - eta * M ) * (I - Q*Q^t*M) Y = X
239  //
240  // with (I - Q*Q^t*M) * Y = Y
241  // with Q = [V, U]
242  // where the preconditioner is given by
243  //
244  // (I - MQ*Q^t) * Prec^{-1} * (I - Q*Q^t*M)
245  //
246  // Rlin = Input vectors
247  // Y = Solution vectors
248  // V = converged eigenvectors
249  // U = tentative eigenvectors to be corrected.
250  //
251  // eta = shift for the linear operator
252  //
253  // tolCG = Tolerance required for convergence
254  // iterMax = Maximum number of iterations allowed
255  //
256  // invQtMPMQ = Factor form of the matrix Q^t*M*P^{-1}*M*Q (from POTRF)
257  // ldQtMPMQ = Leading dimension for invQtMPMQ
258  //
259  // PMQ = Array to store the column vectors P^{-1}*M*Q
260  // Assumption: PMQ has the same distribution than B over the processors
261  //
262  // workPrec = Workspace array of size 2 * (# of columns in Q) * (# of columns in X)
263  // This workspace is exclusively used to apply the preconditioner
264  //
265  // workPCG = Workspace array for the variables in the PCG algorithm
266  // Its size must allow the definition of
267  // - 5 blocks of (# of columns in X) vectors distributed as X across the processors
268  // - 4 arrays of length (# of columns in X)
269  // - 3 square matrices of size (# of columns in X)
270  //
271  // vectWeight = Weights for the L^2 norm to compute to check the eigenresiduals
272  //
273  // orthoVec = Space where CG computations are orthogonal to.
274 
275  int xrow = Y.MyLength();
276  int xcol = Y.NumVectors();
277 
278  int info = 0;
279  int localVerbose = verbose*(MyComm.MyPID() == 0);
280 
281  double *pointer = workPCG;
282 
283  // Arrays associated with the solution to the linear system
284 
285  // Array to store the matrix PtKP
286  double *PtKP = pointer;
287  pointer = pointer + xcol*xcol;
288 
289  // Array to store coefficient matrices
290  double *coeff = pointer;
291  pointer = pointer + xcol*xcol;
292 
293  // Workspace array
294  double *workD = pointer;
295  pointer = pointer + xcol*xcol;
296 
297  // Array to store the eigenvalues of P^t K P
298  double *da = pointer;
299  pointer = pointer + xcol;
300 
301  // Array to store the norms of right hand sides
302  double *initNorm = pointer;
303  pointer = pointer + xcol;
304 
305  // Array to store the norms of current residuals
306  double *resNorm = pointer;
307  pointer = pointer + xcol;
308 
309  // Array to store the preconditioned residuals
310  double *valZ = pointer;
311  pointer = pointer + xrow*xcol;
312  Epetra_MultiVector Z(View, Y.Map(), valZ, xrow, xcol);
313 
314  // Array to store the search directions
315  double *valP = pointer;
316  pointer = pointer + xrow*xcol;
317  Epetra_MultiVector P(View, Y.Map(), valP, xrow, xcol);
318 
319  // Array to store the image of the search directions
320  double *valKP = pointer;
321  pointer = pointer + xrow*xcol;
322  Epetra_MultiVector KP(View, Y.Map(), valKP, xrow, xcol);
323 
324  // Arrays associated to the corrected eigenvectors
325  // Array to store the projected stiffness matrix
326  double *UtKU = pointer;
327  pointer = pointer + xcol*xcol;
328 
329  // Array to store the corrected eigenvalues
330  double *theta = pointer;
331  pointer = pointer + xcol;
332 
333  // Array to store the norms of eigen-residuals for corrected vectors
334  double *resEig = pointer;
335  pointer = pointer + xcol;
336 
337  // Array to store the norms of previous eigen-residuals for corrected vectors
338  double *oldEig = pointer;
339  pointer = pointer + xcol;
340 
341  // Array to store the stiffness-image of the corrected eigenvectors
342  double *valKU = pointer;
343  pointer = pointer + xrow*xcol;
344  Epetra_MultiVector KU(View, Y.Map(), valKU, xrow, xcol);
345 
346  // Array to store the mass-image of the corrected eigenvectors
347  Epetra_MultiVector MU(View, Y.Map(), (M) ? pointer : Z.Values(), xrow, xcol);
348 
349  // Set the initial guess to zero
350  Y.PutScalar(0.0);
351 
352  int ii;
353  int iter;
354  int nFound;
355 
356  // Define the size of the "orthogonal" space [V, U]
357  // Note: We assume U is non zero and thus sizeQ is non zero.
358  int sizeQ = 0;
359  sizeQ += (U) ? U->NumVectors() : 0;
360  sizeQ += (V) ? V->NumVectors() : 0;
361 
362  bool isNegative = false;
363 
364  Rlin.Norm2(initNorm);
365 
366  if (localVerbose > 3) {
367  cout << endl;
368  cout.precision(4);
369  cout.setf(ios::scientific, ios::floatfield);
370  for (ii = 0; ii < xcol; ++ii) {
371  cout << " ... Initial Residual Norm " << ii << " = " << initNorm[ii] << endl;
372  }
373  cout << endl;
374  }
375 
376  // Iteration loop
377  timePCGLoop -= MyWatch.WallTime();
378  for (iter = 1; iter <= iterMax; ++iter) {
379 
380  // Apply the preconditioner
381  timePCGPrec -= MyWatch.WallTime();
382  jacobiPreconditioner(Rlin, Z, U, V, invQtMPMQ, ldQtMPMQ, PMQ, workPrec, MU.Values());
383  timePCGPrec += MyWatch.WallTime();
384 
385  if (orthoVec) {
386  // Note: Use MU as workspace
387  if (M)
388  M->Apply(Z, MU);
389  modalTool.massOrthonormalize(Z, MU, M, *orthoVec, blockSize, 1);
390  }
391 
392  // Define the new search directions
393  if (iter == 1) {
394  P = Z;
395  }
396  else {
397  // Compute P^t K Z
398  callBLAS.GEMM('T', 'N', xcol, xcol, xrow, 1.0, KP.Values(), xrow, Z.Values(), xrow,
399  0.0, workD, xcol);
400  MyComm.SumAll(workD, coeff, xcol*xcol);
401 
402  // Compute the coefficient (P^t K P)^{-1} P^t K Z
403  callBLAS.GEMM('T', 'N', xcol, xcol, xcol, 1.0, PtKP, xcol, coeff, xcol,
404  0.0, workD, xcol);
405  for (ii = 0; ii < xcol; ++ii)
406  callFortran.SCAL_INCX(xcol, da[ii], workD + ii, xcol);
407  callBLAS.GEMM('N', 'N', xcol, xcol, xcol, 1.0, PtKP, xcol, workD, xcol,
408  0.0, coeff, xcol);
409 
410  // Update the search directions
411  // Note: Use KP as a workspace
412  memcpy(KP.Values(), P.Values(), xrow*xcol*sizeof(double));
413  callBLAS.GEMM('N', 'N', xrow, xcol, xcol, 1.0, KP.Values(), xrow, coeff, xcol,
414  0.0, P.Values(), xrow);
415 
416  P.Update(1.0, Z, -1.0);
417 
418  } // if (iter == 1)
419 
420  timePCGOpMult -= MyWatch.WallTime();
421  K->Apply(P, KP);
422  numPCGstifOp += xcol;
423  if (eta != 0.0) {
424  // Apply the mass matrix
425  // Note: Use Z as a workspace
426  if (M) {
427  M->Apply(P, Z);
428  callBLAS.AXPY(xrow*xcol, -eta, Z.Values(), KP.Values());
429  }
430  else {
431  callBLAS.AXPY(xrow*xcol, -eta, P.Values(), KP.Values());
432  }
433  numPCGmassOp += xcol;
434  }
435  timePCGOpMult += MyWatch.WallTime();
436 
437  // Compute P^t K P
438  callBLAS.GEMM('T', 'N', xcol, xcol, xrow, 1.0, P.Values(), xrow, KP.Values(), xrow,
439  0.0, workD, xcol);
440  MyComm.SumAll(workD, PtKP, xcol*xcol);
441 
442  // Eigenvalue decomposition of P^t K P
443  int nev = xcol;
444  info = modalTool.directSolver(xcol, PtKP, xcol, 0, 0, nev, PtKP, xcol, da, 0, 10);
445 
446  if (info != 0) {
447  // Break the loop as spectral decomposition failed
448  info = - iterMax - 1;
449  sumIterPCG += iter;
450  maxIterPCG = (iter > maxIterPCG) ? iter : maxIterPCG;
451  return info;
452  } // if (info != 0)
453 
454  // Compute the pseudo-inverse of the eigenvalues
455  for (ii = 0; ii < xcol; ++ii) {
456  if (da[ii] < 0.0) {
457  isNegative = true;
458  break;
459  }
460  else {
461  da[ii] = (da[ii] == 0.0) ? 0.0 : 1.0/da[ii];
462  }
463  } // for (ii = 0; ii < xcol; ++ii)
464 
465  if (isNegative == true) {
466  if (localVerbose > 0) {
467  cout << endl;
468  cout.precision(4);
469  cout.setf(ios::scientific, ios::floatfield);
470  cout << " !! Negative eigenvalue in block PCG (" << da[ii] << ") !!\n";
471  cout << endl;
472  }
473  info = - iter;
474  sumIterPCG += iter;
475  maxIterPCG = (iter > maxIterPCG) ? iter : maxIterPCG;
476  return info;
477  }
478 
479  // Compute P^t R
480  callBLAS.GEMM('T', 'N', xcol, xcol, xrow, 1.0, P.Values(), xrow, Rlin.Values(), xrow,
481  0.0, workD, xcol);
482  MyComm.SumAll(workD, coeff, xcol*xcol);
483 
484  // Compute the coefficient (P^t K P)^{-1} P^t R
485  callBLAS.GEMM('T', 'N', xcol, xcol, xcol, 1.0, PtKP, xcol, coeff, xcol,
486  0.0, workD, xcol);
487  for (ii = 0; ii < xcol; ++ii)
488  callFortran.SCAL_INCX(xcol, da[ii], workD + ii, xcol);
489  callBLAS.GEMM('N', 'N', xcol, xcol, xcol, 1.0, PtKP, xcol, workD, xcol,
490  0.0, coeff, xcol);
491 
492  // Update the solutions of the linear system
493  callBLAS.GEMM('N', 'N', xrow, xcol, xcol, 1.0, P.Values(), xrow, coeff, xcol,
494  1.0, Y.Values(), xrow);
495 
496  // Update the residuals for the linear system
497  callBLAS.GEMM('N', 'N', xrow, xcol, xcol, -1.0, KP.Values(), xrow, coeff, xcol,
498  1.0, Rlin.Values(), xrow);
499 
500  // Check convergence
501  Rlin.Norm2(resNorm);
502  nFound = 0;
503  for (ii = 0; ii < xcol; ++ii) {
504  if (resNorm[ii] <= tolCG*initNorm[ii])
505  nFound += 1;
506  }
507 
508  if (localVerbose > 3) {
509  cout << endl;
510  for (ii = 0; ii < xcol; ++ii) {
511  cout << " ... ";
512  cout.width(5);
513  cout << ii << " ... Residual = ";
514  cout.precision(4);
515  cout.setf(ios::scientific, ios::floatfield);
516  cout << resNorm[ii] << " ... Right Hand Side = " << initNorm[ii] << endl;
517  }
518  cout << endl;
519  }
520 
521  if (nFound == xcol) {
522  info = iter;
523  break;
524  }
525 
526  timePCGEigCheck -= MyWatch.WallTime();
527 
528  // Check the residuals for the corrected eigenvectors
529  // Note: Use Z as workspace to store the corrected vectors
530  Z.Update(1.0, *U, -1.0, Y, 0.0);
531 
532  // Compute U^t K U
533  K->Apply(Z, KU);
534  numPCGstifOp += xcol;
535  callBLAS.GEMM('T', 'N', xcol, xcol, xrow, 1.0, Z.Values(), xrow, KU.Values(), xrow,
536  0.0, workD, xcol);
537  MyComm.SumAll(workD, UtKU, xcol*xcol);
538 
539  // Compute U^t M U
540  // Note: Use coeff as storage space
541  if (M) {
542  M->Apply(Z, MU);
543  numPCGmassOp += xcol;
544  callBLAS.GEMM('T', 'N', xcol, xcol, xrow, 1.0, Z.Values(), xrow, MU.Values(), xrow,
545  0.0, workD, xcol);
546  MyComm.SumAll(workD, coeff, xcol*xcol);
547  }
548  else {
549  callBLAS.GEMM('T', 'N', xcol, xcol, xrow, 1.0, Z.Values(), xrow, Z.Values(), xrow,
550  0.0, workD, xcol);
551  MyComm.SumAll(workD, coeff, xcol*xcol);
552  }
553 
554  nev = xcol;
555  info = modalTool.directSolver(xcol, UtKU, xcol, coeff, xcol, nev, workD, xcol, theta, 0,
556  (blockSize == 1) ? 1 : 0);
557 
558  if ((info < 0) || (nev < xcol)) {
559  // Break the loop as spectral decomposition failed
560  info = - iterMax - 1;
561  sumIterPCG += iter;
562  maxIterPCG = (iter > maxIterPCG) ? iter : maxIterPCG;
563  return info;
564  } // if ((info < 0) || (nev < xcol))
565 
566  // Compute the eigenresiduals for the corrected vectors
567  // Note: Use Z as workspace to store the residuals
568  if (M) {
569  callBLAS.GEMM('N', 'N', xrow, xcol, xcol, 1.0, MU.Values(), xrow, workD, xcol,
570  0.0, Z.Values(), xrow);
571  }
572  for (ii = 0; ii < xcol; ++ii)
573  callBLAS.SCAL(xrow, theta[ii], Z.Values() + ii*xrow);
574  callBLAS.GEMM('N', 'N', xrow, xcol, xcol, 1.0, KU.Values(), xrow, workD, xcol,
575  -1.0, Z.Values(), xrow);
576 
577  if (vectWeight)
578  Z.NormWeighted(*vectWeight, resEig);
579  else
580  Z.Norm2(resEig);
581 
582  timePCGEigCheck += MyWatch.WallTime();
583 
584  if (iter > 1) {
585  // Scale the norms of residuals with the eigenvalues
586  // Count the number of converged eigenvectors
587  nFound = 0;
588  int nGrow = 0;
589  for (ii = 0; ii < xcol; ++ii) {
590  nFound = (resEig[ii] < tolEigenSolve*theta[ii]) ? nFound + 1 : nFound;
591  nGrow = (resEig[ii] > oldEig[ii]) ? nGrow + 1 : nGrow;
592  } // for (ii = 0; ii < xcol; ++ii)
593  if ((nFound > 0) || (nGrow > 0)) {
594  info = iter;
595  break;
596  }
597  } // if (iter > 1)
598 
599  memcpy(oldEig, resEig, xcol*sizeof(double));
600 
601  } // for (iter = 1; iter <= maxIter; ++iter)
602  timePCGLoop += MyWatch.WallTime();
603 
604  sumIterPCG += iter;
605  maxIterPCG = (iter > maxIterPCG) ? iter : maxIterPCG;
606 
607  return info;
608 
609 }
610 
611 
612 int JDPCG::solve(int numEigen, Epetra_MultiVector &Q, double *lambda) {
613 
614  return JDPCG::reSolve(numEigen, Q, lambda);
615 
616 }
617 
618 
619 int JDPCG::reSolve(int numEigen, Epetra_MultiVector &Q, double *lambda, int startingEV) {
620 
621  // Computes the smallest eigenvalues and the corresponding eigenvectors
622  // of the generalized eigenvalue problem
623  //
624  // K X = M X Lambda
625  //
626  // using a Jacobi-Davidson algorithm with PCG (Block version of Notay's algorithm).
627  //
628  // Reference: "Combination of Jacobi-Davidson and conjugate gradients for the partial
629  // symmetric eigenproblem", Y. Notay, Numer. Linear Algebra Appl. (2002), 9:21-44.
630  //
631  // Input variables:
632  //
633  // numEigen (integer) = Number of eigenmodes requested
634  //
635  // Q (Epetra_MultiVector) = Converged eigenvectors
636  // The number of columns of Q must be at least numEigen + blockSize.
637  // The rows of Q are distributed across processors.
638  // At exit, the first numEigen columns contain the eigenvectors requested.
639  //
640  // lambda (array of doubles) = Converged eigenvalues
641  // At input, it must be of size numEigen + blockSize.
642  // At exit, the first numEigen locations contain the eigenvalues requested.
643  //
644  // startingEV (integer) = Number of existing converged eigenvectors
645  // We assume that the user has check the eigenvectors and
646  // their M-orthonormality.
647  //
648  // Return information on status of computation
649  //
650  // info >= 0 >> Number of converged eigenpairs at the end of computation
651  //
652  // // Failure due to input arguments
653  //
654  // info = - 1 >> The stiffness matrix K has not been specified.
655  // info = - 2 >> The maps for the matrix K and the matrix M differ.
656  // info = - 3 >> The maps for the matrix K and the preconditioner P differ.
657  // info = - 4 >> The maps for the vectors and the matrix K differ.
658  // info = - 5 >> Q is too small for the number of eigenvalues requested.
659  // info = - 6 >> Q is too small for the computation parameters.
660  //
661  // info = - 7 >> The mass matrix M has not been specified.
662  // info = - 8 >> The number of blocks is too small for the number of eigenvalues.
663  //
664  // info = - 10 >> Failure during the mass orthonormalization
665  //
666  // info = - 30 >> MEMORY
667  //
668 
669  // Check the input parameters
670 
671  if (numEigen <= startingEV) {
672  return startingEV;
673  }
674 
675  int info = myVerify.inputArguments(numEigen, K, M, Prec, Q, minimumSpaceDimension(numEigen));
676  if (info < 0)
677  return info;
678 
679  int myPid = MyComm.MyPID();
680 
681  if (M == 0) {
682  if (myPid == 0) {
683  cerr << endl;
684  cerr << " !!! The Epetra_Operator object for the mass matrix is not specified !!!" << endl;
685  cerr << endl;
686  }
687  return -7;
688  }
689 
690  if (numBlock*blockSize < numEigen) {
691  if (myPid == 0) {
692  cerr << endl;
693  cerr << " !!! The space dimension (# of blocks x size of blocks) must be greater than ";
694  cerr << " the number of eigenvalues !!!\n";
695  cerr << " Number of blocks = " << numBlock << endl;
696  cerr << " Size of blocks = " << blockSize << endl;
697  cerr << " Number of eigenvalues = " << numEigen << endl;
698  cerr << endl;
699  }
700  return -8;
701  }
702 
703  // Get the weight for approximating the M-inverse norm
704  Epetra_Vector *vectWeight = 0;
705  if (normWeight) {
706  vectWeight = new Epetra_Vector(View, Q.Map(), normWeight);
707  }
708 
709  int knownEV = startingEV;
710  int localVerbose = verbose*(myPid==0);
711 
712  // Define local block vectors
713  //
714  // PMQ = Preconditioned M-times converged eigenvectors + one working block
715  //
716  // KX = Working vectors (storing K times one block)
717  //
718  // R = Working vectors (storing residuals for one block)
719  //
720  // MX = Working vectors (storing M times one block)
721  //
722 
723  int xr = Q.MyLength();
724  int dimSearch = blockSize*numBlock;
725 
726  Epetra_MultiVector X(View, Q, 0, dimSearch + blockSize);
727  if (knownEV > 0) {
728  Epetra_MultiVector copyX(View, Q, knownEV, blockSize);
729  copyX.Random();
730  }
731  else {
732  X.Random();
733  }
734 
735  int lwork = 0;
736  lwork += (numEigen-startingEV + blockSize)*xr + 2*blockSize*xr;
737  lwork += (M) ? blockSize*xr : 0;
738 
739  // Workspace for PCG
740  lwork += (maxIterLinearSolve > 0) ? 4*xr*blockSize + 6*blockSize + 4*blockSize*blockSize : 0;
741 
742  // Workspace for JDCG
743  lwork += blockSize + dimSearch + 2*dimSearch*dimSearch;
744  lwork += 2*(numEigen-startingEV+blockSize)*(numEigen-startingEV+blockSize);
745  lwork += 2*(numEigen-startingEV+blockSize)*blockSize;
746 
747  double *work = new (nothrow) double[lwork];
748  if (work == 0) {
749  if (vectWeight)
750  delete vectWeight;
751  info = -30;
752  return info;
753  }
754  memRequested += sizeof(double)*lwork/(1024.0*1024.0);
755 
756  highMem = (highMem > currentSize()) ? highMem : currentSize();
757 
758  double *tmpD = work;
759 
760  Epetra_MultiVector PMQ(View, Q.Map(), tmpD, xr, numEigen-startingEV+blockSize);
761  tmpD = tmpD + (numEigen-startingEV+blockSize)*xr;
762 
763  Epetra_MultiVector R(View, Q.Map(), tmpD, xr, blockSize);
764  tmpD = tmpD + blockSize*xr;
765 
766  Epetra_MultiVector KX(View, Q.Map(), tmpD, xr, blockSize);
767  tmpD = tmpD + blockSize*xr;
768 
769  Epetra_MultiVector MX(View, Q.Map(), tmpD, xr, blockSize);
770  tmpD = tmpD + blockSize*xr;
771 
772  // Note: Use MX as workspace in PCG iterations
773  double *workPCG = 0;
774  if (maxIterLinearSolve > 0) {
775  workPCG = tmpD - blockSize*xr;
776  tmpD = tmpD + 4*xr*blockSize + 6*blockSize + 4*blockSize*blockSize;
777  }
778 
779  // theta = Store the local eigenvalues (size: dimSearch)
780  //
781  // normR = Store the norm of residuals (size: blockSize)
782  //
783  // KK = Local stiffness matrix (size: dimSearch x dimSearch)
784  //
785  // S = Local eigenvectors (size: dimSearch x dimSearch)
786  //
787  // QtMPMQ = Projected "preconditioner" (size: (numEigen-startingEV+blockSize) ^ 2)
788  //
789  // invQtMPMQ = Inverse of QtMPMQ (size: (numEigen-startingEV+blockSize) ^ 2)
790  //
791  // tmpArray = Temporary workspace (size: 2*(numEigen-startingEV+blockSize) x blockSize)
792 
793  double *theta = tmpD;
794  tmpD = tmpD + dimSearch;
795 
796  double *normR = tmpD;
797  tmpD = tmpD + blockSize;
798 
799  double *KK = tmpD;
800  tmpD = tmpD + dimSearch*dimSearch;
801  memset(KK, 0, dimSearch*dimSearch*sizeof(double));
802 
803  double *S = tmpD;
804  tmpD = tmpD + dimSearch*dimSearch;
805 
806  double *QtMPMQ = tmpD;
807  tmpD = tmpD + (numEigen-startingEV+blockSize)*(numEigen-startingEV+blockSize);
808  int ldQtMPMQ = numEigen - startingEV + blockSize;
809 
810  double *invQtMPMQ = tmpD;
811  tmpD = tmpD + (numEigen-startingEV+blockSize)*(numEigen-startingEV+blockSize);
812 
813  double *tmpArray = tmpD;
814 
815  // Define an array to store the residuals history
816  if (localVerbose > 2) {
817  resHistory = new (nothrow) double[maxIterEigenSolve*blockSize];
818  spaceSizeHistory = new (nothrow) int[maxIterEigenSolve];
819  iterPCGHistory = new (nothrow) int[maxIterEigenSolve];
820  if ((resHistory == 0) || (spaceSizeHistory == 0) || (iterPCGHistory == 0)) {
821  if (vectWeight)
822  delete vectWeight;
823  delete[] work;
824  info = -30;
825  return info;
826  }
827  historyCount = 0;
828  }
829 
830  // Miscellaneous definitions
831  bool reStart = false;
832  bool criticalExit = false;
833 
834  int i, j;
835  int nFound = blockSize;
836  int bStart = 0;
837  int offSet = 0;
838  numRestart = 0;
839 
840  double tau = 0.0;
841  double eta = tau;
842  double sqrtTol = sqrt(tolEigenSolve);
843  double coefDecay = 0.5;
844  double tolPCG = 1.0;
845 
846  if (localVerbose > 0) {
847  cout << endl;
848  cout << " *|* Problem: ";
849  if (M)
850  cout << "K*Q = M*Q D ";
851  else
852  cout << "K*Q = Q D ";
853  if (Prec)
854  cout << " with preconditioner";
855  cout << endl;
856  cout << " *|* Algorithm = Jacobi-Davidson algorithm with PCG (block version)\n";
857  cout << " *|* Size of blocks = " << blockSize << endl;
858  cout << " *|* Largest size of search space = " << numBlock*blockSize << endl;
859  cout << " *|* Number of requested eigenvalues = " << numEigen << endl;
860  cout.precision(2);
861  cout.setf(ios::scientific, ios::floatfield);
862  cout << " *|* Tolerance for convergence = " << tolEigenSolve << endl;
863  cout << " *|* Norm used for convergence: ";
864  if (vectWeight)
865  cout << "weighted L2-norm with user-provided weights" << endl;
866  else
867  cout << "L^2-norm" << endl;
868  if (startingEV > 0)
869  cout << " *|* Input converged eigenvectors = " << startingEV << endl;
870  cout << "\n -- Start iterations -- \n";
871  }
872 
873  int maxBlock = (dimSearch/blockSize) - (knownEV/blockSize);
874 
875  timeOuterLoop -= MyWatch.WallTime();
876  outerIter = 0;
877  while (outerIter <= maxIterEigenSolve) {
878 
879  highMem = (highMem > currentSize()) ? highMem : currentSize();
880 
881  Epetra_MultiVector PMX(View, PMQ, knownEV - startingEV, blockSize);
882 
883  int nb;
884  for (nb = bStart; nb < maxBlock; ++nb) {
885 
886  outerIter += 1;
887  if (outerIter > maxIterEigenSolve)
888  break;
889 
890  int localSize = nb*blockSize;
891 
892  Epetra_MultiVector Xcurrent(View, X, localSize + knownEV, blockSize);
893 
894  timeMassOp -= MyWatch.WallTime();
895  if (M)
896  M->Apply(Xcurrent, MX);
897  timeMassOp += MyWatch.WallTime();
898  massOp += blockSize;
899 
900  // Orthonormalize X against the known eigenvectors and the previous vectors
901  // Note: Use R as a temporary work space
902  timeOrtho -= MyWatch.WallTime();
903  if (nb == bStart) {
904  if (nFound > 0) {
905  if (knownEV == 0) {
906  info = modalTool.massOrthonormalize(Xcurrent, MX, M, Q, nFound, 2, R.Values());
907  }
908  else {
909  if (localSize == 0) {
910  Epetra_MultiVector copyQ(View, X, 0, knownEV);
911  info = modalTool.massOrthonormalize(Xcurrent, MX, M, copyQ, nFound, 0, R.Values());
912  }
913  else {
914  Epetra_MultiVector copyQ(View, X, knownEV, localSize);
915  info = modalTool.massOrthonormalize(Xcurrent, MX, M, copyQ, nFound, 0, R.Values());
916  } // if (localSize == 0)
917  } // if (knownEV == 0)
918  } // if (nFound > 0)
919  nFound = 0;
920  }
921  else {
922  Epetra_MultiVector copyQ(View, X, knownEV, localSize);
923  info = modalTool.massOrthonormalize(Xcurrent, MX, M, copyQ, blockSize, 0, R.Values());
924  }
925  timeOrtho += MyWatch.WallTime();
926 
927  // Exit the code when the number of vectors exceeds the space dimension
928  if (info < 0) {
929  info = -10;
930  if (vectWeight)
931  delete vectWeight;
932  delete[] work;
933  if (workPCG)
934  delete[] workPCG;
935  return info;
936  }
937 
938  timeStifOp -= MyWatch.WallTime();
939  K->Apply(Xcurrent, KX);
940  timeStifOp += MyWatch.WallTime();
941  stifOp += blockSize;
942 
943  if (verbose > 3) {
944  if (knownEV + localSize == 0)
945  accuracyCheck(&Xcurrent, &MX, 0);
946  else {
947  Epetra_MultiVector copyQ(View, X, 0, knownEV + localSize);
948  accuracyCheck(&Xcurrent, &MX, &copyQ);
949  }
950  if (localVerbose > 0)
951  cout << endl;
952  } // if (verbose > 3)
953 
954  // Define the local stiffness matrix
955  timeLocalProj -= MyWatch.WallTime();
956  for (j = 0; j <= nb; ++j) {
957  callBLAS.GEMM('T', 'N', blockSize, blockSize, xr,
958  1.0, X.Values()+(knownEV+j*blockSize)*xr, xr, KX.Values(), xr,
959  0.0, tmpArray + blockSize*blockSize, blockSize);
960  MyComm.SumAll(tmpArray + blockSize*blockSize, tmpArray, blockSize*blockSize);
961  int iC;
962  for (iC = 0; iC < blockSize; ++iC) {
963  double *Kpointer = KK + localSize*dimSearch + j*blockSize + iC*dimSearch;
964  memcpy(Kpointer, tmpArray + iC*blockSize, blockSize*sizeof(double));
965  } // for (iC = 0; iC < blockSize; ++iC)
966  } // for (j = 0; j <= nb; ++j)
967  timeLocalProj += MyWatch.WallTime();
968 
969  // Perform a spectral decomposition
970  timeLocalSolve -= MyWatch.WallTime();
971  int nevLocal = localSize + blockSize;
972  info = modalTool.directSolver(localSize + blockSize, KK, dimSearch, 0, 0,
973  nevLocal, S, dimSearch, theta, localVerbose, 10);
974  timeLocalSolve += MyWatch.WallTime();
975 
976  if ((info != 0) && (theta[0] < 0.0)) {
977  // Stop as spectral decomposition has a critical failure
978  if (info < 0) {
979  criticalExit = true;
980  break;
981  }
982  // Restart as spectral decomposition failed
983  if (localVerbose > 0) {
984  cout << " Iteration " << outerIter;
985  cout << "- Failure for spectral decomposition - RESTART with new random search\n";
986  }
987  reStart = true;
988  numRestart += 1;
989  timeRestart -= MyWatch.WallTime();
990  Epetra_MultiVector Xinit(View, X, knownEV, blockSize);
991  Xinit.Random();
992  timeRestart += MyWatch.WallTime();
993  nFound = blockSize;
994  bStart = 0;
995  break;
996  } // if (info != 0)
997 
998  // Start the definition of the new block
999  // Note: Use KX as a workspace to store the updated directions
1000  timeLocalUpdate -= MyWatch.WallTime();
1001  callBLAS.GEMM('N', 'N', xr, blockSize, localSize+blockSize, 1.0, X.Values()+knownEV*xr, xr,
1002  S, dimSearch, 0.0, KX.Values(), xr);
1003  timeLocalUpdate += MyWatch.WallTime();
1004 
1005  // Apply the mass matrix on the new block
1006  timeMassOp -= MyWatch.WallTime();
1007  if (M)
1008  M->Apply(KX, MX);
1009  timeMassOp += MyWatch.WallTime();
1010  massOp += blockSize;
1011 
1012  // Apply the stiffness matrix on the new block
1013  timeStifOp -= MyWatch.WallTime();
1014  K->Apply(KX, R);
1015  timeStifOp += MyWatch.WallTime();
1016  stifOp += blockSize;
1017 
1018  // Form the residuals
1019  timeResidual -= MyWatch.WallTime();
1020  for (j = 0; j < blockSize; ++j) {
1021  callBLAS.AXPY(xr, -theta[j], MX.Values()+j*xr, R.Values()+j*xr);
1022  }
1023  timeResidual += MyWatch.WallTime();
1024  residual += blockSize;
1025 
1026  // Compute the norm of residuals
1027  timeNorm -= MyWatch.WallTime();
1028  if (vectWeight)
1029  R.NormWeighted(*vectWeight, normR);
1030  else
1031  R.Norm2(normR);
1032  // Scale the norms of residuals with the eigenvalues
1033  // Count the number of converged eigenvectors
1034  nFound = 0;
1035  for (j = 0; j < blockSize; ++j) {
1036  normR[j] = (theta[j] == 0.0) ? normR[j] : normR[j]/theta[j];
1037  if (normR[j] < tolEigenSolve) {
1038  nFound += 1;
1039  }
1040  } // for (j = 0; j < blockSize; ++j)
1041  timeNorm += MyWatch.WallTime();
1042 
1043  maxSpaceSize = (maxSpaceSize > localSize+blockSize) ? maxSpaceSize : localSize+blockSize;
1044  sumSpaceSize += localSize + blockSize;
1045 
1046  // Print information on current iteration
1047  if (localVerbose > 0) {
1048  cout << " Iteration " << outerIter << " - Number of converged eigenvectors ";
1049  cout << knownEV + nFound << endl;
1050  }
1051 
1052  if (localVerbose > 2) {
1053  memcpy(resHistory + blockSize*historyCount, normR, blockSize*sizeof(double));
1054  spaceSizeHistory[historyCount] = localSize + blockSize;
1055  }
1056 
1057  if (localVerbose > 1) {
1058  cout << endl;
1059  cout.precision(2);
1060  cout.setf(ios::scientific, ios::floatfield);
1061  for (i=0; i<blockSize; ++i) {
1062  cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i;
1063  cout << " = " << normR[i] << endl;
1064  }
1065  cout << endl;
1066  cout.precision(2);
1067  for (i=0; i<localSize; ++i) {
1068  cout << " Iteration " << outerIter << " - Ritz eigenvalue " << i;
1069  cout.setf((fabs(theta[i]) < 0.01) ? ios::scientific : ios::fixed, ios::floatfield);
1070  cout << " = " << theta[i] << endl;
1071  }
1072  cout << endl;
1073  }
1074 
1075  // Exit the loop when some vectors have converged
1076  if (nFound > 0) {
1077  tolPCG = 1.0;
1078  offSet = 0;
1079  nb += 1;
1080  if (localVerbose > 2) {
1081  iterPCGHistory[historyCount] = 0;
1082  historyCount += 1;
1083  }
1084  break;
1085  }
1086 
1087  // Exit the loop when all positions are filled
1088  if ((maxBlock > 1) && (nb == maxBlock - 1)) {
1089  if (localVerbose > 2) {
1090  iterPCGHistory[historyCount] = 0;
1091  historyCount += 1;
1092  }
1093  continue;
1094  }
1095 
1096  // Apply the preconditioner on the new direction
1097  if (Prec) {
1098  timePrecOp -= MyWatch.WallTime();
1099  Prec->ApplyInverse(MX, PMX);
1100  timePrecOp += MyWatch.WallTime();
1101  precOp += blockSize;
1102  } // if (Prec)
1103  else {
1104  memcpy(PMX.Values(), MX.Values(), xr*blockSize*sizeof(double));
1105  } // if (Prec)
1106 
1107  // Update the upper triangular part of the matrix QtMPMQ
1108  // Note: Use tmpArray as a workspace
1109  timeBuildQtMPMQ -= MyWatch.WallTime();
1110  int qLength = knownEV - startingEV + blockSize;
1111  callBLAS.GEMM('T', 'N', qLength, blockSize, xr, 1.0, PMQ.Values(), xr, MX.Values(), xr,
1112  0.0, tmpArray + qLength*blockSize, qLength);
1113  MyComm.SumAll(tmpArray + qLength*blockSize, tmpArray, qLength*blockSize);
1114  for (j = 0; j < blockSize; ++j) {
1115  memcpy(QtMPMQ + (knownEV-startingEV+j)*ldQtMPMQ, tmpArray + j*qLength,
1116  qLength*sizeof(double));
1117  }
1118  timeBuildQtMPMQ += MyWatch.WallTime();
1119 
1120  // Factor the matrix QtMPMQ
1121  for (j = 0; j < qLength; ++j)
1122  memcpy(invQtMPMQ + j*ldQtMPMQ, QtMPMQ + j*ldQtMPMQ, (j+1)*sizeof(double));
1123  callLAPACK.POTRF('U', qLength, invQtMPMQ, ldQtMPMQ, &info);
1124 
1125  // Treat the error messages for Cholesky factorization
1126  if (info != 0) {
1127  // mfh 14 Jan 2011: INFO < 0 is definitely a logic error.
1128  TEUCHOS_TEST_FOR_EXCEPTION(info < 0, std::logic_error, "Argument number "
1129  << -info << " of LAPACK's DPOTRF routine had "
1130  "an illegal value.");
1131  // Restart as factorization failed
1132  if (localVerbose > 0) {
1133  cout << " Iteration " << outerIter;
1134  cout << " - Failure for local factorization (" << info << "/" << qLength << ")";
1135  cout << " - RESTART with new random search";
1136  cout << endl;
1137  }
1138  reStart = true;
1139  numRestart += 1;
1140  timeRestart -= MyWatch.WallTime();
1141  Epetra_MultiVector Xinit(View, X, knownEV, blockSize);
1142  Xinit.Random();
1143  timeRestart += MyWatch.WallTime();
1144  nFound = blockSize;
1145  bStart = 0;
1146  if (localVerbose > 2) {
1147  iterPCGHistory[historyCount] = 0;
1148  historyCount += 1;
1149  }
1150  break;
1151  }
1152 
1153  // Correction equation
1154  // Note that the new working block U is stored in KX,
1155  // while MX contains M*U and PMX contains P^{-1}*M*U
1156  Epetra_MultiVector Xnext(View, X, (maxBlock == 1) ? knownEV
1157  : knownEV+localSize+blockSize, blockSize);
1158  if (normR[0] < sqrtTol)
1159  eta = theta[0];
1160  else
1161  eta = tau;
1162 
1163  if (verbose > 3) {
1164  double maxDotQ = 0.0;
1165  if (knownEV > 0) {
1166  Epetra_MultiVector copyQ(View, X, 0, knownEV);
1167  maxDotQ = myVerify.errorOrthogonality(&copyQ, &R, 0);
1168  }
1169  double maxDotU = myVerify.errorOrthogonality(&KX, &R, 0);
1170  if (myPid == 0) {
1171  cout << " >> Orthogonality for the right hand side of the correction equation = ";
1172  double tmp = (maxDotU > maxDotQ) ? maxDotU : maxDotQ;
1173  cout.precision(4);
1174  cout.setf(ios::scientific, ios::floatfield);
1175  cout << tmp << endl << endl;
1176  }
1177  }
1178 
1179  timeCorrectionSolve -= MyWatch.WallTime();
1180  if (startingEV > 0) {
1181  Epetra_MultiVector startQ(View, X, 0, startingEV);
1182  if (knownEV > startingEV) {
1183  Epetra_MultiVector copyQ(View, X, startingEV, knownEV - startingEV);
1184  info = jacobiPCG(R, Xnext, &KX, &copyQ, eta, tolPCG, maxIterLinearSolve, invQtMPMQ,
1185  ldQtMPMQ, PMQ.Values(), tmpArray, workPCG, vectWeight, &startQ);
1186  }
1187  else {
1188  info = jacobiPCG(R, Xnext, &KX, 0, eta, tolPCG, maxIterLinearSolve, invQtMPMQ,
1189  ldQtMPMQ, PMQ.Values(), tmpArray, workPCG, vectWeight, &startQ);
1190  }
1191  }
1192  else {
1193  if (knownEV > 0) {
1194  Epetra_MultiVector copyQ(View, X, 0, knownEV);
1195  info = jacobiPCG(R, Xnext, &KX, &copyQ, eta, tolPCG, maxIterLinearSolve, invQtMPMQ,
1196  ldQtMPMQ, PMQ.Values(), tmpArray, workPCG, vectWeight);
1197  }
1198  else {
1199  info = jacobiPCG(R, Xnext, &KX, 0, eta, tolPCG, maxIterLinearSolve, invQtMPMQ,
1200  ldQtMPMQ, PMQ.Values(), tmpArray, workPCG, vectWeight);
1201  }
1202  }
1203  timeCorrectionSolve += MyWatch.WallTime();
1204 
1205  if (verbose > 3) {
1206  double maxDotQ = 0.0;
1207  if (knownEV > 0) {
1208  Epetra_MultiVector copyQ(View, X, 0, knownEV);
1209  maxDotQ = myVerify.errorOrthogonality(&copyQ, &Xnext, M);
1210  }
1211  double maxDotU = myVerify.errorOrthogonality(&KX, &Xnext, M);
1212  if (myPid == 0) {
1213  double tmp = (maxDotU > maxDotQ) ? maxDotU : maxDotQ;
1214  cout << " >> Orthogonality for the solution of the correction equation = ";
1215  cout.precision(4);
1216  cout.setf(ios::scientific, ios::floatfield);
1217  cout << tmp << endl << endl;
1218  }
1219  }
1220 
1221  numCorrectionSolve += blockSize;
1222  if (info < 0) {
1223  if ((info == -1) || (info == -maxIterLinearSolve-1)) {
1224  if (localVerbose > 0) {
1225  cout << " Iteration " << outerIter;
1226  cout << " - Failure inside PCG";
1227  cout << " - RESTART with new random search";
1228  cout << endl;
1229  }
1230  if (localVerbose > 2) {
1231  iterPCGHistory[historyCount] = -1;
1232  historyCount += 1;
1233  }
1234  reStart = true;
1235  numRestart += 1;
1236  timeRestart -= MyWatch.WallTime();
1237  Epetra_MultiVector Xinit(View, X, knownEV, blockSize);
1238  Xinit.Random();
1239  timeRestart += MyWatch.WallTime();
1240  nFound = blockSize;
1241  bStart = 0;
1242  info = 0;
1243  break;
1244  } // if ((info == -1) || (info == -iterMax-1))
1245  else {
1246  if (localVerbose > 2) {
1247  iterPCGHistory[historyCount] = (info < 0) ? -info : info;
1248  historyCount += 1;
1249  }
1250  } // if ((info == -1) || (info == -iterMax-1))
1251  } // if (info < 0)
1252  else {
1253  if (localVerbose > 2) {
1254  iterPCGHistory[historyCount] = info;
1255  historyCount += 1;
1256  }
1257  } // if (info < 0)
1258  info = 0;
1259 
1260  tolPCG *= coefDecay;
1261 
1262  if (maxBlock == 1) {
1263  Xcurrent.Update(1.0, KX, -1.0);
1264  }
1265 
1266  } // for (nb = bstart; nb < maxBlock; ++nb)
1267 
1268  if (outerIter > maxIterEigenSolve)
1269  break;
1270 
1271  if (reStart == true) {
1272  reStart = false;
1273  continue;
1274  }
1275 
1276  if (criticalExit == true)
1277  break;
1278 
1279  // Store the final converged eigenvectors
1280  if (knownEV + nFound >= numEigen) {
1281  for (j = 0; j < blockSize; ++j) {
1282  if (normR[j] < tolEigenSolve) {
1283  memcpy(X.Values() + knownEV*xr, KX.Values() + j*xr, xr*sizeof(double));
1284  lambda[knownEV] = theta[j];
1285  knownEV += 1;
1286  }
1287  }
1288  if (localVerbose == 1) {
1289  cout << endl;
1290  cout.precision(2);
1291  cout.setf(ios::scientific, ios::floatfield);
1292  for (i=0; i<blockSize; ++i) {
1293  cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i;
1294  cout << " = " << normR[i] << endl;
1295  }
1296  cout << endl;
1297  }
1298  break;
1299  } // if (knownEV + nFound >= numEigen)
1300 
1301  // Treat the particular case of one block
1302  if ((maxBlock == 1) && (nFound == 0)) {
1303  nFound = blockSize;
1304  continue;
1305  }
1306 
1307  // Define the restarting space when there is only one block
1308  if (maxBlock == 1) {
1309  timeRestart -= MyWatch.WallTime();
1310  double *Xpointer = X.Values() + (knownEV+nFound)*xr;
1311  nFound = 0;
1312  for (j = 0; j < blockSize; ++j) {
1313  if (normR[j] < tolEigenSolve) {
1314  memcpy(X.Values() + knownEV*xr, KX.Values() + j*xr, xr*sizeof(double));
1315  lambda[knownEV] = theta[j];
1316  knownEV += 1;
1317  nFound += 1;
1318  }
1319  else {
1320  memcpy(Xpointer + (j-nFound)*xr, KX.Values() + j*xr, xr*sizeof(double));
1321  }
1322  }
1323  knownEV -= nFound;
1324  Epetra_MultiVector Xnext(View, X, knownEV + blockSize, nFound);
1325  Xnext.Random();
1326  timeRestart += MyWatch.WallTime();
1327  }
1328 
1329  // Define the restarting block when there is more than one block
1330  int oldCol, newCol;
1331  if (maxBlock > 1) {
1332  timeRestart -= MyWatch.WallTime();
1333  int firstIndex = blockSize;
1334  for (j = 0; j < blockSize; ++j) {
1335  if (normR[j] >= tolEigenSolve) {
1336  firstIndex = j;
1337  break;
1338  }
1339  } // for (j = 0; j < blockSize; ++j)
1340  while (firstIndex < nFound) {
1341  for (j = firstIndex; j < blockSize; ++j) {
1342  if (normR[j] < tolEigenSolve) {
1343  // Swap the j-th and firstIndex-th position
1344  callFortran.SWAP(nb*blockSize, S + j*dimSearch, 1, S + firstIndex*dimSearch, 1);
1345  callFortran.SWAP(1, theta + j, 1, theta + firstIndex, 1);
1346  callFortran.SWAP(1, normR + j, 1, normR + firstIndex, 1);
1347  break;
1348  }
1349  } // for (j = firstIndex; j < blockSize; ++j)
1350  for (j = 0; j < blockSize; ++j) {
1351  if (normR[j] >= tolEigenSolve) {
1352  firstIndex = j;
1353  break;
1354  }
1355  } // for (j = 0; j < blockSize; ++j)
1356  } // while (firstIndex < nFound)
1357 
1358  // Copy the converged eigenvalues
1359  memcpy(lambda + knownEV, theta, nFound*sizeof(double));
1360 
1361  // Define the restarting size
1362  bStart = ((nb - offSet) > 2) ? (nb - offSet)/2 : 0;
1363 
1364  // Define the restarting space and local stiffness
1365  memset(KK, 0, nb*blockSize*dimSearch*sizeof(double));
1366  for (j = 0; j < bStart*blockSize; ++j) {
1367  KK[j + j*dimSearch] = theta[j + nFound];
1368  }
1369  // Form the restarting space
1370  oldCol = nb*blockSize;
1371  newCol = nFound + (bStart+1)*blockSize;
1372  newCol = (newCol > oldCol) ? oldCol : newCol;
1373  callFortran.GEQRF(oldCol, newCol, S, dimSearch, theta, R.Values(), xr*blockSize, &info);
1374  callFortran.ORMQR('R', 'N', xr, oldCol, newCol, S, dimSearch, theta,
1375  X.Values()+knownEV*xr, xr, R.Values(), xr*blockSize, &info);
1376  // Put random vectors if the Rayleigh Ritz vectors are not enough
1377  newCol = nFound + (bStart+1)*blockSize;
1378  if (newCol > oldCol) {
1379  Epetra_MultiVector Xnext(View, X, knownEV+blockSize-nFound, nFound);
1380  Xnext.Random();
1381  }
1382  timeRestart += MyWatch.WallTime();
1383  } // if (maxBlock > 1)
1384 
1385  if (nFound > 0) {
1386  // Update MQ
1387  Epetra_MultiVector Qconv(View, X, knownEV, nFound);
1388  Epetra_MultiVector MQconv(View, MX, 0, nFound);
1389  timeMassOp -= MyWatch.WallTime();
1390  if (M)
1391  M->Apply(Qconv, MQconv);
1392  timeMassOp += MyWatch.WallTime();
1393  massOp += nFound;
1394  // Update PMQ
1395  Epetra_MultiVector PMQconv(View, PMQ, knownEV-startingEV, nFound);
1396  if (Prec) {
1397  timePrecOp -= MyWatch.WallTime();
1398  Prec->ApplyInverse(MQconv, PMQconv);
1399  timePrecOp += MyWatch.WallTime();
1400  precOp += nFound;
1401  }
1402  else {
1403  memcpy(PMQconv.Values(), MQconv.Values(), xr*nFound*sizeof(double));
1404  }
1405  // Update QtMPMQ
1406  // Note: Use tmpArray as workspace
1407  int newEV = knownEV - startingEV;
1408  timeBuildQtMPMQ -= MyWatch.WallTime();
1409  callBLAS.GEMM('T', 'N', newEV + nFound, nFound, xr, 1.0, PMQ.Values(), xr,
1410  MX.Values(), xr, 0.0, QtMPMQ + newEV*ldQtMPMQ, newEV + nFound);
1411  MyComm.SumAll(QtMPMQ + newEV*ldQtMPMQ, tmpArray, (newEV + nFound)*nFound);
1412  for (j = 0; j < nFound; ++j) {
1413  memcpy(QtMPMQ + (newEV+j)*ldQtMPMQ, tmpArray + j*(newEV+nFound),
1414  (newEV+nFound)*sizeof(double));
1415  }
1416  timeBuildQtMPMQ += MyWatch.WallTime();
1417  } // if (nFound > 0)
1418  else {
1419  offSet += 1;
1420  } // if (nFound > 0)
1421 
1422  knownEV += nFound;
1423  maxBlock = (dimSearch/blockSize) - (knownEV/blockSize);
1424 
1425  // The value of nFound commands how many vectors will be orthogonalized.
1426  if ((maxBlock > 1) && (newCol <= oldCol))
1427  nFound = 0;
1428 
1429  } // while (outerIter <= maxIterEigenSolve)
1430  timeOuterLoop += MyWatch.WallTime();
1431  highMem = (highMem > currentSize()) ? highMem : currentSize();
1432 
1433  // Clean memory
1434  delete[] work;
1435  if (vectWeight)
1436  delete vectWeight;
1437 
1438  // Sort the eigenpairs
1439  timePostProce -= MyWatch.WallTime();
1440  if ((info == 0) && (knownEV > 0)) {
1441  mySort.sortScalars_Vectors(knownEV, lambda, Q.Values(), Q.MyLength());
1442  }
1443  timePostProce += MyWatch.WallTime();
1444 
1445  return (info == 0) ? knownEV : info;
1446 
1447 }
1448 
1449 
1450 void JDPCG::accuracyCheck(const Epetra_MultiVector *X, const Epetra_MultiVector *MX,
1451  const Epetra_MultiVector *Q) const {
1452 
1453  cout.precision(2);
1454  cout.setf(ios::scientific, ios::floatfield);
1455  double tmp;
1456 
1457  int myPid = MyComm.MyPID();
1458 
1459  if (X) {
1460  if (M) {
1461  if (MX) {
1462  tmp = myVerify.errorEquality(X, MX, M);
1463  if (myPid == 0)
1464  cout << " >> Difference between MX and M*X = " << tmp << endl;
1465  }
1466  tmp = myVerify.errorOrthonormality(X, M);
1467  if (myPid == 0)
1468  cout << " >> Error in X^T M X - I = " << tmp << endl;
1469  }
1470  else {
1471  tmp = myVerify.errorOrthonormality(X, 0);
1472  if (myPid == 0)
1473  cout << " >> Error in X^T X - I = " << tmp << endl;
1474  }
1475  }
1476 
1477  if (Q == 0)
1478  return;
1479 
1480  if (M) {
1481  tmp = myVerify.errorOrthonormality(Q, M);
1482  if (myPid == 0)
1483  cout << " >> Error in Q^T M Q - I = " << tmp << endl;
1484  if (X) {
1485  tmp = myVerify.errorOrthogonality(Q, X, M);
1486  if (myPid == 0)
1487  cout << " >> Orthogonality Q^T M X up to " << tmp << endl;
1488  }
1489  }
1490  else {
1491  tmp = myVerify.errorOrthonormality(Q, 0);
1492  if (myPid == 0)
1493  cout << " >> Error in Q^T Q - I = " << tmp << endl;
1494  if (X) {
1495  tmp = myVerify.errorOrthogonality(Q, X, 0);
1496  if (myPid == 0)
1497  cout << " >> Orthogonality Q^T X up to " << tmp << endl;
1498  }
1499  }
1500 
1501 }
1502 
1503 
1504 int JDPCG::minimumSpaceDimension(int nev) const {
1505 
1506  int myPid = MyComm.MyPID();
1507 
1508  if ((myPid == 0) && (numBlock*blockSize < nev)) {
1509  cerr << endl;
1510  cerr << " !!! The space dimension (# of blocks x size of blocks) must be greater than ";
1511  cerr << " the number of eigenvalues !!!\n";
1512  cerr << " Number of blocks = " << numBlock << endl;
1513  cerr << " Size of blocks = " << blockSize << endl;
1514  cerr << " Number of eigenvalues = " << nev << endl;
1515  cerr << endl;
1516  return -1;
1517  }
1518 
1519  return nev + blockSize;
1520 
1521 }
1522 
1523 
1524 void JDPCG::initializeCounters() {
1525 
1526  historyCount = 0;
1527  if (resHistory) {
1528  delete[] resHistory;
1529  resHistory = 0;
1530  }
1531 
1532  maxSpaceSize = 0;
1533  sumSpaceSize = 0;
1534  if (spaceSizeHistory) {
1535  delete[] spaceSizeHistory;
1536  spaceSizeHistory = 0;
1537  }
1538 
1539  maxIterPCG = 0;
1540  sumIterPCG = 0;
1541  if (iterPCGHistory) {
1542  delete[] iterPCGHistory;
1543  iterPCGHistory = 0;
1544  }
1545 
1546  memRequested = 0.0;
1547  highMem = 0.0;
1548 
1549  massOp = 0;
1550  numCorrectionPrec = 0;
1551  numCorrectionSolve = 0;
1552  numPCGmassOp = 0;
1553  numPCGstifOp = 0;
1554  numRestart = 0;
1555  outerIter = 0;
1556  precOp = 0;
1557  residual = 0;
1558  stifOp = 0;
1559 
1560  timeBuildQtMPMQ = 0.0;
1561  timeCorrectionPrec = 0.0;
1562  timeCorrectionSolve = 0.0;
1563  timeLocalProj = 0.0;
1564  timeLocalSolve = 0.0;
1565  timeLocalUpdate = 0.0;
1566  timeMassOp = 0.0;
1567  timeNorm = 0.0;
1568  timeOrtho = 0.0;
1569  timeOuterLoop = 0.0;
1570  timePCGEigCheck = 0.0;
1571  timePCGLoop = 0.0;
1572  timePCGOpMult = 0.0;
1573  timePCGPrec = 0.0;
1574  timePostProce = 0.0;
1575  timePrecOp = 0.0;
1576  timeResidual = 0.0;
1577  timeRestart = 0.0;
1578  timeStifOp = 0.0;
1579 
1580 }
1581 
1582 
1583 void JDPCG::algorithmInfo() const {
1584 
1585  int myPid = MyComm.MyPID();
1586 
1587  if (myPid ==0) {
1588  cout << " Algorithm: Jacobi-Davidson algorithm with PCG (block version)\n";
1589  cout << " Block Size: " << blockSize << endl;
1590  cout << " Number of Blocks kept: " << numBlock << endl;
1591  }
1592 
1593 }
1594 
1595 
1596 void JDPCG::historyInfo() const {
1597 
1598  if (resHistory) {
1599  int j;
1600  cout << " Block Size: " << blockSize << endl;
1601  cout << endl;
1602  cout << " Residuals Search Space Dim. Inner Iter. \n";
1603  cout << endl;
1604  cout.precision(4);
1605  cout.setf(ios::scientific, ios::floatfield);
1606  for (j = 0; j < historyCount; ++j) {
1607  int ii;
1608  for (ii = 0; ii < blockSize; ++ii) {
1609  cout << resHistory[blockSize*j + ii] << " ";
1610  cout.width(4);
1611  cout << spaceSizeHistory[j] << " ";
1612  cout.width(4);
1613  cout << iterPCGHistory[j] << endl;
1614  }
1615  }
1616  cout << endl;
1617  }
1618 
1619 }
1620 
1621 
1622 void JDPCG::memoryInfo() const {
1623 
1624  int myPid = MyComm.MyPID();
1625 
1626  double maxHighMem = 0.0;
1627  double tmp = highMem;
1628  MyComm.MaxAll(&tmp, &maxHighMem, 1);
1629 
1630  double maxMemRequested = 0.0;
1631  tmp = memRequested;
1632  MyComm.MaxAll(&tmp, &maxMemRequested, 1);
1633 
1634  if (myPid == 0) {
1635  cout.precision(2);
1636  cout.setf(ios::fixed, ios::floatfield);
1637  cout << " Memory requested per processor by the eigensolver = (EST) ";
1638  cout.width(6);
1639  cout << maxMemRequested << " MB " << endl;
1640  cout << endl;
1641  cout << " High water mark in eigensolver = (EST) ";
1642  cout.width(6);
1643  cout << maxHighMem << " MB " << endl;
1644  cout << endl;
1645  }
1646 
1647 }
1648 
1649 
1650 void JDPCG::operationInfo() const {
1651 
1652  int myPid = MyComm.MyPID();
1653 
1654  if (myPid == 0) {
1655  cout << " --- Operations ---\n\n";
1656  cout << " Total number of mass matrix multiplications = ";
1657  cout.width(9);
1658  int tmp = massOp + modalTool.getNumProj_MassMult() + modalTool.getNumNorm_MassMult();
1659  tmp += numPCGmassOp;
1660  cout << tmp << endl;
1661  cout << " Mass multiplications in solver loop = ";
1662  cout.width(9);
1663  cout << massOp << endl;
1664  cout << " Mass multiplications for orthogonalization = ";
1665  cout.width(9);
1666  cout << modalTool.getNumProj_MassMult() << endl;
1667  cout << " Mass multiplications for normalization = ";
1668  cout.width(9);
1669  cout << modalTool.getNumNorm_MassMult() << endl;
1670  cout << " Mass multiplications in PCG loop = ";
1671  cout.width(9);
1672  cout << numPCGmassOp << endl;
1673  cout << endl;
1674  cout << " Total number of stiffness matrix operations = ";
1675  cout.width(9);
1676  cout << stifOp + numPCGstifOp << endl;
1677  cout << " Stiffness multiplications in solver loop = ";
1678  cout.width(9);
1679  cout << stifOp << endl;
1680  cout << " Stiffness multiplications in PCG loop = ";
1681  cout.width(9);
1682  cout << numPCGstifOp << endl;
1683  cout << endl;
1684  cout << " Total number of preconditioner applications = ";
1685  cout.width(9);
1686  cout << precOp << endl;
1687  cout << endl;
1688  cout << " Total number of PCG solve = ";
1689  cout.width(9);
1690  cout << numCorrectionSolve << endl;
1691  cout << " Number of projected precond. appl. : ";
1692  cout.width(9);
1693  cout << numCorrectionPrec << endl;
1694  cout.setf(ios::fixed, ios::floatfield);
1695  cout.precision(2);
1696  cout.width(9);
1697  cout << " Average number of iter. per solve : ";
1698  cout.width(9);
1699  cout << ((double) sumIterPCG)*blockSize/((double) numCorrectionSolve) << endl;
1700  cout << " Maximum number of iter. per solve : ";
1701  cout.width(9);
1702  cout << maxIterPCG << endl;
1703  cout << endl;
1704  cout << " Total number of computed eigen-residuals = ";
1705  cout.width(9);
1706  cout << residual << endl;
1707  cout << endl;
1708  cout << " Total number of outer iterations = ";
1709  cout.width(9);
1710  cout << outerIter << endl;
1711  cout << " Number of restarts = ";
1712  cout.width(9);
1713  cout << numRestart << endl;
1714  cout << endl;
1715  cout << " Maximum size of search space = ";
1716  cout.width(9);
1717  cout << maxSpaceSize << endl;
1718  cout << " Average size of search space = ";
1719  cout.setf(ios::fixed, ios::floatfield);
1720  cout.precision(2);
1721  cout.width(9);
1722  cout << ((double) sumSpaceSize)/((double) outerIter) << endl;
1723  cout << endl;
1724  }
1725 
1726 }
1727 
1728 
1729 void JDPCG::timeInfo() const {
1730 
1731  int myPid = MyComm.MyPID();
1732 
1733  if (myPid == 0) {
1734  cout << " --- Timings ---\n\n";
1735  cout.setf(ios::fixed, ios::floatfield);
1736  cout.precision(2);
1737  cout << " Total time for outer loop = ";
1738  cout.width(9);
1739  cout << timeOuterLoop << " s" << endl;
1740  cout << " Time for mass matrix operations = ";
1741  cout.width(9);
1742  cout << timeMassOp << " s ";
1743  cout.width(5);
1744  cout << 100*timeMassOp/timeOuterLoop << " %\n";
1745  cout << " Time for stiffness matrix operations = ";
1746  cout.width(9);
1747  cout << timeStifOp << " s ";
1748  cout.width(5);
1749  cout << 100*timeStifOp/timeOuterLoop << " %\n";
1750  cout << " Time for preconditioner applications = ";
1751  cout.width(9);
1752  cout << timePrecOp << " s ";
1753  cout.width(5);
1754  cout << 100*timePrecOp/timeOuterLoop << " %\n";
1755  cout << " Time for orthogonalizations = ";
1756  cout.width(9);
1757  cout << timeOrtho << " s ";
1758  cout.width(5);
1759  cout << 100*timeOrtho/timeOuterLoop << " %\n";
1760  cout << " Projection step : ";
1761  cout.width(9);
1762  cout << modalTool.getTimeProj() << " s\n";
1763  cout << " Normalization step : ";
1764  cout.width(9);
1765  cout << modalTool.getTimeNorm() << " s\n";
1766  cout << " Time for local projection = ";
1767  cout.width(9);
1768  cout << timeLocalProj << " s ";
1769  cout.width(5);
1770  cout << 100*timeLocalProj/timeOuterLoop << " %\n";
1771  cout << " Time for local eigensolve = ";
1772  cout.width(9);
1773  cout << timeLocalSolve << " s ";
1774  cout.width(5);
1775  cout << 100*timeLocalSolve/timeOuterLoop << " %\n";
1776  cout << " Time for local update = ";
1777  cout.width(9);
1778  cout << timeLocalUpdate << " s ";
1779  cout.width(5);
1780  cout << 100*timeLocalUpdate/timeOuterLoop << " %\n";
1781  cout << " Time for building the matrix QtMP^{-1}MQ = ";
1782  cout.width(9);
1783  cout << timeBuildQtMPMQ << " s ";
1784  cout.width(5);
1785  cout << 100*timeBuildQtMPMQ/timeOuterLoop << " %\n";
1786  cout << " Time for solving the correction equation = ";
1787  cout.width(9);
1788  cout << timeCorrectionSolve << " s ";
1789  cout.width(5);
1790  cout << 100*timeCorrectionSolve/timeOuterLoop << " %\n";
1791  cout << " Mult. Preconditioner : ";
1792  cout.width(9);
1793  cout << timeCorrectionPrec << " s" << endl;
1794  cout << " Correction of Prec. : ";
1795  cout.width(9);
1796  cout << (timePCGPrec - timeCorrectionPrec) << " s" << endl;
1797  cout << " Shifted Matrix Mult. : ";
1798  cout.width(9);
1799  cout << timePCGOpMult << " s" << endl;
1800  cout << " Eigen-residuals checks : ";
1801  cout.width(9);
1802  cout << timePCGEigCheck << " s" << endl;
1803  cout << " Time for restarting space definition = ";
1804  cout.width(9);
1805  cout << timeRestart << " s ";
1806  cout.width(5);
1807  cout << 100*timeRestart/timeOuterLoop << " %\n";
1808  cout << " Time for residual computations = ";
1809  cout.width(9);
1810  cout << timeResidual << " s ";
1811  cout.width(5);
1812  cout << 100*timeResidual/timeOuterLoop << " %\n";
1813  cout << "\n";
1814  cout << " Total time for post-processing = ";
1815  cout.width(9);
1816  cout << timePostProce << " s\n";
1817  cout << endl;
1818  } // if (myPid == 0)
1819 
1820 }
1821 
1822 
void GEMM(const char TRANSA, const char TRANSB, const int M, const int N, const int K, const float ALPHA, const float *A, const int LDA, const float *B, const int LDB, const float BETA, float *C, const int LDC) const
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)