Anasazi  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
KnyazevLOBPCG.cpp
1 // @HEADER
2 // *****************************************************************************
3 // Anasazi: Block Eigensolvers Package
4 //
5 // Copyright 2004 NTESS and the Anasazi contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 //**************************************************************************
11 //
12 // NOTICE
13 //
14 // This software is a result of the research described in the report
15 //
16 // " A comparison of algorithms for modal analysis in the absence
17 // of a sparse direct method", P. Arbenz, R. Lehoucq, and U. Hetmaniuk,
18 // Sandia National Laboratories, Technical cout SAND2003-1028J.
19 //
20 // It is based on the Epetra, AztecOO, and ML packages defined in the Trilinos
21 // framework ( http://software.sandia.gov/trilinos/ ).
22 //
23 // The distribution of this software follows also the rules defined in Trilinos.
24 // This notice shall be marked on any reproduction of this software, in whole or
25 // in part.
26 //
27 // This program is distributed in the hope that it will be useful, but
28 // WITHOUT ANY WARRANTY; without even the implied warranty of
29 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
30 //
31 // Code Authors: U. Hetmaniuk (ulhetma@sandia.gov), R. Lehoucq (rblehou@sandia.gov)
32 //
33 //**************************************************************************
34 
35 #include "KnyazevLOBPCG.h"
36 
37 
38 KnyazevLOBPCG::KnyazevLOBPCG(const Epetra_Comm &_Comm, const Epetra_Operator *KK,
39  const Epetra_Operator *PP,
40  double _tol, int _maxIter, int _verb) :
41  myVerify(_Comm),
42  callBLAS(),
43  callFortran(),
44  modalTool(_Comm),
45  mySort(),
46  MyComm(_Comm),
47  K(KK),
48  M(0),
49  Prec(PP),
50  MyWatch(_Comm),
51  tolEigenSolve(_tol),
52  maxIterEigenSolve(_maxIter),
53  blockSize(0),
54  normWeight(0),
55  verbose(_verb),
56  historyCount(0),
57  resHistory(0),
58  memRequested(0.0),
59  highMem(0.0),
60  massOp(0),
61  numRestart(0),
62  outerIter(0),
63  precOp(0),
64  residual(0),
65  stifOp(0),
66  timeLocalProj(0.0),
67  timeLocalSolve(0.0),
68  timeLocalUpdate(0.0),
69  timeMassOp(0.0),
70  timeNorm(0.0),
71  timeOuterLoop(0.0),
72  timePostProce(0.0),
73  timePrecOp(0.0),
74  timeResidual(0.0),
75  timeStifOp(0.0)
76  {
77 
78 }
79 
80 
81 KnyazevLOBPCG::KnyazevLOBPCG(const Epetra_Comm &_Comm, const Epetra_Operator *KK,
82  const Epetra_Operator *MM, const Epetra_Operator *PP,
83  double _tol, int _maxIter, int _verb,
84  double *_weight) :
85  myVerify(_Comm),
86  callBLAS(),
87  callFortran(),
88  modalTool(_Comm),
89  mySort(),
90  MyComm(_Comm),
91  K(KK),
92  M(MM),
93  Prec(PP),
94  MyWatch(_Comm),
95  tolEigenSolve(_tol),
96  maxIterEigenSolve(_maxIter),
97  blockSize(0),
98  normWeight(_weight),
99  verbose(_verb),
100  historyCount(0),
101  resHistory(0),
102  memRequested(0.0),
103  highMem(0.0),
104  massOp(0),
105  numRestart(0),
106  outerIter(0),
107  precOp(0),
108  residual(0),
109  stifOp(0),
110  timeLocalProj(0.0),
111  timeLocalSolve(0.0),
112  timeLocalUpdate(0.0),
113  timeMassOp(0.0),
114  timeNorm(0.0),
115  timeOuterLoop(0.0),
116  timePostProce(0.0),
117  timePrecOp(0.0),
118  timeResidual(0.0),
119  timeStifOp(0.0)
120  {
121 
122 }
123 
124 
125 KnyazevLOBPCG::~KnyazevLOBPCG() {
126 
127  if (resHistory) {
128  delete[] resHistory;
129  resHistory = 0;
130  }
131 
132 }
133 
134 
135 int KnyazevLOBPCG::solve(int numEigen, Epetra_MultiVector &Q, double *lambda) {
136 
137  return KnyazevLOBPCG::reSolve(numEigen, Q, lambda);
138 
139 }
140 
141 
142 int KnyazevLOBPCG::reSolve(int numEigen, Epetra_MultiVector &Q, double *lambda, int startingEV) {
143 
144  // Computes the smallest eigenvalues and the corresponding eigenvectors
145  // of the generalized eigenvalue problem
146  //
147  // K X = M X Lambda
148  //
149  // using a Locally Optimal Block Preconditioned Conjugate Gradient method.
150  //
151  // Note that if M is not specified, then K X = X Lambda is solved.
152  // Note that the blocksize is equal to the number of requested eigenvalues.
153  //
154  // Ref: A. Knyazev, "Toward the optimal preconditioned eigensolver:
155  // Locally optimal block preconditioner conjugate gradient method",
156  // SIAM J. Sci. Comput., vol 23, n 2, pp. 517-541
157  // Ref: A. Knyazev and M. Argentati, "Implementation of a preconditioned
158  // eigensolver using Hypre", Numerical Linear Algebra with Applications (submitted)
159  //
160  // Input variables:
161  //
162  // numEigen (integer) = Number of eigenmodes requested
163  //
164  // Q (Epetra_MultiVector) = Converged eigenvectors
165  // The number of columns of Q must be equal to numEigen.
166  // The rows of Q are distributed across processors.
167  // At exit, the first numEigen columns contain the eigenvectors requested.
168  //
169  // lambda (array of doubles) = Converged eigenvalues
170  // At input, it must be of size numEigen.
171  // At exit, the first numEigen locations contain the eigenvalues requested.
172  //
173  // startingEV (integer) = Number of existing converged eigenmodes
174  //
175  // Return information on status of computation
176  //
177  // info >= 0 >> Number of converged eigenpairs at the end of computation
178  //
179  // // Failure due to input arguments
180  //
181  // info = - 1 >> The stiffness matrix K has not been specified.
182  // info = - 2 >> The maps for the matrix K and the matrix M differ.
183  // info = - 3 >> The maps for the matrix K and the preconditioner P differ.
184  // info = - 4 >> The maps for the vectors and the matrix K differ.
185  // info = - 5 >> Q is too small for the number of eigenvalues requested.
186  // info = - 6 >> Q is too small for the computation parameters.
187  //
188  // info = - 10 >> Failure during the mass orthonormalization
189  //
190  // info = - 20 >> Error in LAPACK during the local eigensolve
191  //
192  // info = - 30 >> MEMORY
193  //
194 
195  // Check the input parameters
196 
197  if (numEigen <= startingEV) {
198  return startingEV;
199  }
200 
201  int info = myVerify.inputArguments(numEigen, K, M, Prec, Q, numEigen);
202  if (info < 0)
203  return info;
204 
205  int myPid = MyComm.MyPID();
206 
207  // Get the weight for approximating the M-inverse norm
208  Epetra_Vector *vectWeight = 0;
209  if (normWeight) {
210  vectWeight = new Epetra_Vector(View, Q.Map(), normWeight);
211  }
212 
213  int knownEV = startingEV;
214  int localVerbose = verbose*(myPid==0);
215 
216  // Define local block vectors
217  //
218  // MX = Working vectors (storing M*X if M is specified, else pointing to X)
219  // KX = Working vectors (storing K*X)
220  //
221  // R = Residuals
222  //
223  // H = Preconditioned search space
224  // MH = Working vectors (storing M*H if M is specified, else pointing to H)
225  // KH = Working vectors (storing K*H)
226  //
227  // P = Search directions
228  // MP = Working vectors (storing M*P if M is specified, else pointing to P)
229  // KP = Working vectors (storing K*P)
230 
231  int xr = Q.MyLength();
232  Epetra_MultiVector X(View, Q, 0, numEigen);
233 
234  blockSize = numEigen;
235 
236  int tmp;
237  tmp = (M == 0) ? 6*numEigen*xr : 9*numEigen*xr;
238 
239  double *work1 = new (nothrow) double[tmp];
240  if (work1 == 0) {
241  if (vectWeight)
242  delete vectWeight;
243  info = -30;
244  return info;
245  }
246  memRequested += sizeof(double)*tmp/(1024.0*1024.0);
247 
248  highMem = (highMem > currentSize()) ? highMem : currentSize();
249 
250  double *tmpD = work1;
251 
252  Epetra_MultiVector KX(View, Q.Map(), tmpD, xr, numEigen);
253  tmpD = tmpD + xr*numEigen;
254 
255  Epetra_MultiVector MX(View, Q.Map(), (M) ? tmpD : X.Values(), xr, numEigen);
256  tmpD = (M) ? tmpD + xr*numEigen : tmpD;
257 
258  Epetra_MultiVector R(View, Q.Map(), tmpD, xr, numEigen);
259  tmpD = tmpD + xr*numEigen;
260 
261  Epetra_MultiVector H(View, Q.Map(), tmpD, xr, numEigen);
262  tmpD = tmpD + xr*numEigen;
263 
264  Epetra_MultiVector KH(View, Q.Map(), tmpD, xr, numEigen);
265  tmpD = tmpD + xr*numEigen;
266 
267  Epetra_MultiVector MH(View, Q.Map(), (M) ? tmpD : H.Values(), xr, numEigen);
268  tmpD = (M) ? tmpD + xr*numEigen : tmpD;
269 
270  Epetra_MultiVector P(View, Q.Map(), tmpD, xr, numEigen);
271  tmpD = tmpD + xr*numEigen;
272 
273  Epetra_MultiVector KP(View, Q.Map(), tmpD, xr, numEigen);
274  tmpD = tmpD + xr*numEigen;
275 
276  Epetra_MultiVector MP(View, Q.Map(), (M) ? tmpD : P.Values(), xr, numEigen);
277 
278  if (startingEV > 0) {
279  // Fill the first vectors of KX and MX
280  Epetra_MultiVector copyX(View, X, 0, startingEV);
281  Epetra_MultiVector copyKX(View, KX, 0, startingEV);
282  timeStifOp -= MyWatch.WallTime();
283  K->Apply(copyX, copyKX);
284  timeStifOp += MyWatch.WallTime();
285  stifOp += startingEV;
286  timeMassOp -= MyWatch.WallTime();
287  if (M) {
288  Epetra_MultiVector copyMX(View, MX, 0, startingEV);
289  M->Apply(copyX, copyMX);
290  }
291  timeMassOp += MyWatch.WallTime();
292  massOp += startingEV;
293  }
294 
295  // Define arrays
296  //
297  // theta = Store the local eigenvalues (size: numEigen)
298  // normR = Store the norm of residuals (size: numEigen)
299  //
300  // MM = Local mass matrix (size: 3*numEigen x 3*numEigen)
301  // KK = Local stiffness matrix (size: 3*numEigen x 3*numEigen)
302  //
303  // S = Local eigenvectors (size: 3*numEigen x 3*numEigen)
304 
305  int lwork2;
306  lwork2 = 2*numEigen + 27*numEigen*numEigen;
307  double *work2 = new (nothrow) double[lwork2];
308  if (work2 == 0) {
309  if (vectWeight)
310  delete vectWeight;
311  delete[] work1;
312  info = -30;
313  return info;
314  }
315 
316  highMem = (highMem > currentSize()) ? highMem : currentSize();
317 
318  tmpD = work2;
319 
320  double *theta = tmpD;
321  tmpD = tmpD + numEigen;
322 
323  double *normR = tmpD;
324  tmpD = tmpD + numEigen;
325 
326  double *MM = tmpD;
327  tmpD = tmpD + 9*numEigen*numEigen;
328 
329  double *KK = tmpD;
330  tmpD = tmpD + 9*numEigen*numEigen;
331 
332  double *S = tmpD;
333 
334  memRequested += sizeof(double)*lwork2/(1024.0*1024.0);
335 
336  // Define an array to store the residuals history
337  if (localVerbose > 2) {
338  resHistory = new (nothrow) double[maxIterEigenSolve*numEigen];
339  if (resHistory == 0) {
340  if (vectWeight)
341  delete vectWeight;
342  delete[] work1;
343  delete[] work2;
344  info = -30;
345  return info;
346  }
347  historyCount = 0;
348  }
349 
350  // Miscellaneous definitions
351 
352  bool reStart = false;
353  numRestart = 0;
354 
355  int localSize;
356  int firstIndex = knownEV;
357  int i, j;
358 
359  if (localVerbose > 0) {
360  cout << endl;
361  cout << " *|* Problem: ";
362  if (M)
363  cout << "K*Q = M*Q D ";
364  else
365  cout << "K*Q = Q D ";
366  if (Prec)
367  cout << " with preconditioner";
368  cout << endl;
369  cout << " *|* Algorithm = LOBPCG (Knyazev's version)" << endl;
370  cout << " *|* Size of blocks = " << blockSize << endl;
371  cout << " *|* Number of requested eigenvalues = " << numEigen << endl;
372  cout.precision(2);
373  cout.setf(ios::scientific, ios::floatfield);
374  cout << " *|* Tolerance for convergence = " << tolEigenSolve << endl;
375  cout << " *|* Norm used for convergence: ";
376  if (normWeight)
377  cout << "weighted L2-norm with user-provided weights" << endl;
378  else
379  cout << "L^2-norm" << endl;
380  if (startingEV > 0)
381  cout << " *|* Input converged eigenvectors = " << startingEV << endl;
382  cout << "\n -- Start iterations -- \n";
383  }
384 
385  timeOuterLoop -= MyWatch.WallTime();
386  for (outerIter = 1; outerIter <= maxIterEigenSolve; ++outerIter) {
387 
388  highMem = (highMem > currentSize()) ? highMem : currentSize();
389 
390  int workingSize = numEigen - knownEV;
391 
392  Epetra_MultiVector XI(View, X, firstIndex, workingSize);
393  Epetra_MultiVector MXI(View, MX, firstIndex, workingSize);
394  Epetra_MultiVector KXI(View, KX, firstIndex, workingSize);
395 
396  Epetra_MultiVector HI(View, H, firstIndex, workingSize);
397  Epetra_MultiVector MHI(View, MH, firstIndex, workingSize);
398  Epetra_MultiVector KHI(View, KH, firstIndex, workingSize);
399 
400  Epetra_MultiVector PI(View, P, firstIndex, workingSize);
401  Epetra_MultiVector MPI(View, MP, firstIndex, workingSize);
402  Epetra_MultiVector KPI(View, KP, firstIndex, workingSize);
403 
404  Epetra_MultiVector RI(View, R, firstIndex, workingSize);
405 
406  if ((outerIter == 1) || (reStart == true)) {
407 
408  reStart = false;
409  localSize = numEigen;
410 
411  // Apply the mass matrix to X
412  timeMassOp -= MyWatch.WallTime();
413  if (M)
414  M->Apply(XI, MXI);
415  timeMassOp += MyWatch.WallTime();
416  massOp += workingSize;
417 
418  // Apply the stiffness matrix to X
419  timeStifOp -= MyWatch.WallTime();
420  K->Apply(XI, KXI);
421  timeStifOp += MyWatch.WallTime();
422  stifOp += workingSize;
423 
424  } // if ((outerIter == 1) || (reStart == true))
425  else {
426 
427  // Apply the preconditioner on the residuals
428  if (Prec) {
429  timePrecOp -= MyWatch.WallTime();
430  Prec->ApplyInverse(RI, HI);
431  timePrecOp += MyWatch.WallTime();
432  precOp += workingSize;
433  }
434  else {
435  memcpy(HI.Values(), RI.Values(), xr*workingSize*sizeof(double));
436  }
437 
438  // Apply the mass matrix on H
439  timeMassOp -= MyWatch.WallTime();
440  if (M)
441  M->Apply(HI, MHI);
442  timeMassOp += MyWatch.WallTime();
443  massOp += workingSize;
444 
445  // Apply the stiffness matrix to H
446  timeStifOp -= MyWatch.WallTime();
447  K->Apply(HI, KHI);
448  timeStifOp += MyWatch.WallTime();
449  stifOp += workingSize;
450 
451  if (localSize == numEigen)
452  localSize += workingSize;
453 
454  } // if ((outerIter == 1) || (reStart==true))
455 
456  // Form "local" mass and stiffness matrices
457  // Note: Use S as a temporary workspace
458  timeLocalProj -= MyWatch.WallTime();
459  modalTool.localProjection(numEigen, numEigen, xr, X.Values(), xr, KX.Values(), xr,
460  KK, localSize, S);
461  modalTool.localProjection(numEigen, numEigen, xr, X.Values(), xr, MX.Values(), xr,
462  MM, localSize, S);
463  if (localSize > numEigen) {
464  double *pointer = KK + numEigen*localSize;
465  modalTool.localProjection(numEigen, workingSize, xr, X.Values(), xr, KHI.Values(), xr,
466  pointer, localSize, S);
467  modalTool.localProjection(workingSize, workingSize, xr, HI.Values(), xr, KHI.Values(), xr,
468  pointer + numEigen, localSize, S);
469  pointer = MM + numEigen*localSize;
470  modalTool.localProjection(numEigen, workingSize, xr, X.Values(), xr, MHI.Values(), xr,
471  pointer, localSize, S);
472  modalTool.localProjection(workingSize, workingSize, xr, HI.Values(), xr, MHI.Values(), xr,
473  pointer + numEigen, localSize, S);
474  if (localSize > numEigen + workingSize) {
475  pointer = KK + (numEigen + workingSize)*localSize;
476  modalTool.localProjection(numEigen, workingSize, xr, X.Values(), xr, KPI.Values(), xr,
477  pointer, localSize, S);
478  modalTool.localProjection(workingSize, workingSize, xr, HI.Values(), xr, KPI.Values(), xr,
479  pointer + numEigen, localSize, S);
480  modalTool.localProjection(workingSize, workingSize, xr, PI.Values(), xr, KPI.Values(), xr,
481  pointer + numEigen + workingSize, localSize, S);
482  pointer = MM + (numEigen + workingSize)*localSize;
483  modalTool.localProjection(numEigen, workingSize, xr, X.Values(), xr, MPI.Values(), xr,
484  pointer, localSize, S);
485  modalTool.localProjection(workingSize, workingSize, xr, HI.Values(), xr, MPI.Values(), xr,
486  pointer + numEigen, localSize, S);
487  modalTool.localProjection(workingSize, workingSize, xr, PI.Values(), xr, MPI.Values(), xr,
488  pointer + numEigen + workingSize, localSize, S);
489  } // if (localSize > numEigen + workingSize)
490  } // if (localSize > numEigen)
491  timeLocalProj += MyWatch.WallTime();
492 
493  // Perform a spectral decomposition
494  timeLocalSolve -= MyWatch.WallTime();
495  int nevLocal = localSize;
496  info = modalTool.directSolver(localSize, KK, localSize, MM, localSize, nevLocal,
497  S, localSize, theta, localVerbose,
498  (blockSize == 1) ? 1 : 0);
499  timeLocalSolve += MyWatch.WallTime();
500 
501  if (info < 0) {
502  // Stop when spectral decomposition has a critical failure
503  break;
504  } // if (info < 0)
505 
506  // Check for restarting
507  if ((theta[0] < 0.0) || (nevLocal < numEigen)) {
508  if (localVerbose > 0) {
509  cout << " Iteration " << outerIter;
510  cout << " - Failure for spectral decomposition - RESTART with new random search\n";
511  }
512  if (workingSize == 1) {
513  XI.Random();
514  }
515  else {
516  Epetra_MultiVector Xinit(View, XI, 1, workingSize-1);
517  Xinit.Random();
518  } // if (workingSize == 1)
519  reStart = true;
520  numRestart += 1;
521  info = 0;
522  continue;
523  } // if ((theta[0] < 0.0) || (nevLocal < numEigen))
524 
525  if ((localSize == numEigen+workingSize) && (nevLocal == numEigen)) {
526  for (j = 0; j < nevLocal; ++j)
527  memcpy(S+j*numEigen, S+j*localSize, numEigen*sizeof(double));
528  localSize = numEigen;
529  }
530 
531  if ((localSize == numEigen+2*workingSize) && (nevLocal <= numEigen+workingSize)) {
532  for (j = 0; j < nevLocal; ++j)
533  memcpy(S+j*(numEigen+workingSize), S+j*localSize, (numEigen+workingSize)*sizeof(double));
534  localSize = numEigen + workingSize;
535  }
536 
537  // Update the spaces
538  // Note: Use R as a temporary work space
539  timeLocalUpdate -= MyWatch.WallTime();
540 
541  memcpy(R.Values(), X.Values(), xr*numEigen*sizeof(double));
542  callBLAS.GEMM('N', 'N', xr, numEigen, numEigen, 1.0, R.Values(), xr, S, localSize,
543  0.0, X.Values(), xr);
544  if (M) {
545  memcpy(R.Values(), MX.Values(), xr*numEigen*sizeof(double));
546  callBLAS.GEMM('N', 'N', xr, numEigen, numEigen, 1.0, R.Values(), xr, S, localSize,
547  0.0, MX.Values(), xr);
548  }
549  memcpy(R.Values(), KX.Values(), xr*numEigen*sizeof(double));
550  callBLAS.GEMM('N', 'N', xr, numEigen, numEigen, 1.0, R.Values(), xr, S, localSize,
551  0.0, KX.Values(), xr);
552 
553  if (localSize == numEigen + workingSize) {
554  callBLAS.GEMM('N', 'N', xr, numEigen, workingSize, 1.0, HI.Values(), xr,
555  S + numEigen, localSize, 0.0, P.Values(), xr);
556  if (M) {
557  callBLAS.GEMM('N', 'N', xr, numEigen, workingSize, 1.0, MHI.Values(), xr,
558  S + numEigen, localSize, 0.0, MP.Values(), xr);
559  }
560  callBLAS.GEMM('N', 'N', xr, numEigen, workingSize, 1.0, KHI.Values(), xr,
561  S + numEigen, localSize, 0.0, KP.Values(), xr);
562  }
563 
564  if (localSize > numEigen + workingSize) {
565  memcpy(RI.Values(), PI.Values(), xr*workingSize*sizeof(double));
566  callBLAS.GEMM('N', 'N', xr, numEigen, workingSize, 1.0, HI.Values(), xr,
567  S + numEigen, localSize, 0.0, P.Values(), xr);
568  callBLAS.GEMM('N', 'N', xr, numEigen, workingSize, 1.0, RI.Values(), xr,
569  S + numEigen + workingSize, localSize, 1.0, P.Values(), xr);
570  if (M) {
571  memcpy(RI.Values(), MPI.Values(), xr*workingSize*sizeof(double));
572  callBLAS.GEMM('N', 'N', xr, numEigen, workingSize, 1.0, MHI.Values(), xr,
573  S + numEigen, localSize, 0.0, MP.Values(), xr);
574  callBLAS.GEMM('N', 'N', xr, numEigen, workingSize, 1.0, RI.Values(), xr,
575  S + numEigen + workingSize, localSize, 1.0, MP.Values(), xr);
576  }
577  memcpy(RI.Values(), KPI.Values(), xr*workingSize*sizeof(double));
578  callBLAS.GEMM('N', 'N', xr, numEigen, workingSize, 1.0, KHI.Values(), xr,
579  S + numEigen, localSize, 0.0, KP.Values(), xr);
580  callBLAS.GEMM('N', 'N', xr, numEigen, workingSize, 1.0, RI.Values(), xr,
581  S + numEigen + workingSize, localSize, 1.0, KP.Values(), xr);
582  }
583 
584  if (localSize > numEigen) {
585  callBLAS.AXPY(xr*numEigen, 1.0, P.Values(), X.Values());
586  if (M)
587  callBLAS.AXPY(xr*numEigen, 1.0, MP.Values(), MX.Values());
588  callBLAS.AXPY(xr*numEigen, 1.0, KP.Values(), KX.Values());
589  }
590  timeLocalUpdate += MyWatch.WallTime();
591 
592  // Compute the residuals
593  timeResidual -= MyWatch.WallTime();
594  memcpy(R.Values(), KX.Values(), xr*numEigen*sizeof(double));
595  for (j = 0; j < numEigen; ++j) {
596  callBLAS.AXPY(xr, -theta[j], MX.Values() + j*xr, R.Values() + j*xr);
597  }
598  timeResidual += MyWatch.WallTime();
599  residual += numEigen;
600 
601  // Compute the norms of the residuals
602  timeNorm -= MyWatch.WallTime();
603  if (vectWeight) {
604  R.NormWeighted(*vectWeight, normR);
605  }
606  else {
607  R.Norm2(normR);
608  }
609  // Scale the norms of residuals with the eigenvalues
610  for (j = 0; j < numEigen; ++j) {
611  normR[j] = (theta[j] == 0.0) ? normR[j] : normR[j]/theta[j];
612  }
613  timeNorm += MyWatch.WallTime();
614 
615  // When required, monitor some orthogonalities
616  if (verbose > 2) {
617  accuracyCheck(&X, &MX, &R);
618  } // if (verbose > 2)
619 
620  if (localSize == numEigen + workingSize)
621  localSize += workingSize;
622 
623  // Count the converged eigenvectors
624  timeNorm -= MyWatch.WallTime();
625  knownEV = 0;
626  for (i=0; i < numEigen; ++i) {
627  if (normR[i] < tolEigenSolve) {
628  lambda[i] = theta[i];
629  knownEV += 1;
630  }
631  }
632  timeNorm += MyWatch.WallTime();
633 
634  // Store the residual history
635  if (localVerbose > 2) {
636  memcpy(resHistory + historyCount, normR, numEigen*sizeof(double));
637  historyCount += numEigen;
638  }
639 
640  // Print information on current iteration
641  if (localVerbose > 0) {
642  cout << " Iteration " << outerIter;
643  cout << " - Number of converged eigenvectors " << knownEV << endl;
644  }
645 
646  if (localVerbose > 1) {
647  cout << endl;
648  cout.precision(2);
649  cout.setf(ios::scientific, ios::floatfield);
650  for (i=0; i<numEigen; ++i) {
651  cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i;
652  cout << " = " << normR[i] << endl;
653  }
654  cout << endl;
655  cout.precision(2);
656  for (i=0; i<numEigen; ++i) {
657  cout << " Iteration " << outerIter << " - Ritz eigenvalue " << i;
658  cout.setf((fabs(theta[i]) < 0.01) ? ios::scientific : ios::fixed, ios::floatfield);
659  cout << " = " << theta[i] << endl;
660  }
661  cout << endl;
662  }
663 
664  // Convergence test
665  if (knownEV >= numEigen) {
666  if (localVerbose == 1) {
667  cout << endl;
668  cout.precision(2);
669  cout.setf(ios::scientific, ios::floatfield);
670  for (i=0; i<numEigen; ++i) {
671  cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i;
672  cout << " = " << normR[i] << endl;
673  }
674  cout << endl;
675  }
676  break;
677  }
678 
679  // Update the sizes
680  if ((knownEV > 0) && (localSize > numEigen)) {
681  if (localSize == numEigen + workingSize)
682  localSize = 2*numEigen - knownEV;
683  if (localSize == numEigen + 2*workingSize)
684  localSize = 3*numEigen - 2*knownEV;
685  }
686 
687  // Put the unconverged vectors at the end if needed
688 
689  for (i=0; i<numEigen; ++i) {
690  if (normR[i] >= tolEigenSolve) {
691  firstIndex = i;
692  break;
693  }
694  }
695 
696  // Continue the loop if no motion of vectors is necessary
697  if (firstIndex == numEigen-1)
698  continue;
699 
700  while (firstIndex < knownEV) {
701 
702  for (j = firstIndex; j < numEigen; ++j) {
703  if (normR[j] < tolEigenSolve) {
704  callFortran.SWAP(1, normR + j, 1, normR + firstIndex, 1);
705  callFortran.SWAP(xr, X.Values() + j*xr, 1, X.Values() + firstIndex*xr, 1);
706  callFortran.SWAP(xr, KX.Values() + j*xr, 1, KX.Values() + firstIndex*xr, 1);
707  callFortran.SWAP(xr, R.Values() + j*xr, 1, R.Values() + firstIndex*xr, 1);
708  callFortran.SWAP(xr, H.Values() + j*xr, 1, H.Values() + firstIndex*xr, 1);
709  callFortran.SWAP(xr, KH.Values() + j*xr, 1, KH.Values() + firstIndex*xr, 1);
710  callFortran.SWAP(xr, P.Values() + j*xr, 1, P.Values() + firstIndex*xr, 1);
711  callFortran.SWAP(xr, KP.Values() + j*xr, 1, KP.Values() + firstIndex*xr, 1);
712  if (M) {
713  callFortran.SWAP(xr, MX.Values() + j*xr, 1, MX.Values() + firstIndex*xr, 1);
714  callFortran.SWAP(xr, MH.Values() + j*xr, 1, MH.Values() + firstIndex*xr, 1);
715  callFortran.SWAP(xr, MP.Values() + j*xr, 1, MP.Values() + firstIndex*xr, 1);
716  }
717  break;
718  }
719  }
720 
721  for (i = firstIndex; i < numEigen; ++i) {
722  if (normR[i] >= tolEigenSolve) {
723  firstIndex = i;
724  break;
725  }
726  }
727 
728  }
729 
730  } // for (outerIter = 1; outerIter <= maxIterEigenSolve; ++outerIter)
731  timeOuterLoop += MyWatch.WallTime();
732  highMem = (highMem > currentSize()) ? highMem : currentSize();
733 
734  // Clean memory
735  delete[] work1;
736  delete[] work2;
737  if (vectWeight)
738  delete vectWeight;
739 
740  // Sort the eigenpairs
741  timePostProce -= MyWatch.WallTime();
742  if ((info == 0) && (knownEV > 0)) {
743  mySort.sortScalars_Vectors(knownEV, lambda, Q.Values(), Q.MyLength());
744  }
745  timePostProce += MyWatch.WallTime();
746 
747  return (info == 0) ? knownEV : info;
748 
749 }
750 
751 
752 void KnyazevLOBPCG::accuracyCheck(const Epetra_MultiVector *X, const Epetra_MultiVector *MX,
753  const Epetra_MultiVector *R) const {
754 
755  cout.precision(2);
756  cout.setf(ios::scientific, ios::floatfield);
757  double tmp;
758 
759  int myPid = MyComm.MyPID();
760 
761  if (X) {
762  if (M) {
763  if (MX) {
764  tmp = myVerify.errorEquality(X, MX, M);
765  if (myPid == 0)
766  cout << " >> Difference between MX and M*X = " << tmp << endl;
767  }
768  tmp = myVerify.errorOrthonormality(X, M);
769  if (myPid == 0)
770  cout << " >> Error in X^T M X - I = " << tmp << endl;
771  }
772  else {
773  tmp = myVerify.errorOrthonormality(X, 0);
774  if (myPid == 0)
775  cout << " >> Error in X^T X - I = " << tmp << endl;
776  }
777  }
778 
779  if ((R) && (X)) {
780  tmp = myVerify.errorOrthogonality(X, R);
781  if (myPid == 0)
782  cout << " >> Orthogonality X^T R up to " << tmp << endl;
783  }
784 
785 }
786 
787 
788 void KnyazevLOBPCG::initializeCounters() {
789 
790  historyCount = 0;
791  if (resHistory) {
792  delete[] resHistory;
793  resHistory = 0;
794  }
795 
796  memRequested = 0.0;
797  highMem = 0.0;
798 
799  massOp = 0;
800  numRestart = 0;
801  outerIter = 0;
802  precOp = 0;
803  residual = 0;
804  stifOp = 0;
805 
806  timeLocalProj = 0.0;
807  timeLocalSolve = 0.0;
808  timeLocalUpdate = 0.0;
809  timeMassOp = 0.0;
810  timeNorm = 0.0;
811  timeOuterLoop = 0.0;
812  timePostProce = 0.0;
813  timePrecOp = 0.0;
814  timeResidual = 0.0;
815  timeStifOp = 0.0;
816 
817 
818 }
819 
820 
821 void KnyazevLOBPCG::algorithmInfo() const {
822 
823  int myPid = MyComm.MyPID();
824 
825  if (myPid ==0) {
826  cout << " Algorithm: LOBPCG (Knyazev's version) with Cholesky-based local eigensolver\n";
827  cout << " Block Size: " << blockSize << endl;
828  }
829 
830 }
831 
832 
833 void KnyazevLOBPCG::historyInfo() const {
834 
835  if (resHistory) {
836  int j;
837  cout << " Block Size: " << blockSize << endl;
838  cout << endl;
839  cout << " Residuals\n";
840  cout << endl;
841  cout.precision(4);
842  cout.setf(ios::scientific, ios::floatfield);
843  for (j = 0; j < historyCount; ++j) {
844  cout << resHistory[j] << endl;
845  }
846  cout << endl;
847  }
848 
849 }
850 
851 
852 void KnyazevLOBPCG::memoryInfo() const {
853 
854  int myPid = MyComm.MyPID();
855 
856  double maxHighMem = 0.0;
857  double tmp = highMem;
858  MyComm.MaxAll(&tmp, &maxHighMem, 1);
859 
860  double maxMemRequested = 0.0;
861  tmp = memRequested;
862  MyComm.MaxAll(&tmp, &maxMemRequested, 1);
863 
864  if (myPid == 0) {
865  cout.precision(2);
866  cout.setf(ios::fixed, ios::floatfield);
867  cout << " Memory requested per processor by the eigensolver = (EST) ";
868  cout.width(6);
869  cout << maxMemRequested << " MB " << endl;
870  cout << endl;
871  cout << " High water mark in eigensolver = (EST) ";
872  cout.width(6);
873  cout << maxHighMem << " MB " << endl;
874  cout << endl;
875  }
876 
877 }
878 
879 
880 void KnyazevLOBPCG::operationInfo() const {
881 
882  int myPid = MyComm.MyPID();
883 
884  if (myPid == 0) {
885  cout << " --- Operations ---\n\n";
886  cout << " Total number of mass matrix multiplications = ";
887  cout.width(9);
888  cout << massOp << endl;
889  cout << " Total number of stiffness matrix operations = ";
890  cout.width(9);
891  cout << stifOp << endl;
892  cout << " Total number of preconditioner applications = ";
893  cout.width(9);
894  cout << precOp << endl;
895  cout << " Total number of computed eigen-residuals = ";
896  cout.width(9);
897  cout << residual << endl;
898  cout << "\n";
899  cout << " Total number of outer iterations = ";
900  cout.width(9);
901  cout << outerIter << endl;
902  cout << " Number of restarts = ";
903  cout.width(9);
904  cout << numRestart << endl;
905  cout << "\n";
906  }
907 
908 }
909 
910 
911 void KnyazevLOBPCG::timeInfo() const {
912 
913  int myPid = MyComm.MyPID();
914 
915  if (myPid == 0) {
916  cout << " --- Timings ---\n\n";
917  cout.setf(ios::fixed, ios::floatfield);
918  cout.precision(2);
919  cout << " Total time for outer loop = ";
920  cout.width(9);
921  cout << timeOuterLoop << " s" << endl;
922  cout << " Time for mass matrix operations = ";
923  cout.width(9);
924  cout << timeMassOp << " s ";
925  cout.width(5);
926  cout << 100*timeMassOp/timeOuterLoop << " %\n";
927  cout << " Time for stiffness matrix operations = ";
928  cout.width(9);
929  cout << timeStifOp << " s ";
930  cout.width(5);
931  cout << 100*timeStifOp/timeOuterLoop << " %\n";
932  cout << " Time for preconditioner applications = ";
933  cout.width(9);
934  cout << timePrecOp << " s ";
935  cout.width(5);
936  cout << 100*timePrecOp/timeOuterLoop << " %\n";
937  cout << " Time for local projection = ";
938  cout.width(9);
939  cout << timeLocalProj << " s ";
940  cout.width(5);
941  cout << 100*timeLocalProj/timeOuterLoop << " %\n";
942  cout << " Time for local eigensolve = ";
943  cout.width(9);
944  cout << timeLocalSolve << " s ";
945  cout.width(5);
946  cout << 100*timeLocalSolve/timeOuterLoop << " %\n";
947  cout << " Time for local update = ";
948  cout.width(9);
949  cout << timeLocalUpdate << " s ";
950  cout.width(5);
951  cout << 100*timeLocalUpdate/timeOuterLoop << " %\n";
952  cout << " Time for residual computations = ";
953  cout.width(9);
954  cout << timeResidual << " s ";
955  cout.width(5);
956  cout << 100*timeResidual/timeOuterLoop << " %\n";
957  cout << " Time for residuals norm computations = ";
958  cout.width(9);
959  cout << timeNorm << " s ";
960  cout.width(5);
961  cout << 100*timeNorm/timeOuterLoop << " %\n";
962  cout << "\n";
963  cout << " Total time for post-processing = ";
964  cout.width(9);
965  cout << timePostProce << " s\n";
966  cout << endl;
967  } // if (myPid == 0)
968 
969 }
970 
971