Anasazi  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
LOBPCG.cpp
1 // @HEADER
2 // *****************************************************************************
3 // Anasazi: Block Eigensolvers Package
4 //
5 // Copyright 2004 NTESS and the Anasazi contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 //**************************************************************************
11 //
12 // NOTICE
13 //
14 // This software is a result of the research described in the cout
15 //
16 // " A comparison of algorithms for modal analysis in the absence
17 // of a sparse direct method", P. Arbenz, R. Lehoucq, and U. Hetmaniuk,
18 // Sandia National Laboratories, Technical cout SAND2003-1028J.
19 //
20 // It is based on the Epetra, AztecOO, and ML packages defined in the Trilinos
21 // framework ( http://software.sandia.gov/trilinos/ ).
22 //
23 // The distribution of this software follows also the rules defined in Trilinos.
24 // This notice shall be marked on any reproduction of this software, in whole or
25 // in part.
26 //
27 // This program is distributed in the hope that it will be useful, but
28 // WITHOUT ANY WARRANTY; without even the implied warranty of
29 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
30 //
31 // Code Authors: U. Hetmaniuk (ulhetma@sandia.gov), R. Lehoucq (rblehou@sandia.gov)
32 //
33 //**************************************************************************
34 
35 #include "LOBPCG.h"
36 
37 
38 LOBPCG::LOBPCG(const Epetra_Comm &_Comm, const Epetra_Operator *KK,
39  const Epetra_Operator *PP, int _blk,
40  double _tol, int _maxIter, int _verb) :
41  myVerify(_Comm),
42  callBLAS(),
43  callFortran(),
44  modalTool(_Comm),
45  mySort(),
46  MyComm(_Comm),
47  K(KK),
48  M(0),
49  Prec(PP),
50  MyWatch(_Comm),
51  tolEigenSolve(_tol),
52  maxIterEigenSolve(_maxIter),
53  blockSize(_blk),
54  normWeight(0),
55  verbose(_verb),
56  historyCount(0),
57  resHistory(0),
58  memRequested(0.0),
59  highMem(0.0),
60  massOp(0),
61  numRestart(0),
62  outerIter(0),
63  precOp(0),
64  residual(0),
65  stifOp(0),
66  timeLocalProj(0.0),
67  timeLocalSolve(0.0),
68  timeLocalUpdate(0.0),
69  timeMassOp(0.0),
70  timeNorm(0.0),
71  timeOrtho(0.0),
72  timeOuterLoop(0.0),
73  timePostProce(0.0),
74  timePrecOp(0.0),
75  timeResidual(0.0),
76  timeRestart(0.0),
77  timeStifOp(0.0)
78  {
79 
80 }
81 
82 
83 LOBPCG::LOBPCG(const Epetra_Comm &_Comm, const Epetra_Operator *KK,
84  const Epetra_Operator *MM, const Epetra_Operator *PP, int _blk,
85  double _tol, int _maxIter, int _verb,
86  double *_weight) :
87  myVerify(_Comm),
88  callBLAS(),
89  callFortran(),
90  modalTool(_Comm),
91  mySort(),
92  MyComm(_Comm),
93  K(KK),
94  M(MM),
95  Prec(PP),
96  MyWatch(_Comm),
97  tolEigenSolve(_tol),
98  maxIterEigenSolve(_maxIter),
99  blockSize(_blk),
100  normWeight(_weight),
101  verbose(_verb),
102  historyCount(0),
103  resHistory(0),
104  memRequested(0.0),
105  highMem(0.0),
106  massOp(0),
107  numRestart(0),
108  outerIter(0),
109  precOp(0),
110  residual(0),
111  stifOp(0),
112  timeLocalProj(0.0),
113  timeLocalSolve(0.0),
114  timeLocalUpdate(0.0),
115  timeMassOp(0.0),
116  timeNorm(0.0),
117  timeOrtho(0.0),
118  timeOuterLoop(0.0),
119  timePostProce(0.0),
120  timePrecOp(0.0),
121  timeResidual(0.0),
122  timeRestart(0.0),
123  timeStifOp(0.0)
124  {
125 
126 }
127 
128 
129 LOBPCG::~LOBPCG() {
130 
131  if (resHistory) {
132  delete[] resHistory;
133  resHistory = 0;
134  }
135 
136 }
137 
138 
139 int LOBPCG::reSolve(int numEigen, Epetra_MultiVector &Q, double *lambda, int startingEV) {
140 
141  // Computes the smallest eigenvalues and the corresponding eigenvectors
142  // of the generalized eigenvalue problem
143  //
144  // K X = M X Lambda
145  //
146  // using a modified Locally Optimal Block Preconditioned Conjugate
147  // Gradient method.
148  //
149  // Note that if M is not specified, then K X = X Lambda is solved.
150  //
151  // Ref: A. Knyazev, "Toward the optimal preconditioned eigensolver:
152  // Locally optimal block preconditioner conjugate gradient method",
153  // SIAM J. Sci. Comput., vol 23, n 2, pp. 517-541
154  //
155  // Input variables:
156  //
157  // numEigen (integer) = Number of eigenmodes requested
158  //
159  // Q (Epetra_MultiVector) = Converged eigenvectors
160  // The number of columns of Q must be equal to numEigen + blockSize.
161  // The rows of Q are distributed across processors.
162  // At exit, the first numEigen columns contain the eigenvectors requested.
163  //
164  // lambda (array of doubles) = Converged eigenvalues
165  // At input, it must be of size numEigen + blockSize.
166  // At exit, the first numEigen locations contain the eigenvalues requested.
167  //
168  // startingEV (integer) = Number of existing converged eigenmodes
169  //
170  // Return information on status of computation
171  //
172  // info >= 0 >> Number of converged eigenpairs at the end of computation
173  //
174  // // Failure due to input arguments
175  //
176  // info = - 1 >> The stiffness matrix K has not been specified.
177  // info = - 2 >> The maps for the matrix K and the matrix M differ.
178  // info = - 3 >> The maps for the matrix K and the preconditioner P differ.
179  // info = - 4 >> The maps for the vectors and the matrix K differ.
180  // info = - 5 >> Q is too small for the number of eigenvalues requested.
181  // info = - 6 >> Q is too small for the computation parameters.
182  //
183  // info = - 10 >> Failure during the mass orthonormalization
184  //
185  // info = - 20 >> Error in LAPACK during the local eigensolve
186  //
187  // info = - 30 >> MEMORY
188  //
189 
190  // Check the input parameters
191 
192  if (numEigen <= startingEV) {
193  return numEigen;
194  }
195 
196  int info = myVerify.inputArguments(numEigen, K, M, Prec, Q, minimumSpaceDimension(numEigen));
197  if (info < 0)
198  return info;
199 
200  int myPid = MyComm.MyPID();
201 
202  // Get the weight for approximating the M-inverse norm
203  Epetra_Vector *vectWeight = 0;
204  if (normWeight) {
205  vectWeight = new Epetra_Vector(View, Q.Map(), normWeight);
206  }
207 
208  int knownEV = startingEV;
209  int localVerbose = verbose*(myPid==0);
210 
211  // Define local block vectors
212  //
213  // MX = Working vectors (storing M*X if M is specified, else pointing to X)
214  // KX = Working vectors (storing K*X)
215  //
216  // R = Residuals
217  //
218  // H = Preconditioned search space
219  // MH = Working vectors (storing M*H if M is specified, else pointing to H)
220  // KH = Working vectors (storing K*H)
221  //
222  // P = Search directions
223  // MP = Working vectors (storing M*P if M is specified, else pointing to P)
224  // KP = Working vectors (storing K*P)
225 
226  int xr = Q.MyLength();
227  Epetra_MultiVector X(View, Q, numEigen, blockSize);
228  X.Random();
229 
230  int tmp;
231  tmp = (M == 0) ? 6*blockSize*xr : 9*blockSize*xr;
232 
233  double *work1 = new (nothrow) double[tmp];
234  if (work1 == 0) {
235  if (vectWeight)
236  delete vectWeight;
237  info = -30;
238  return info;
239  }
240  memRequested += sizeof(double)*tmp/(1024.0*1024.0);
241 
242  highMem = (highMem > currentSize()) ? highMem : currentSize();
243 
244  double *tmpD = work1;
245 
246  Epetra_MultiVector KX(View, Q.Map(), tmpD, xr, blockSize);
247  tmpD = tmpD + xr*blockSize;
248 
249  Epetra_MultiVector MX(View, Q.Map(), (M) ? tmpD : X.Values(), xr, blockSize);
250  tmpD = (M) ? tmpD + xr*blockSize : tmpD;
251 
252  Epetra_MultiVector R(View, Q.Map(), tmpD, xr, blockSize);
253  tmpD = tmpD + xr*blockSize;
254 
255  Epetra_MultiVector H(View, Q.Map(), tmpD, xr, blockSize);
256  tmpD = tmpD + xr*blockSize;
257 
258  Epetra_MultiVector KH(View, Q.Map(), tmpD, xr, blockSize);
259  tmpD = tmpD + xr*blockSize;
260 
261  Epetra_MultiVector MH(View, Q.Map(), (M) ? tmpD : H.Values(), xr, blockSize);
262  tmpD = (M) ? tmpD + xr*blockSize : tmpD;
263 
264  Epetra_MultiVector P(View, Q.Map(), tmpD, xr, blockSize);
265  tmpD = tmpD + xr*blockSize;
266 
267  Epetra_MultiVector KP(View, Q.Map(), tmpD, xr, blockSize);
268  tmpD = tmpD + xr*blockSize;
269 
270  Epetra_MultiVector MP(View, Q.Map(), (M) ? tmpD : P.Values(), xr, blockSize);
271 
272  // Define arrays
273  //
274  // theta = Store the local eigenvalues (size: 3*blockSize)
275  // normR = Store the norm of residuals (size: blockSize)
276  //
277  // MM = Local mass matrix (size: 3*blockSize x 3*blockSize)
278  // KK = Local stiffness matrix (size: 3*blockSize x 3*blockSize)
279  //
280  // S = Local eigenvectors (size: 3*blockSize x 3*blockSize)
281 
282  int lwork2;
283  lwork2 = 4*blockSize + 27*blockSize*blockSize;
284  double *work2 = new (nothrow) double[lwork2];
285  if (work2 == 0) {
286  if (vectWeight)
287  delete vectWeight;
288  delete[] work1;
289  info = -30;
290  return info;
291  }
292 
293  highMem = (highMem > currentSize()) ? highMem : currentSize();
294 
295  tmpD = work2;
296 
297  double *theta = tmpD;
298  tmpD = tmpD + 3*blockSize;
299 
300  double *normR = tmpD;
301  tmpD = tmpD + blockSize;
302 
303  double *MM = tmpD;
304  tmpD = tmpD + 9*blockSize*blockSize;
305 
306  double *KK = tmpD;
307  tmpD = tmpD + 9*blockSize*blockSize;
308 
309  double *S = tmpD;
310 
311  memRequested += sizeof(double)*lwork2/(1024.0*1024.0);
312 
313  // Define an array to store the residuals history
314  if (localVerbose > 2) {
315  resHistory = new (nothrow) double[maxIterEigenSolve*blockSize];
316  if (resHistory == 0) {
317  if (vectWeight)
318  delete vectWeight;
319  delete[] work1;
320  delete[] work2;
321  info = -30;
322  return info;
323  }
324  historyCount = 0;
325  }
326 
327  // Miscellaneous definitions
328 
329  bool reStart = false;
330  numRestart = 0;
331 
332  int localSize;
333  int twoBlocks = 2*blockSize;
334  int threeBlocks = 3*blockSize;
335  int nFound = blockSize;
336  int i, j;
337 
338  if (localVerbose > 0) {
339  cout << endl;
340  cout << " *|* Problem: ";
341  if (M)
342  cout << "K*Q = M*Q D ";
343  else
344  cout << "K*Q = Q D ";
345  if (Prec)
346  cout << " with preconditioner";
347  cout << endl;
348  cout << " *|* Algorithm = LOBPCG" << endl;
349  cout << " *|* Size of blocks = " << blockSize << endl;
350  cout << " *|* Number of requested eigenvalues = " << numEigen << endl;
351  cout.precision(2);
352  cout.setf(ios::scientific, ios::floatfield);
353  cout << " *|* Tolerance for convergence = " << tolEigenSolve << endl;
354  cout << " *|* Norm used for convergence: ";
355  if (normWeight){
356  cout << "weighted L2-norm with user-provided weights" << endl;
357  }
358  else {
359  cout << "L^2-norm" << endl;
360  }
361  if (startingEV > 0)
362  cout << " *|* Input converged eigenvectors = " << startingEV << endl;
363  cout << "\n -- Start iterations -- \n";
364  }
365 
366  timeOuterLoop -= MyWatch.WallTime();
367  for (outerIter = 1; outerIter <= maxIterEigenSolve; ++outerIter) {
368 
369  highMem = (highMem > currentSize()) ? highMem : currentSize();
370 
371  if ((outerIter == 1) || (reStart == true)) {
372 
373  reStart = false;
374  localSize = blockSize;
375 
376  if (nFound > 0) {
377 
378  Epetra_MultiVector X2(View, X, blockSize-nFound, nFound);
379  Epetra_MultiVector MX2(View, MX, blockSize-nFound, nFound);
380  Epetra_MultiVector KX2(View, KX, blockSize-nFound, nFound);
381 
382  // Apply the mass matrix to X
383  timeMassOp -= MyWatch.WallTime();
384  if (M)
385  M->Apply(X2, MX2);
386  timeMassOp += MyWatch.WallTime();
387  massOp += nFound;
388 
389  if (knownEV > 0) {
390  // Orthonormalize X against the known eigenvectors with Gram-Schmidt
391  // Note: Use R as a temporary work space
392  Epetra_MultiVector copyQ(View, Q, 0, knownEV);
393  timeOrtho -= MyWatch.WallTime();
394  info = modalTool.massOrthonormalize(X, MX, M, copyQ, nFound, 1, R.Values());
395  timeOrtho += MyWatch.WallTime();
396  // Exit the code if the orthogonalization did not succeed
397  if (info < 0) {
398  info = -10;
399  if (vectWeight)
400  delete vectWeight;
401  delete[] work1;
402  delete[] work2;
403  return info;
404  }
405  }
406 
407  // Apply the stiffness matrix to X
408  timeStifOp -= MyWatch.WallTime();
409  K->Apply(X2, KX2);
410  timeStifOp += MyWatch.WallTime();
411  stifOp += nFound;
412 
413  } // if (nFound > 0)
414 
415  } // if ((outerIter == 1) || (reStart == true))
416  else {
417 
418  // Apply the preconditioner on the residuals
419  if (Prec) {
420  timePrecOp -= MyWatch.WallTime();
421  Prec->ApplyInverse(R, H);
422  timePrecOp += MyWatch.WallTime();
423  precOp += blockSize;
424  }
425  else {
426  memcpy(H.Values(), R.Values(), xr*blockSize*sizeof(double));
427  }
428 
429  // Apply the mass matrix on H
430  timeMassOp -= MyWatch.WallTime();
431  if (M)
432  M->Apply(H, MH);
433  timeMassOp += MyWatch.WallTime();
434  massOp += blockSize;
435 
436  if (knownEV > 0) {
437  // Orthogonalize H against the known eigenvectors
438  // Note: Use R as a temporary work space
439  Epetra_MultiVector copyQ(View, Q, 0, knownEV);
440  timeOrtho -= MyWatch.WallTime();
441  modalTool.massOrthonormalize(H, MH, M, copyQ, blockSize, 1, R.Values());
442  timeOrtho += MyWatch.WallTime();
443  }
444 
445  // Apply the stiffness matrix to H
446  timeStifOp -= MyWatch.WallTime();
447  K->Apply(H, KH);
448  timeStifOp += MyWatch.WallTime();
449  stifOp += blockSize;
450 
451  if (localSize == blockSize)
452  localSize += blockSize;
453 
454  } // if ((outerIter == 1) || (reStart==true))
455 
456  // Form "local" mass and stiffness matrices
457  // Note: Use S as a temporary workspace
458  timeLocalProj -= MyWatch.WallTime();
459  modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, KX.Values(), xr,
460  KK, localSize, S);
461  modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, MX.Values(), xr,
462  MM, localSize, S);
463  if (localSize > blockSize) {
464  modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, KH.Values(), xr,
465  KK + blockSize*localSize, localSize, S);
466  modalTool.localProjection(blockSize, blockSize, xr, H.Values(), xr, KH.Values(), xr,
467  KK + blockSize*localSize + blockSize, localSize, S);
468  modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, MH.Values(), xr,
469  MM + blockSize*localSize, localSize, S);
470  modalTool.localProjection(blockSize, blockSize, xr, H.Values(), xr, MH.Values(), xr,
471  MM + blockSize*localSize + blockSize, localSize, S);
472  if (localSize > twoBlocks) {
473  modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, KP.Values(), xr,
474  KK + twoBlocks*localSize, localSize, S);
475  modalTool.localProjection(blockSize, blockSize, xr, H.Values(), xr, KP.Values(), xr,
476  KK + twoBlocks*localSize + blockSize, localSize, S);
477  modalTool.localProjection(blockSize, blockSize, xr, P.Values(), xr, KP.Values(), xr,
478  KK + twoBlocks*localSize + twoBlocks, localSize, S);
479  modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, MP.Values(), xr,
480  MM + twoBlocks*localSize, localSize, S);
481  modalTool.localProjection(blockSize, blockSize, xr, H.Values(), xr, MP.Values(), xr,
482  MM + twoBlocks*localSize + blockSize, localSize, S);
483  modalTool.localProjection(blockSize, blockSize, xr, P.Values(), xr, MP.Values(), xr,
484  MM + twoBlocks*localSize + twoBlocks, localSize, S);
485  } // if (localSize > twoBlocks)
486  } // if (localSize > blockSize)
487  timeLocalProj += MyWatch.WallTime();
488 
489  // Perform a spectral decomposition
490  timeLocalSolve -= MyWatch.WallTime();
491  int nevLocal = localSize;
492  info = modalTool.directSolver(localSize, KK, localSize, MM, localSize, nevLocal,
493  S, localSize, theta, localVerbose,
494  (blockSize == 1) ? 1 : 0);
495  timeLocalSolve += MyWatch.WallTime();
496 
497  if (info < 0) {
498  // Stop when spectral decomposition has a critical failure
499  break;
500  } // if (info < 0)
501 
502  // Check for restarting
503  if ((theta[0] < 0.0) || (nevLocal < blockSize)) {
504  if (localVerbose > 0) {
505  cout << " Iteration " << outerIter;
506  cout << "- Failure for spectral decomposition - RESTART with new random search\n";
507  }
508  if (blockSize == 1) {
509  X.Random();
510  nFound = blockSize;
511  }
512  else {
513  Epetra_MultiVector Xinit(View, X, 1, blockSize-1);
514  Xinit.Random();
515  nFound = blockSize - 1;
516  } // if (blockSize == 1)
517  reStart = true;
518  numRestart += 1;
519  info = 0;
520  continue;
521  } // if ((theta[0] < 0.0) || (nevLocal < blockSize))
522 
523  if ((localSize == twoBlocks) && (nevLocal == blockSize)) {
524  for (j = 0; j < nevLocal; ++j)
525  memcpy(S + j*blockSize, S + j*twoBlocks, blockSize*sizeof(double));
526  localSize = blockSize;
527  }
528 
529  if ((localSize == threeBlocks) && (nevLocal <= twoBlocks)) {
530  for (j = 0; j < nevLocal; ++j)
531  memcpy(S + j*twoBlocks, S + j*threeBlocks, twoBlocks*sizeof(double));
532  localSize = twoBlocks;
533  }
534 
535  // Compute the residuals
536  timeResidual -= MyWatch.WallTime();
537  callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KX.Values(), xr,
538  S, localSize, 0.0, R.Values(), xr);
539  if (localSize >= twoBlocks) {
540  callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KH.Values(), xr,
541  S + blockSize, localSize, 1.0, R.Values(), xr);
542  if (localSize == threeBlocks) {
543  callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KP.Values(), xr,
544  S + twoBlocks, localSize, 1.0, R.Values(), xr);
545  }
546  }
547  for (j = 0; j < blockSize; ++j)
548  callBLAS.SCAL(localSize, theta[j], S + j*localSize);
549  callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, -1.0, MX.Values(), xr,
550  S, localSize, 1.0, R.Values(), xr);
551  if (localSize >= twoBlocks) {
552  callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, -1.0, MH.Values(), xr,
553  S + blockSize, localSize, 1.0, R.Values(), xr);
554  if (localSize == threeBlocks) {
555  callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, -1.0, MP.Values(), xr,
556  S + twoBlocks, localSize, 1.0, R.Values(), xr);
557  }
558  }
559  for (j = 0; j < blockSize; ++j)
560  callBLAS.SCAL(localSize, 1.0/theta[j], S + j*localSize);
561  timeResidual += MyWatch.WallTime();
562 
563  // Compute the norms of the residuals
564  timeNorm -= MyWatch.WallTime();
565  if (vectWeight)
566  R.NormWeighted(*vectWeight, normR);
567  else
568  R.Norm2(normR);
569 
570  // Scale the norms of residuals with the eigenvalues
571  // Count the converged eigenvectors
572  nFound = 0;
573  for (j = 0; j < blockSize; ++j) {
574  normR[j] = (theta[j] == 0.0) ? normR[j] : normR[j]/theta[j];
575  if (normR[j] < tolEigenSolve)
576  nFound += 1;
577  }
578  timeNorm += MyWatch.WallTime();
579 
580  // Store the residual history
581  if (localVerbose > 2) {
582  memcpy(resHistory + historyCount*blockSize, normR, blockSize*sizeof(double));
583  historyCount += 1;
584  }
585 
586  // Print information on current iteration
587  if (localVerbose > 0) {
588  cout << " Iteration " << outerIter << " - Number of converged eigenvectors ";
589  cout << knownEV + nFound << endl;
590  }
591 
592  if (localVerbose > 1) {
593  cout << endl;
594  cout.precision(2);
595  cout.setf(ios::scientific, ios::floatfield);
596  for (i=0; i<blockSize; ++i) {
597  cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i;
598  cout << " = " << normR[i] << endl;
599  }
600  cout << endl;
601  cout.precision(2);
602  for (i=0; i<blockSize; ++i) {
603  cout << " Iteration " << outerIter << " - Ritz eigenvalue " << i;
604  cout.setf((fabs(theta[i]) < 0.01) ? ios::scientific : ios::fixed, ios::floatfield);
605  cout << " = " << theta[i] << endl;
606  }
607  cout << endl;
608  }
609 
610  if (nFound == 0) {
611  // Update the spaces
612  // Note: Use R as a temporary work space
613  timeLocalUpdate -= MyWatch.WallTime();
614  memcpy(R.Values(), X.Values(), xr*blockSize*sizeof(double));
615  callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, R.Values(), xr, S, localSize,
616  0.0, X.Values(), xr);
617  memcpy(R.Values(), KX.Values(), xr*blockSize*sizeof(double));
618  callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, R.Values(), xr, S, localSize,
619  0.0, KX.Values(), xr);
620  if (M) {
621  memcpy(R.Values(), MX.Values(), xr*blockSize*sizeof(double));
622  callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, R.Values(), xr, S, localSize,
623  0.0, MX.Values(), xr);
624  }
625  if (localSize == twoBlocks) {
626  callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, H.Values(), xr,
627  S+blockSize, localSize, 0.0, P.Values(), xr);
628  callBLAS.AXPY(xr*blockSize, 1.0, P.Values(), X.Values());
629  callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KH.Values(), xr,
630  S+blockSize, localSize, 0.0, KP.Values(), xr);
631  callBLAS.AXPY(xr*blockSize, 1.0, KP.Values(), KX.Values());
632  if (M) {
633  callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, MH.Values(), xr,
634  S+blockSize, localSize, 0.0, MP.Values(), xr);
635  callBLAS.AXPY(xr*blockSize, 1.0, MP.Values(), MX.Values());
636  }
637  } // if (localSize == twoBlocks)
638  if (localSize == threeBlocks) {
639  memcpy(R.Values(), P.Values(), xr*blockSize*sizeof(double));
640  callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, R.Values(), xr,
641  S + twoBlocks, localSize, 0.0, P.Values(), xr);
642  callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, H.Values(), xr,
643  S + blockSize, localSize, 1.0, P.Values(), xr);
644  callBLAS.AXPY(xr*blockSize, 1.0, P.Values(), X.Values());
645  memcpy(R.Values(), KP.Values(), xr*blockSize*sizeof(double));
646  callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, R.Values(), xr,
647  S + twoBlocks, localSize, 0.0, KP.Values(), xr);
648  callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KH.Values(), xr,
649  S + blockSize, localSize, 1.0, KP.Values(), xr);
650  callBLAS.AXPY(xr*blockSize, 1.0, KP.Values(), KX.Values());
651  if (M) {
652  memcpy(R.Values(), MP.Values(), xr*blockSize*sizeof(double));
653  callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, R.Values(), xr,
654  S + twoBlocks, localSize, 0.0, MP.Values(), xr);
655  callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, MH.Values(), xr,
656  S + blockSize, localSize, 1.0, MP.Values(), xr);
657  callBLAS.AXPY(xr*blockSize, 1.0, MP.Values(), MX.Values());
658  }
659  } // if (localSize == threeBlocks)
660  timeLocalUpdate += MyWatch.WallTime();
661  // Compute the new residuals
662  timeResidual -= MyWatch.WallTime();
663  memcpy(R.Values(), KX.Values(), blockSize*xr*sizeof(double));
664  for (j = 0; j < blockSize; ++j)
665  callBLAS.AXPY(xr, -theta[j], MX.Values() + j*xr, R.Values() + j*xr);
666  timeResidual += MyWatch.WallTime();
667  // When required, monitor some orthogonalities
668  if (verbose > 2) {
669  if (knownEV == 0) {
670  accuracyCheck(&X, &MX, &R, 0, 0, 0);
671  }
672  else {
673  Epetra_MultiVector copyQ(View, Q, 0, knownEV);
674  accuracyCheck(&X, &MX, &R, &copyQ, (localSize>blockSize) ? &H : 0,
675  (localSize>twoBlocks) ? &P : 0);
676  }
677  } // if (verbose > 2)
678  if (localSize < threeBlocks)
679  localSize += blockSize;
680  continue;
681  } // if (nFound == 0)
682 
683  // Order the Ritz eigenvectors by putting the converged vectors at the beginning
684  int firstIndex = blockSize;
685  for (j = 0; j < blockSize; ++j) {
686  if (normR[j] >= tolEigenSolve) {
687  firstIndex = j;
688  break;
689  }
690  } // for (j = 0; j < blockSize; ++j)
691  while (firstIndex < nFound) {
692  for (j = firstIndex; j < blockSize; ++j) {
693  if (normR[j] < tolEigenSolve) {
694  // Swap the j-th and firstIndex-th position
695  callFortran.SWAP(localSize, S + j*localSize, 1, S + firstIndex*localSize, 1);
696  callFortran.SWAP(1, theta + j, 1, theta + firstIndex, 1);
697  callFortran.SWAP(1, normR + j, 1, normR + firstIndex, 1);
698  break;
699  }
700  } // for (j = firstIndex; j < blockSize; ++j)
701  for (j = 0; j < blockSize; ++j) {
702  if (normR[j] >= tolEigenSolve) {
703  firstIndex = j;
704  break;
705  }
706  } // for (j = 0; j < blockSize; ++j)
707  } // while (firstIndex < nFound)
708 
709  // Copy the converged eigenvalues
710  memcpy(lambda + knownEV, theta, nFound*sizeof(double));
711 
712  // Convergence test
713  if (knownEV + nFound >= numEigen) {
714  callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, X.Values(), xr,
715  S, localSize, 0.0, R.Values(), xr);
716  if (localSize >= twoBlocks) {
717  callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, H.Values(), xr,
718  S + blockSize, localSize, 1.0, R.Values(), xr);
719  if (localSize == threeBlocks) {
720  callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, P.Values(), xr,
721  S + twoBlocks, localSize, 1.0, R.Values(), xr);
722  }
723  }
724  memcpy(Q.Values() + knownEV*xr, R.Values(), nFound*xr*sizeof(double));
725  knownEV += nFound;
726  if (localVerbose == 1) {
727  cout << endl;
728  cout.precision(2);
729  cout.setf(ios::scientific, ios::floatfield);
730  for (i=0; i<blockSize; ++i) {
731  cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i;
732  cout << " = " << normR[i] << endl;
733  }
734  cout << endl;
735  }
736  break;
737  }
738 
739  // Store the converged eigenvalues and eigenvectors
740  callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, X.Values(), xr,
741  S, localSize, 0.0, Q.Values() + knownEV*xr, xr);
742  if (localSize >= twoBlocks) {
743  callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, H.Values(), xr,
744  S + blockSize, localSize, 1.0, Q.Values() + knownEV*xr, xr);
745  if (localSize >= threeBlocks)
746  callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, P.Values(), xr,
747  S + twoBlocks, localSize, 1.0, Q.Values() + knownEV*xr, xr);
748  }
749  knownEV += nFound;
750 
751  // Define the restarting vectors
752  timeRestart -= MyWatch.WallTime();
753  int leftOver = (nevLocal < blockSize + nFound) ? nevLocal - nFound : blockSize;
754  double *Snew = S + nFound*localSize;
755  memcpy(R.Values(), X.Values(), blockSize*xr*sizeof(double));
756  callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, R.Values(), xr,
757  Snew, localSize, 0.0, X.Values(), xr);
758  memcpy(R.Values(), KX.Values(), blockSize*xr*sizeof(double));
759  callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, R.Values(), xr,
760  Snew, localSize, 0.0, KX.Values(), xr);
761  if (M) {
762  memcpy(R.Values(), MX.Values(), blockSize*xr*sizeof(double));
763  callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, R.Values(), xr,
764  Snew, localSize, 0.0, MX.Values(), xr);
765  }
766  if (localSize >= twoBlocks) {
767  callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, H.Values(), xr,
768  Snew+blockSize, localSize, 1.0, X.Values(), xr);
769  callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, KH.Values(), xr,
770  Snew+blockSize, localSize, 1.0, KX.Values(), xr);
771  if (M) {
772  callBLAS.GEMM('N','N', xr, leftOver, blockSize, 1.0, MH.Values(), xr,
773  Snew+blockSize, localSize, 1.0, MX.Values(), xr);
774  }
775  if (localSize >= threeBlocks) {
776  callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, P.Values(), xr,
777  Snew+twoBlocks, localSize, 1.0, X.Values(), xr);
778  callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, KP.Values(), xr,
779  Snew+twoBlocks, localSize, 1.0, KX.Values(), xr);
780  if (M) {
781  callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, MP.Values(), xr,
782  Snew+twoBlocks, localSize, 1.0, MX.Values(), xr);
783  }
784  }
785  }
786  if (nevLocal < blockSize + nFound) {
787  // Put new random vectors at the end of the block
788  Epetra_MultiVector Xtmp(View, X, leftOver, blockSize - leftOver);
789  Xtmp.Random();
790  }
791  else {
792  nFound = 0;
793  }
794  reStart = true;
795  timeRestart += MyWatch.WallTime();
796 
797  } // for (outerIter = 1; outerIter <= maxIterEigenSolve; ++outerIter)
798  timeOuterLoop += MyWatch.WallTime();
799  highMem = (highMem > currentSize()) ? highMem : currentSize();
800 
801  // Clean memory
802  delete[] work1;
803  delete[] work2;
804  if (vectWeight)
805  delete vectWeight;
806 
807  // Sort the eigenpairs
808  timePostProce -= MyWatch.WallTime();
809  if ((info == 0) && (knownEV > 0)) {
810  mySort.sortScalars_Vectors(knownEV, lambda, Q.Values(), Q.MyLength());
811  }
812  timePostProce += MyWatch.WallTime();
813 
814  return (info == 0) ? knownEV : info;
815 
816 }
817 
818 
819 void LOBPCG::accuracyCheck(const Epetra_MultiVector *X, const Epetra_MultiVector *MX,
820  const Epetra_MultiVector *R, const Epetra_MultiVector *Q,
821  const Epetra_MultiVector *H, const Epetra_MultiVector *P) const {
822 
823  cout.precision(2);
824  cout.setf(ios::scientific, ios::floatfield);
825  double tmp;
826 
827  int myPid = MyComm.MyPID();
828 
829  if (X) {
830  if (M) {
831  if (MX) {
832  tmp = myVerify.errorEquality(X, MX, M);
833  if (myPid == 0)
834  cout << " >> Difference between MX and M*X = " << tmp << endl;
835  }
836  tmp = myVerify.errorOrthonormality(X, M);
837  if (myPid == 0)
838  cout << " >> Error in X^T M X - I = " << tmp << endl;
839  }
840  else {
841  tmp = myVerify.errorOrthonormality(X, 0);
842  if (myPid == 0)
843  cout << " >> Error in X^T X - I = " << tmp << endl;
844  }
845  }
846 
847  if ((R) && (X)) {
848  tmp = myVerify.errorOrthogonality(X, R);
849  if (myPid == 0)
850  cout << " >> Orthogonality X^T R up to " << tmp << endl;
851  }
852 
853  if (Q == 0)
854  return;
855 
856  if (M) {
857  tmp = myVerify.errorOrthonormality(Q, M);
858  if (myPid == 0)
859  cout << " >> Error in Q^T M Q - I = " << tmp << endl;
860  if (X) {
861  tmp = myVerify.errorOrthogonality(Q, X, M);
862  if (myPid == 0)
863  cout << " >> Orthogonality Q^T M X up to " << tmp << endl;
864  }
865  if (H) {
866  tmp = myVerify.errorOrthogonality(Q, H, M);
867  if (myPid == 0)
868  cout << " >> Orthogonality Q^T M H up to " << tmp << endl;
869  }
870  if (P) {
871  tmp = myVerify.errorOrthogonality(Q, P, M);
872  if (myPid == 0)
873  cout << " >> Orthogonality Q^T M P up to " << tmp << endl;
874  }
875  }
876  else {
877  tmp = myVerify.errorOrthonormality(Q, 0);
878  if (myPid == 0)
879  cout << " >> Error in Q^T Q - I = " << tmp << endl;
880  if (X) {
881  tmp = myVerify.errorOrthogonality(Q, X, 0);
882  if (myPid == 0)
883  cout << " >> Orthogonality Q^T X up to " << tmp << endl;
884  }
885  if (H) {
886  tmp = myVerify.errorOrthogonality(Q, H, 0);
887  if (myPid == 0)
888  cout << " >> Orthogonality Q^T H up to " << tmp << endl;
889  }
890  if (P) {
891  tmp = myVerify.errorOrthogonality(Q, P, 0);
892  if (myPid == 0)
893  cout << " >> Orthogonality Q^T P up to " << tmp << endl;
894  }
895  }
896 
897 }
898 
899 
900 void LOBPCG::initializeCounters() {
901 
902  historyCount = 0;
903  if (resHistory) {
904  delete[] resHistory;
905  resHistory = 0;
906  }
907 
908  memRequested = 0.0;
909  highMem = 0.0;
910 
911  massOp = 0;
912  numRestart = 0;
913  outerIter = 0;
914  precOp = 0;
915  residual = 0;
916  stifOp = 0;
917 
918  timeLocalProj = 0.0;
919  timeLocalSolve = 0.0;
920  timeLocalUpdate = 0.0;
921  timeMassOp = 0.0;
922  timeNorm = 0.0;
923  timeOrtho = 0.0;
924  timeOuterLoop = 0.0;
925  timePostProce = 0.0;
926  timePrecOp = 0.0;
927  timeResidual = 0.0;
928  timeRestart = 0.0;
929  timeStifOp = 0.0;
930 
931 
932 }
933 
934 
935 void LOBPCG::algorithmInfo() const {
936 
937  int myPid = MyComm.MyPID();
938 
939  if (myPid ==0) {
940  cout << " Algorithm: LOBPCG (block version) with Cholesky-based local eigensolver\n";
941  cout << " Block Size: " << blockSize << endl;
942  }
943 
944 }
945 
946 
947 void LOBPCG::historyInfo() const {
948 
949  if (resHistory) {
950  int j;
951  cout << " Block Size: " << blockSize << endl;
952  cout << endl;
953  cout << " Residuals\n";
954  cout << endl;
955  cout.precision(4);
956  cout.setf(ios::scientific, ios::floatfield);
957  for (j = 0; j < historyCount; ++j) {
958  int ii;
959  for (ii = 0; ii < blockSize; ++ii)
960  cout << resHistory[blockSize*j + ii] << endl;
961  }
962  cout << endl;
963  }
964 
965 }
966 
967 
968 void LOBPCG::memoryInfo() const {
969 
970  int myPid = MyComm.MyPID();
971 
972  double maxHighMem = 0.0;
973  double tmp = highMem;
974  MyComm.MaxAll(&tmp, &maxHighMem, 1);
975 
976  double maxMemRequested = 0.0;
977  tmp = memRequested;
978  MyComm.MaxAll(&tmp, &maxMemRequested, 1);
979 
980  if (myPid == 0) {
981  cout.precision(2);
982  cout.setf(ios::fixed, ios::floatfield);
983  cout << " Memory requested per processor by the eigensolver = (EST) ";
984  cout.width(6);
985  cout << maxMemRequested << " MB " << endl;
986  cout << endl;
987  cout << " High water mark in eigensolver = (EST) ";
988  cout.width(6);
989  cout << maxHighMem << " MB " << endl;
990  cout << endl;
991  }
992 
993 }
994 
995 
996 void LOBPCG::operationInfo() const {
997 
998  int myPid = MyComm.MyPID();
999 
1000  if (myPid == 0) {
1001  cout << " --- Operations ---\n\n";
1002  cout << " Total number of mass matrix multiplications = ";
1003  cout.width(9);
1004  cout << massOp + modalTool.getNumProj_MassMult() + modalTool.getNumNorm_MassMult() << endl;
1005  cout << " Mass multiplications in solver loop = ";
1006  cout.width(9);
1007  cout << massOp << endl;
1008  cout << " Mass multiplications for orthogonalization = ";
1009  cout.width(9);
1010  cout << modalTool.getNumProj_MassMult() << endl;
1011  cout << " Mass multiplications for normalization = ";
1012  cout.width(9);
1013  cout << modalTool.getNumNorm_MassMult() << endl;
1014  cout << " Total number of stiffness matrix operations = ";
1015  cout.width(9);
1016  cout << stifOp << endl;
1017  cout << " Total number of preconditioner applications = ";
1018  cout.width(9);
1019  cout << precOp << endl;
1020  cout << " Total number of computed eigen-residuals = ";
1021  cout.width(9);
1022  cout << residual << endl;
1023  cout << "\n";
1024  cout << " Total number of outer iterations = ";
1025  cout.width(9);
1026  cout << outerIter << endl;
1027  cout << " Number of restarts = ";
1028  cout.width(9);
1029  cout << numRestart << endl;
1030  cout << "\n";
1031  }
1032 
1033 }
1034 
1035 
1036 void LOBPCG::timeInfo() const {
1037 
1038  int myPid = MyComm.MyPID();
1039 
1040  if (myPid == 0) {
1041  cout << " --- Timings ---\n\n";
1042  cout.setf(ios::fixed, ios::floatfield);
1043  cout.precision(2);
1044  cout << " Total time for outer loop = ";
1045  cout.width(9);
1046  cout << timeOuterLoop << " s" << endl;
1047  cout << " Time for mass matrix operations = ";
1048  cout.width(9);
1049  cout << timeMassOp << " s ";
1050  cout.width(5);
1051  cout << 100*timeMassOp/timeOuterLoop << " %\n";
1052  cout << " Time for stiffness matrix operations = ";
1053  cout.width(9);
1054  cout << timeStifOp << " s ";
1055  cout.width(5);
1056  cout << 100*timeStifOp/timeOuterLoop << " %\n";
1057  cout << " Time for preconditioner applications = ";
1058  cout.width(9);
1059  cout << timePrecOp << " s ";
1060  cout.width(5);
1061  cout << 100*timePrecOp/timeOuterLoop << " %\n";
1062  cout << " Time for orthogonalizations = ";
1063  cout.width(9);
1064  cout << timeOrtho << " s ";
1065  cout.width(5);
1066  cout << 100*timeOrtho/timeOuterLoop << " %\n";
1067  cout << " Projection step : ";
1068  cout.width(9);
1069  cout << modalTool.getTimeProj() << " s\n";
1070  cout << " Q^T mult. :: ";
1071  cout.width(9);
1072  cout << modalTool.getTimeProj_QtMult() << " s\n";
1073  cout << " Q mult. :: ";
1074  cout.width(9);
1075  cout << modalTool.getTimeProj_QMult() << " s\n";
1076  cout << " Mass mult. :: ";
1077  cout.width(9);
1078  cout << modalTool.getTimeProj_MassMult() << " s\n";
1079  cout << " Normalization step : ";
1080  cout.width(9);
1081  cout << modalTool.getTimeNorm() << " s\n";
1082  cout << " Mass mult. :: ";
1083  cout.width(9);
1084  cout << modalTool.getTimeNorm_MassMult() << " s\n";
1085  cout << " Time for local projection = ";
1086  cout.width(9);
1087  cout << timeLocalProj << " s ";
1088  cout.width(5);
1089  cout << 100*timeLocalProj/timeOuterLoop << " %\n";
1090  cout << " Time for local eigensolve = ";
1091  cout.width(9);
1092  cout << timeLocalSolve << " s ";
1093  cout.width(5);
1094  cout << 100*timeLocalSolve/timeOuterLoop << " %\n";
1095  cout << " Time for local update = ";
1096  cout.width(9);
1097  cout << timeLocalUpdate << " s ";
1098  cout.width(5);
1099  cout << 100*timeLocalUpdate/timeOuterLoop << " %\n";
1100  cout << " Time for residual computations = ";
1101  cout.width(9);
1102  cout << timeResidual << " s ";
1103  cout.width(5);
1104  cout << 100*timeResidual/timeOuterLoop << " %\n";
1105  cout << " Time for residuals norm computations = ";
1106  cout.width(9);
1107  cout << timeNorm << " s ";
1108  cout.width(5);
1109  cout << 100*timeNorm/timeOuterLoop << " %\n";
1110  cout << " Time for restarting space definition = ";
1111  cout.width(9);
1112  cout << timeRestart << " s ";
1113  cout.width(5);
1114  cout << 100*timeRestart/timeOuterLoop << " %\n";
1115  cout << "\n";
1116  cout << " Total time for post-processing = ";
1117  cout.width(9);
1118  cout << timePostProce << " s\n";
1119  cout << endl;
1120  } // if (myPid == 0)
1121 
1122 }
1123 
1124