Anasazi  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
BRQMIN.cpp
1 // @HEADER
2 // *****************************************************************************
3 // Anasazi: Block Eigensolvers Package
4 //
5 // Copyright 2004 NTESS and the Anasazi contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 // This software is a result of the research described in the report
11 //
12 // "A comparison of algorithms for modal analysis in the absence
13 // of a sparse direct method", P. Arbenz, R. Lehoucq, and U. Hetmaniuk,
14 // Sandia National Laboratories, Technical report SAND2003-1028J.
15 //
16 // It is based on the Epetra, AztecOO, and ML packages defined in the Trilinos
17 // framework ( http://trilinos.org/ ).
18 
19 #include "BRQMIN.h"
20 
21 
22 BRQMIN::BRQMIN(const Epetra_Comm &_Comm, const Epetra_Operator *KK,
23  const Epetra_Operator *PP, int _blk,
24  double _tol, int _maxIter, int _verb) :
25  myVerify(_Comm),
26  callBLAS(),
27  callFortran(),
28  callLAPACK(),
29  modalTool(_Comm),
30  mySort(),
31  MyComm(_Comm),
32  K(KK),
33  M(0),
34  Prec(PP),
35  MyWatch(_Comm),
36  tolEigenSolve(_tol),
37  maxIterEigenSolve(_maxIter),
38  blockSize(_blk),
39  normWeight(0),
40  verbose(_verb),
41  historyCount(0),
42  resHistory(0),
43  memRequested(0.0),
44  highMem(0.0),
45  massOp(0),
46  numRestart(0),
47  outerIter(0),
48  precOp(0),
49  residual(0),
50  stifOp(0),
51  timeLocalProj(0.0),
52  timeLocalSolve(0.0),
53  timeLocalUpdate(0.0),
54  timeMassOp(0.0),
55  timeNorm(0.0),
56  timeOrtho(0.0),
57  timeOuterLoop(0.0),
58  timePostProce(0.0),
59  timePrecOp(0.0),
60  timeResidual(0.0),
61  timeRestart(0.0),
62  timeSearchP(0.0),
63  timeStifOp(0.0)
64  {
65 
66 }
67 
68 
69 BRQMIN::BRQMIN(const Epetra_Comm &_Comm, const Epetra_Operator *KK,
70  const Epetra_Operator *MM, const Epetra_Operator *PP, int _blk,
71  double _tol, int _maxIter, int _verb,
72  double *_weight) :
73  myVerify(_Comm),
74  callBLAS(),
75  callFortran(),
76  callLAPACK(),
77  modalTool(_Comm),
78  mySort(),
79  MyComm(_Comm),
80  K(KK),
81  M(MM),
82  Prec(PP),
83  MyWatch(_Comm),
84  tolEigenSolve(_tol),
85  maxIterEigenSolve(_maxIter),
86  blockSize(_blk),
87  normWeight(_weight),
88  verbose(_verb),
89  historyCount(0),
90  resHistory(0),
91  memRequested(0.0),
92  highMem(0.0),
93  massOp(0),
94  numRestart(0),
95  outerIter(0),
96  precOp(0),
97  residual(0),
98  stifOp(0),
99  timeLocalProj(0.0),
100  timeLocalSolve(0.0),
101  timeLocalUpdate(0.0),
102  timeMassOp(0.0),
103  timeNorm(0.0),
104  timeOrtho(0.0),
105  timeOuterLoop(0.0),
106  timePostProce(0.0),
107  timePrecOp(0.0),
108  timeResidual(0.0),
109  timeRestart(0.0),
110  timeSearchP(0.0),
111  timeStifOp(0.0)
112  {
113 
114 }
115 
116 
117 BRQMIN::~BRQMIN() {
118 
119  if (resHistory) {
120  delete[] resHistory;
121  resHistory = 0;
122  }
123 
124 }
125 
126 
127 int BRQMIN::solve(int numEigen, Epetra_MultiVector &Q, double *lambda) {
128 
129  return BRQMIN::reSolve(numEigen, Q, lambda);
130 
131 }
132 
133 
134 int BRQMIN::reSolve(int numEigen, Epetra_MultiVector &Q, double *lambda, int startingEV) {
135 
136  // Computes the smallest eigenvalues and the corresponding eigenvectors
137  // of the generalized eigenvalue problem
138  //
139  // K X = M X Lambda
140  //
141  // using a Block Rayleigh Quotient MINimization.
142  //
143  // Note that if M is not specified, then K X = X Lambda is solved.
144  //
145  // Ref: P. Arbenz & R. Lehoucq, "A comparison of algorithms for modal analysis in the
146  // absence of a sparse direct method", SNL, Technical Report SAND2003-1028J
147  // With the notations of this report, the coefficient beta is defined as
148  // ( P^T K P )^{-1} P^T K H
149  //
150  // Input variables:
151  //
152  // numEigen (integer) = Number of eigenmodes requested
153  //
154  // Q (Epetra_MultiVector) = Converged eigenvectors
155  // The number of columns of Q must be equal to numEigen + blockSize.
156  // The rows of Q are distributed across processors.
157  // At exit, the first numEigen columns contain the eigenvectors requested.
158  //
159  // lambda (array of doubles) = Converged eigenvalues
160  // At input, it must be of size numEigen + blockSize.
161  // At exit, the first numEigen locations contain the eigenvalues requested.
162  //
163  // startingEV (integer) = Number of existing converged eigenmodes
164  //
165  // Return information on status of computation
166  //
167  // info >= 0 >> Number of converged eigenpairs at the end of computation
168  //
169  // // Failure due to input arguments
170  //
171  // info = - 1 >> The stiffness matrix K has not been specified.
172  // info = - 2 >> The maps for the matrix K and the matrix M differ.
173  // info = - 3 >> The maps for the matrix K and the preconditioner P differ.
174  // info = - 4 >> The maps for the vectors and the matrix K differ.
175  // info = - 5 >> Q is too small for the number of eigenvalues requested.
176  // info = - 6 >> Q is too small for the computation parameters.
177  //
178  // info = - 10 >> Failure during the mass orthonormalization
179  //
180  // info = - 20 >> Error in LAPACK during the local eigensolve
181  //
182  // info = - 30 >> MEMORY
183  //
184 
185  // Check the input parameters
186 
187  if (numEigen <= startingEV) {
188  return startingEV;
189  }
190 
191  int info = myVerify.inputArguments(numEigen, K, M, Prec, Q, numEigen + blockSize);
192  if (info < 0)
193  return info;
194 
195  int myPid = MyComm.MyPID();
196 
197  // Get the weight for approximating the M-inverse norm
198  Epetra_Vector *vectWeight = 0;
199  if (normWeight) {
200  vectWeight = new Epetra_Vector(View, Q.Map(), normWeight);
201  }
202 
203  int knownEV = startingEV;
204  int localVerbose = verbose*(myPid==0);
205 
206  // Define local block vectors
207  //
208  // MX = Working vectors (storing M*X if M is specified, else pointing to X)
209  // KX = Working vectors (storing K*X)
210  //
211  // R = Residuals
212  //
213  // H = Preconditioned residuals
214  //
215  // P = Search directions
216  // MP = Working vectors (storing M*P if M is specified, else pointing to P)
217  // KP = Working vectors (storing K*P)
218 
219  int xr = Q.MyLength();
220  Epetra_MultiVector X(View, Q, numEigen, blockSize);
221  X.Random();
222 
223  int tmp;
224  tmp = (M == 0) ? 5*blockSize*xr : 7*blockSize*xr;
225 
226  double *work1 = new (nothrow) double[tmp];
227  if (work1 == 0) {
228  if (vectWeight)
229  delete vectWeight;
230  info = -30;
231  return info;
232  }
233  memRequested += sizeof(double)*tmp/(1024.0*1024.0);
234 
235  highMem = (highMem > currentSize()) ? highMem : currentSize();
236 
237  double *tmpD = work1;
238 
239  Epetra_MultiVector KX(View, Q.Map(), tmpD, xr, blockSize);
240  tmpD = tmpD + xr*blockSize;
241 
242  Epetra_MultiVector MX(View, Q.Map(), (M) ? tmpD : X.Values(), xr, blockSize);
243  tmpD = (M) ? tmpD + xr*blockSize : tmpD;
244 
245  Epetra_MultiVector R(View, Q.Map(), tmpD, xr, blockSize);
246  tmpD = tmpD + xr*blockSize;
247 
248  Epetra_MultiVector H(View, Q.Map(), tmpD, xr, blockSize);
249  tmpD = tmpD + xr*blockSize;
250 
251  Epetra_MultiVector P(View, Q.Map(), tmpD, xr, blockSize);
252  tmpD = tmpD + xr*blockSize;
253 
254  Epetra_MultiVector KP(View, Q.Map(), tmpD, xr, blockSize);
255  tmpD = tmpD + xr*blockSize;
256 
257  Epetra_MultiVector MP(View, Q.Map(), (M) ? tmpD : P.Values(), xr, blockSize);
258 
259  // Define arrays
260  //
261  // theta = Store the local eigenvalues (size: 2*blockSize)
262  // normR = Store the norm of residuals (size: blockSize)
263  //
264  // MM = Local mass matrix (size: 2*blockSize x 2*blockSize)
265  // KK = Local stiffness matrix (size: 2*blockSize x 2*blockSize)
266  //
267  // S = Local eigenvectors (size: 2*blockSize x 2*blockSize)
268 
269  int lwork2;
270  lwork2 = 3*blockSize + 12*blockSize*blockSize;
271  double *work2 = new (nothrow) double[lwork2];
272  if (work2 == 0) {
273  if (vectWeight)
274  delete vectWeight;
275  delete[] work1;
276  info = -30;
277  return info;
278  }
279 
280  highMem = (highMem > currentSize()) ? highMem : currentSize();
281 
282  tmpD = work2;
283 
284  double *theta = tmpD;
285  tmpD = tmpD + 2*blockSize;
286 
287  double *normR = tmpD;
288  tmpD = tmpD + blockSize;
289 
290  double *MM = tmpD;
291  tmpD = tmpD + 4*blockSize*blockSize;
292 
293  double *KK = tmpD;
294  tmpD = tmpD + 4*blockSize*blockSize;
295 
296  double *S = tmpD;
297 
298  memRequested += sizeof(double)*lwork2/(1024.0*1024.0);
299 
300  // Define an array to store the residuals history
301  if (localVerbose > 2) {
302  resHistory = new (nothrow) double[maxIterEigenSolve*blockSize];
303  if (resHistory == 0) {
304  if (vectWeight)
305  delete vectWeight;
306  delete[] work1;
307  delete[] work2;
308  info = -30;
309  return info;
310  }
311  historyCount = 0;
312  }
313 
314  // Miscellaneous definitions
315 
316  bool reStart = false;
317  numRestart = 0;
318 
319  int localSize;
320  int twoBlocks = 2*blockSize;
321  int nFound = blockSize;
322  int i, j;
323 
324  if (localVerbose > 0) {
325  cout << endl;
326  cout << " *|* Problem: ";
327  if (M)
328  cout << "K*Q = M*Q D ";
329  else
330  cout << "K*Q = Q D ";
331  if (Prec)
332  cout << " with preconditioner";
333  cout << endl;
334  cout << " *|* Algorithm = BRQMIN" << endl;
335  cout << " *|* Size of blocks = " << blockSize << endl;
336  cout << " *|* Number of requested eigenvalues = " << numEigen << endl;
337  cout.precision(2);
338  cout.setf(ios::scientific, ios::floatfield);
339  cout << " *|* Tolerance for convergence = " << tolEigenSolve << endl;
340  cout << " *|* Norm used for convergence: ";
341  if (normWeight)
342  cout << "weighted L2-norm with user-provided weights" << endl;
343  else
344  cout << "L^2-norm" << endl;
345  if (startingEV > 0)
346  cout << " *|* Input converged eigenvectors = " << startingEV << endl;
347  cout << "\n -- Start iterations -- \n";
348  }
349 
350  timeOuterLoop -= MyWatch.WallTime();
351  for (outerIter = 1; outerIter <= maxIterEigenSolve; ++outerIter) {
352 
353  highMem = (highMem > currentSize()) ? highMem : currentSize();
354 
355  if ((outerIter == 1) || (reStart == true)) {
356 
357  reStart = false;
358  localSize = blockSize;
359 
360  if (nFound > 0) {
361 
362  Epetra_MultiVector X2(View, X, blockSize-nFound, nFound);
363  Epetra_MultiVector MX2(View, MX, blockSize-nFound, nFound);
364  Epetra_MultiVector KX2(View, KX, blockSize-nFound, nFound);
365 
366  // Apply the mass matrix to X
367  timeMassOp -= MyWatch.WallTime();
368  if (M)
369  M->Apply(X2, MX2);
370  timeMassOp += MyWatch.WallTime();
371  massOp += nFound;
372 
373  if (knownEV > 0) {
374  // Orthonormalize X against the known eigenvectors with Gram-Schmidt
375  // Note: Use R as a temporary work space
376  Epetra_MultiVector copyQ(View, Q, 0, knownEV);
377  timeOrtho -= MyWatch.WallTime();
378  info = modalTool.massOrthonormalize(X, MX, M, copyQ, nFound, 0, R.Values());
379  timeOrtho += MyWatch.WallTime();
380  // Exit the code if the orthogonalization did not succeed
381  if (info < 0) {
382  info = -10;
383  delete[] work1;
384  delete[] work2;
385  if (vectWeight)
386  delete vectWeight;
387  return info;
388  }
389  }
390 
391  // Apply the stiffness matrix to X
392  timeStifOp -= MyWatch.WallTime();
393  K->Apply(X2, KX2);
394  timeStifOp += MyWatch.WallTime();
395  stifOp += nFound;
396 
397  } // if (nFound > 0)
398 
399  } // if ((outerIter == 1) || (reStart == true))
400  else {
401 
402  // Apply the preconditioner on the residuals
403  if (Prec != 0) {
404  timePrecOp -= MyWatch.WallTime();
405  Prec->ApplyInverse(R, H);
406  timePrecOp += MyWatch.WallTime();
407  precOp += blockSize;
408  }
409  else {
410  memcpy(H.Values(), R.Values(), xr*blockSize*sizeof(double));
411  }
412 
413  timeSearchP -= MyWatch.WallTime();
414  // Define the new search direction
415  if (localSize == blockSize) {
416  P.Scale(-1.0, H);
417  localSize = twoBlocks;
418  } // if (localSize == blockSize)
419  else {
420  // Compute the projected stiffness matrix P^T K P
421  // Note: Use S as a workspace
422  // Use KK to store this matrix
423  modalTool.localProjection(blockSize, blockSize, xr, KP.Values(), xr, P.Values(), xr,
424  KK, blockSize, S);
425  // Factor the projected stiffness matrix P^T K P
426  callLAPACK.POTRF('U', blockSize, KK, blockSize, &info);
427  if (info != 0) {
428  if (info < 0) {
429  if (localVerbose > 0) {
430  cerr << " Iteration " << outerIter;
431  cerr << " - DPOTRF has a critical failure (" << info << ")" << endl;
432  }
433  info = - 20;
434  break;
435  }
436  // Restart the computation
437  if (localVerbose > 0) {
438  cout << " Iteration " << outerIter;
439  cout << " - Failure for DPOTRF";
440  cout << " - RESET search directions to residuals\n";
441  }
442  P.Scale(-1.0, H);
443  } // if (info != 0)
444  else {
445  // Compute the projected stiffness matrix P^T K H
446  // Note: Use S as a workspace
447  // Use MM to store this matrix
448  modalTool.localProjection(blockSize, blockSize, xr, KP.Values(), xr, H.Values(), xr,
449  MM, blockSize, S);
450  callLAPACK.POTRS('U', blockSize, blockSize, KK, blockSize, MM, blockSize, &info);
451  if (info < 0) {
452  if (localVerbose > 0) {
453  cerr << " Iteration " << outerIter;
454  cerr << " - DPOTRS has a critical failure (" << info << ")" << endl;
455  }
456  info = -20;
457  break;
458  }
459  // Define the new search directions
460  // Note: Use R as a workspace
461  memcpy(R.Values(), P.Values(), xr*blockSize*sizeof(double));
462  callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, R.Values(), xr, MM, blockSize,
463  0.0, P.Values(), xr);
464  callBLAS.AXPY(xr*blockSize, -1.0, H.Values(), P.Values());
465 
466  // Check orthogonality of P with previous directions
467  // Note: Use KK and MM as workspaces
468  if (verbose > 2) {
469  callBLAS.GEMM('T', 'N', blockSize, blockSize, xr, 1.0, P.Values(), xr,
470  KP.Values(), xr, 0.0, MM, blockSize);
471  MyComm.SumAll(MM, KK, blockSize*blockSize);
472  if (localVerbose > 0) {
473  double dotMax = 0.0;
474  for (j = 0; j < blockSize*blockSize; ++j) {
475  dotMax = (fabs(KK[j]) > dotMax) ? fabs(KK[j]) : dotMax;
476  }
477  cout << endl;
478  cout << " K-Orthogonality check for search directions = " << dotMax << endl;
479  cout << endl;
480  }
481  } // if (verbose > 2)
482 
483  } // if (info != 0)
484 
485  } // if (localSize == blockSize)
486  timeSearchP += MyWatch.WallTime();
487 
488  // Apply the mass matrix on P
489  timeMassOp -= MyWatch.WallTime();
490  if (M)
491  M->Apply(P, MP);
492  timeMassOp += MyWatch.WallTime();
493  massOp += blockSize;
494 
495  if (knownEV > 0) {
496  // Orthogonalize P against the known eigenvectors
497  // Note: Use R as a temporary work space
498  Epetra_MultiVector copyQ(View, Q, 0, knownEV);
499  timeOrtho -= MyWatch.WallTime();
500  modalTool.massOrthonormalize(P, MP, M, copyQ, blockSize, 1, R.Values());
501  timeOrtho += MyWatch.WallTime();
502  }
503 
504  // Apply the stiffness matrix to P
505  timeStifOp -= MyWatch.WallTime();
506  K->Apply(P, KP);
507  timeStifOp += MyWatch.WallTime();
508  stifOp += blockSize;
509 
510  } // if ((outerIter == 1) || (reStart == true))
511 
512  // Form "local" mass and stiffness matrices
513  // Note: Use S as a temporary workspace
514  timeLocalProj -= MyWatch.WallTime();
515  modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, KX.Values(), xr,
516  KK, localSize, S);
517  modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, MX.Values(), xr,
518  MM, localSize, S);
519  if (localSize > blockSize) {
520  modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, KP.Values(), xr,
521  KK + blockSize*localSize, localSize, S);
522  modalTool.localProjection(blockSize, blockSize, xr, P.Values(), xr, KP.Values(), xr,
523  KK + blockSize*localSize + blockSize, localSize, S);
524  modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, MP.Values(), xr,
525  MM + blockSize*localSize, localSize, S);
526  modalTool.localProjection(blockSize, blockSize, xr, P.Values(), xr, MP.Values(), xr,
527  MM + blockSize*localSize + blockSize, localSize, S);
528  } // if (localSize > blockSize)
529  timeLocalProj += MyWatch.WallTime();
530 
531  // Perform a spectral decomposition
532  int nevLocal = localSize;
533  timeLocalSolve -= MyWatch.WallTime();
534  info = modalTool.directSolver(localSize, KK, localSize, MM, localSize, nevLocal,
535  S, localSize, theta, localVerbose,
536  (blockSize == 1) ? 1 : 0);
537  timeLocalSolve += MyWatch.WallTime();
538 
539  if (info < 0) {
540  // Stop when spectral decomposition has a critical failure
541  break;
542  } // if (info < 0)
543 
544  // Check for restarting
545  if ((theta[0] < 0.0) || (nevLocal < blockSize)) {
546  if (localVerbose > 0) {
547  cout << " Iteration " << outerIter;
548  cout << "- Failure for spectral decomposition - RESTART with new random search\n";
549  }
550  if (blockSize == 1) {
551  X.Random();
552  nFound = blockSize;
553  }
554  else {
555  Epetra_MultiVector Xinit(View, X, 1, blockSize-1);
556  Xinit.Random();
557  nFound = blockSize - 1;
558  } // if (blockSize == 1)
559  reStart = true;
560  numRestart += 1;
561  info = 0;
562  continue;
563  } // if ((theta[0] < 0.0) || (nevLocal < blockSize))
564 
565  if ((localSize == twoBlocks) && (nevLocal == blockSize)) {
566  for (j = 0; j < nevLocal; ++j)
567  memcpy(S + j*blockSize, S + j*twoBlocks, blockSize*sizeof(double));
568  localSize = blockSize;
569  }
570 
571  // Check the direction of eigenvectors
572  for (j = 0; j < nevLocal; ++j) {
573  double coeff = S[j + j*localSize];
574  if (coeff < 0.0)
575  callBLAS.SCAL(localSize, -1.0, S + j*localSize);
576  }
577 
578  // Compute the residuals
579  timeResidual -= MyWatch.WallTime();
580  callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KX.Values(), xr,
581  S, localSize, 0.0, R.Values(), xr);
582  if (localSize == twoBlocks) {
583  callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KP.Values(), xr,
584  S + blockSize, localSize, 1.0, R.Values(), xr);
585  }
586  for (j = 0; j < blockSize; ++j)
587  callBLAS.SCAL(localSize, theta[j], S + j*localSize);
588  callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, -1.0, MX.Values(), xr,
589  S, localSize, 1.0, R.Values(), xr);
590  if (localSize == twoBlocks) {
591  callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, -1.0, MP.Values(), xr,
592  S + blockSize, localSize, 1.0, R.Values(), xr);
593  }
594  for (j = 0; j < blockSize; ++j)
595  callBLAS.SCAL(localSize, 1.0/theta[j], S + j*localSize);
596  timeResidual += MyWatch.WallTime();
597 
598  // Compute the norms of the residuals
599  timeNorm -= MyWatch.WallTime();
600  if (vectWeight)
601  R.NormWeighted(*vectWeight, normR);
602  else
603  R.Norm2(normR);
604  // Scale the norms of residuals with the eigenvalues
605  // Count the converged eigenvectors
606  nFound = 0;
607  for (j = 0; j < blockSize; ++j) {
608  normR[j] = (theta[j] == 0.0) ? normR[j] : normR[j]/theta[j];
609  if (normR[j] < tolEigenSolve)
610  nFound += 1;
611  }
612  timeNorm += MyWatch.WallTime();
613 
614  // Store the residual history
615  if (localVerbose > 2) {
616  memcpy(resHistory + historyCount*blockSize, normR, blockSize*sizeof(double));
617  historyCount += 1;
618  }
619 
620  // Print information on current iteration
621  if (localVerbose > 0) {
622  cout << " Iteration " << outerIter << " - Number of converged eigenvectors ";
623  cout << knownEV + nFound << endl;
624  }
625 
626  if (localVerbose > 1) {
627  cout << endl;
628  cout.precision(2);
629  cout.setf(ios::scientific, ios::floatfield);
630  for (i=0; i<blockSize; ++i) {
631  cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i;
632  cout << " = " << normR[i] << endl;
633  }
634  cout << endl;
635  cout.precision(2);
636  for (i=0; i<blockSize; ++i) {
637  cout << " Iteration " << outerIter << " - Ritz eigenvalue " << i;
638  cout.setf((fabs(theta[i]) < 0.01) ? ios::scientific : ios::fixed, ios::floatfield);
639  cout << " = " << theta[i] << endl;
640  }
641  cout << endl;
642  }
643 
644  if (nFound == 0) {
645  // Update the spaces
646  // Note: Use H as a temporary work space
647  timeLocalUpdate -= MyWatch.WallTime();
648  memcpy(H.Values(), X.Values(), xr*blockSize*sizeof(double));
649  callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, H.Values(), xr, S, localSize,
650  0.0, X.Values(), xr);
651  memcpy(H.Values(), KX.Values(), xr*blockSize*sizeof(double));
652  callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, H.Values(), xr, S, localSize,
653  0.0, KX.Values(), xr);
654  if (M) {
655  memcpy(H.Values(), MX.Values(), xr*blockSize*sizeof(double));
656  callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, H.Values(), xr, S, localSize,
657  0.0, MX.Values(), xr);
658  }
659  if (localSize == twoBlocks) {
660  callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, P.Values(), xr,
661  S + blockSize, localSize, 1.0, X.Values(), xr);
662  callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KP.Values(), xr,
663  S + blockSize, localSize, 1.0, KX.Values(), xr);
664  if (M) {
665  callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, MP.Values(), xr,
666  S + blockSize, localSize, 1.0, MX.Values(), xr);
667  }
668  } // if (localSize == twoBlocks)
669  timeLocalUpdate += MyWatch.WallTime();
670  // When required, monitor some orthogonalities
671  if (verbose > 2) {
672  if (knownEV == 0) {
673  accuracyCheck(&X, &MX, &R, 0, (localSize>blockSize) ? &P : 0);
674  }
675  else {
676  Epetra_MultiVector copyQ(View, Q, 0, knownEV);
677  accuracyCheck(&X, &MX, &R, &copyQ, (localSize>blockSize) ? &P : 0);
678  }
679  } // if (verbose > 2)
680  continue;
681  } // if (nFound == 0)
682 
683  // Order the Ritz eigenvectors by putting the converged vectors at the beginning
684  int firstIndex = blockSize;
685  for (j = 0; j < blockSize; ++j) {
686  if (normR[j] >= tolEigenSolve) {
687  firstIndex = j;
688  break;
689  }
690  } // for (j = 0; j < blockSize; ++j)
691  while (firstIndex < nFound) {
692  for (j = firstIndex; j < blockSize; ++j) {
693  if (normR[j] < tolEigenSolve) {
694  // Swap the j-th and firstIndex-th position
695  callFortran.SWAP(localSize, S + j*localSize, 1, S + firstIndex*localSize, 1);
696  callFortran.SWAP(1, theta + j, 1, theta + firstIndex, 1);
697  callFortran.SWAP(1, normR + j, 1, normR + firstIndex, 1);
698  break;
699  }
700  } // for (j = firstIndex; j < blockSize; ++j)
701  for (j = 0; j < blockSize; ++j) {
702  if (normR[j] >= tolEigenSolve) {
703  firstIndex = j;
704  break;
705  }
706  } // for (j = 0; j < blockSize; ++j)
707  } // while (firstIndex < nFound)
708 
709  // Copy the converged eigenvalues
710  memcpy(lambda + knownEV, theta, nFound*sizeof(double));
711 
712  // Convergence test
713  if (knownEV + nFound >= numEigen) {
714  callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, X.Values(), xr,
715  S, localSize, 0.0, R.Values(), xr);
716  if (localSize > blockSize) {
717  callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, P.Values(), xr,
718  S + blockSize, localSize, 1.0, R.Values(), xr);
719  }
720  memcpy(Q.Values() + knownEV*xr, R.Values(), nFound*xr*sizeof(double));
721  knownEV += nFound;
722  if (localVerbose == 1) {
723  cout << endl;
724  cout.precision(2);
725  cout.setf(ios::scientific, ios::floatfield);
726  for (i=0; i<blockSize; ++i) {
727  cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i;
728  cout << " = " << normR[i] << endl;
729  }
730  cout << endl;
731  }
732  break;
733  }
734 
735  // Store the converged eigenvalues and eigenvectors
736  callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, X.Values(), xr,
737  S, localSize, 0.0, Q.Values() + knownEV*xr, xr);
738  if (localSize == twoBlocks) {
739  callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, P.Values(), xr,
740  S + blockSize, localSize, 1.0, Q.Values() + knownEV*xr, xr);
741  }
742  knownEV += nFound;
743 
744  // Define the restarting vectors
745  timeRestart -= MyWatch.WallTime();
746  int leftOver = (nevLocal < blockSize + nFound) ? nevLocal - nFound : blockSize;
747  double *Snew = S + nFound*localSize;
748  memcpy(H.Values(), X.Values(), blockSize*xr*sizeof(double));
749  callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, H.Values(), xr,
750  Snew, localSize, 0.0, X.Values(), xr);
751  memcpy(H.Values(), KX.Values(), blockSize*xr*sizeof(double));
752  callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, H.Values(), xr,
753  Snew, localSize, 0.0, KX.Values(), xr);
754  if (M) {
755  memcpy(H.Values(), MX.Values(), blockSize*xr*sizeof(double));
756  callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, H.Values(), xr,
757  Snew, localSize, 0.0, MX.Values(), xr);
758  }
759  if (localSize == twoBlocks) {
760  callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, P.Values(), xr,
761  Snew+blockSize, localSize, 1.0, X.Values(), xr);
762  callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, KP.Values(), xr,
763  Snew+blockSize, localSize, 1.0, KX.Values(), xr);
764  if (M) {
765  callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, MP.Values(), xr,
766  Snew+blockSize, localSize, 1.0, MX.Values(), xr);
767  }
768  } // if (localSize == twoBlocks)
769  if (nevLocal < blockSize + nFound) {
770  // Put new random vectors at the end of the block
771  Epetra_MultiVector Xtmp(View, X, leftOver, blockSize - leftOver);
772  Xtmp.Random();
773  }
774  else {
775  nFound = 0;
776  } // if (nevLocal < blockSize + nFound)
777  reStart = true;
778  timeRestart += MyWatch.WallTime();
779 
780  } // for (outerIter = 1; outerIter <= maxIterEigenSolve; ++outerIter)
781  timeOuterLoop += MyWatch.WallTime();
782  highMem = (highMem > currentSize()) ? highMem : currentSize();
783 
784  // Clean memory
785  delete[] work1;
786  delete[] work2;
787  if (vectWeight)
788  delete vectWeight;
789 
790  // Sort the eigenpairs
791  timePostProce -= MyWatch.WallTime();
792  if ((info == 0) && (knownEV > 0)) {
793  mySort.sortScalars_Vectors(knownEV, lambda, Q.Values(), Q.MyLength());
794  }
795  timePostProce += MyWatch.WallTime();
796 
797  return (info == 0) ? knownEV : info;
798 
799 }
800 
801 
802 void BRQMIN::accuracyCheck(const Epetra_MultiVector *X, const Epetra_MultiVector *MX,
803  const Epetra_MultiVector *R, const Epetra_MultiVector *Q,
804  const Epetra_MultiVector *P) const {
805 
806  cout.precision(2);
807  cout.setf(ios::scientific, ios::floatfield);
808  double tmp;
809 
810  int myPid = MyComm.MyPID();
811 
812  if (X) {
813  if (M) {
814  if (MX) {
815  tmp = myVerify.errorEquality(X, MX, M);
816  if (myPid == 0)
817  cout << " >> Relative difference between MX and M*X = " << tmp << endl;
818  }
819  tmp = myVerify.errorOrthonormality(X, M);
820  if (myPid == 0)
821  cout << " >> Error in X^T M X - I = " << tmp << endl;
822  }
823  else {
824  tmp = myVerify.errorOrthonormality(X, 0);
825  if (myPid == 0)
826  cout << " >> Error in X^T X - I = " << tmp << endl;
827  }
828  }
829 
830  if ((R) && (X)) {
831  tmp = myVerify.errorOrthogonality(X, R);
832  if (myPid == 0)
833  cout << " >> Orthogonality X^T R up to " << tmp << endl;
834  }
835 
836  if (Q == 0)
837  return;
838 
839  if (M) {
840  tmp = myVerify.errorOrthonormality(Q, M);
841  if (myPid == 0)
842  cout << " >> Error in Q^T M Q - I = " << tmp << endl;
843  if (X) {
844  tmp = myVerify.errorOrthogonality(Q, X, M);
845  if (myPid == 0)
846  cout << " >> Orthogonality Q^T M X up to " << tmp << endl;
847  }
848  if (P) {
849  tmp = myVerify.errorOrthogonality(Q, P, M);
850  if (myPid == 0)
851  cout << " >> Orthogonality Q^T M P up to " << tmp << endl;
852  }
853  }
854  else {
855  tmp = myVerify.errorOrthonormality(Q, 0);
856  if (myPid == 0)
857  cout << " >> Error in Q^T Q - I = " << tmp << endl;
858  if (X) {
859  tmp = myVerify.errorOrthogonality(Q, X, 0);
860  if (myPid == 0)
861  cout << " >> Orthogonality Q^T X up to " << tmp << endl;
862  }
863  if (P) {
864  tmp = myVerify.errorOrthogonality(Q, P, 0);
865  if (myPid == 0)
866  cout << " >> Orthogonality Q^T P up to " << tmp << endl;
867  }
868  }
869 
870 }
871 
872 
873 void BRQMIN::initializeCounters() {
874 
875  historyCount = 0;
876  if (resHistory) {
877  delete[] resHistory;
878  resHistory = 0;
879  }
880 
881  memRequested = 0.0;
882  highMem = 0.0;
883 
884  massOp = 0;
885  numRestart = 0;
886  outerIter = 0;
887  precOp = 0;
888  residual = 0;
889  stifOp = 0;
890 
891  timeLocalProj = 0.0;
892  timeLocalSolve = 0.0;
893  timeLocalUpdate = 0.0;
894  timeMassOp = 0.0;
895  timeNorm = 0.0;
896  timeOrtho = 0.0;
897  timeOuterLoop = 0.0;
898  timePostProce = 0.0;
899  timePrecOp = 0.0;
900  timeResidual = 0.0;
901  timeRestart = 0.0;
902  timeSearchP = 0.0;
903  timeStifOp = 0.0;
904 
905 
906 }
907 
908 
909 void BRQMIN::algorithmInfo() const {
910 
911  int myPid = MyComm.MyPID();
912 
913  if (myPid ==0) {
914  cout << " Algorithm: BRQMIN with Cholesky-based local eigensolver\n";
915  cout << " Block Size: " << blockSize << endl;
916  }
917 
918 }
919 
920 
921 void BRQMIN::historyInfo() const {
922 
923  if (resHistory) {
924  int j;
925  cout << " Block Size: " << blockSize << endl;
926  cout << endl;
927  cout << " Residuals\n";
928  cout << endl;
929  cout.precision(4);
930  cout.setf(ios::scientific, ios::floatfield);
931  for (j = 0; j < historyCount; ++j) {
932  int ii;
933  for (ii = 0; ii < blockSize; ++ii)
934  cout << resHistory[blockSize*j + ii] << endl;
935  }
936  cout << endl;
937  }
938 
939 }
940 
941 
942 void BRQMIN::memoryInfo() const {
943 
944  int myPid = MyComm.MyPID();
945 
946  double maxHighMem = 0.0;
947  double tmp = highMem;
948  MyComm.MaxAll(&tmp, &maxHighMem, 1);
949 
950  double maxMemRequested = 0.0;
951  tmp = memRequested;
952  MyComm.MaxAll(&tmp, &maxMemRequested, 1);
953 
954  if (myPid == 0) {
955  cout.precision(2);
956  cout.setf(ios::fixed, ios::floatfield);
957  cout << " Memory requested per processor by the eigensolver = (EST) ";
958  cout.width(6);
959  cout << maxMemRequested << " MB " << endl;
960  cout << endl;
961  cout << " High water mark in eigensolver = (EST) ";
962  cout.width(6);
963  cout << maxHighMem << " MB " << endl;
964  cout << endl;
965  }
966 
967 }
968 
969 
970 void BRQMIN::operationInfo() const {
971 
972  int myPid = MyComm.MyPID();
973 
974  if (myPid == 0) {
975  cout << " --- Operations ---\n\n";
976  cout << " Total number of mass matrix multiplications = ";
977  cout.width(9);
978  cout << massOp + modalTool.getNumProj_MassMult() + modalTool.getNumNorm_MassMult() << endl;
979  cout << " Mass multiplications in eigensolver = ";
980  cout.width(9);
981  cout << massOp << endl;
982  cout << " Mass multiplications for orthogonalization = ";
983  cout.width(9);
984  cout << modalTool.getNumProj_MassMult() << endl;
985  cout << " Mass multiplications for normalization = ";
986  cout.width(9);
987  cout << modalTool.getNumNorm_MassMult() << endl;
988  cout << " Total number of stiffness matrix operations = ";
989  cout.width(9);
990  cout << stifOp << endl;
991  cout << " Total number of preconditioner applications = ";
992  cout.width(9);
993  cout << precOp << endl;
994  cout << " Total number of computed eigen-residuals = ";
995  cout.width(9);
996  cout << residual << endl;
997  cout << "\n";
998  cout << " Total number of outer iterations = ";
999  cout.width(9);
1000  cout << outerIter << endl;
1001  cout << " Number of restarts = ";
1002  cout.width(9);
1003  cout << numRestart << endl;
1004  cout << "\n";
1005  }
1006 
1007 }
1008 
1009 
1010 void BRQMIN::timeInfo() const {
1011 
1012  int myPid = MyComm.MyPID();
1013 
1014  if (myPid == 0) {
1015  cout << " --- Timings ---\n\n";
1016  cout.setf(ios::fixed, ios::floatfield);
1017  cout.precision(2);
1018  cout << " Total time for outer loop = ";
1019  cout.width(9);
1020  cout << timeOuterLoop << " s" << endl;
1021  cout << " Time for mass matrix operations = ";
1022  cout.width(9);
1023  cout << timeMassOp << " s ";
1024  cout.width(5);
1025  cout << 100*timeMassOp/timeOuterLoop << " %\n";
1026  cout << " Time for stiffness matrix operations = ";
1027  cout.width(9);
1028  cout << timeStifOp << " s ";
1029  cout.width(5);
1030  cout << 100*timeStifOp/timeOuterLoop << " %\n";
1031  cout << " Time for preconditioner applications = ";
1032  cout.width(9);
1033  cout << timePrecOp << " s ";
1034  cout.width(5);
1035  cout << 100*timePrecOp/timeOuterLoop << " %\n";
1036  cout << " Time for defining search directions = ";
1037  cout.width(9);
1038  cout << timeSearchP << " s ";
1039  cout.width(5);
1040  cout << 100*timeSearchP/timeOuterLoop << " %\n";
1041  cout << " Time for orthogonalizations = ";
1042  cout.width(9);
1043  cout << timeOrtho << " s ";
1044  cout.width(5);
1045  cout << 100*timeOrtho/timeOuterLoop << " %\n";
1046  cout << " Projection step : ";
1047  cout.width(9);
1048  cout << modalTool.getTimeProj() << " s\n";
1049  cout << " Q^T mult. :: ";
1050  cout.width(9);
1051  cout << modalTool.getTimeProj_QtMult() << " s\n";
1052  cout << " Q mult. :: ";
1053  cout.width(9);
1054  cout << modalTool.getTimeProj_QMult() << " s\n";
1055  cout << " Mass mult. :: ";
1056  cout.width(9);
1057  cout << modalTool.getTimeProj_MassMult() << " s\n";
1058  cout << " Normalization step : ";
1059  cout.width(9);
1060  cout << modalTool.getTimeNorm() << " s\n";
1061  cout << " Mass mult. :: ";
1062  cout.width(9);
1063  cout << modalTool.getTimeNorm_MassMult() << " s\n";
1064  cout << " Time for local projection = ";
1065  cout.width(9);
1066  cout << timeLocalProj << " s ";
1067  cout.width(5);
1068  cout << 100*timeLocalProj/timeOuterLoop << " %\n";
1069  cout << " Time for local eigensolve = ";
1070  cout.width(9);
1071  cout << timeLocalSolve << " s ";
1072  cout.width(5);
1073  cout << 100*timeLocalSolve/timeOuterLoop << " %\n";
1074  cout << " Time for local update = ";
1075  cout.width(9);
1076  cout << timeLocalUpdate << " s ";
1077  cout.width(5);
1078  cout << 100*timeLocalUpdate/timeOuterLoop << " %\n";
1079  cout << " Time for residual computations = ";
1080  cout.width(9);
1081  cout << timeResidual << " s ";
1082  cout.width(5);
1083  cout << 100*timeResidual/timeOuterLoop << " %\n";
1084  cout << " Time for residuals norm computations = ";
1085  cout.width(9);
1086  cout << timeNorm << " s ";
1087  cout.width(5);
1088  cout << 100*timeNorm/timeOuterLoop << " %\n";
1089  cout << " Time for restarting space definition = ";
1090  cout.width(9);
1091  cout << timeRestart << " s ";
1092  cout.width(5);
1093  cout << 100*timeRestart/timeOuterLoop << " %\n";
1094  cout << "\n";
1095  cout << " Total time for post-processing = ";
1096  cout.width(9);
1097  cout << timePostProce << " s\n";
1098  cout << endl;
1099  } // if (myPid == 0)
1100 
1101 }
1102 
1103