Anasazi  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Davidson.cpp
1 // @HEADER
2 // *****************************************************************************
3 // Anasazi: Block Eigensolvers Package
4 //
5 // Copyright 2004 NTESS and the Anasazi contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 //**************************************************************************
11 //
12 // NOTICE
13 //
14 // This software is a result of the research described in the cout
15 //
16 // " A comparison of algorithms for modal analysis in the absence
17 // of a sparse direct method", P. Arbenz, R. Lehoucq, and U. Hetmaniuk,
18 // Sandia National Laboratories, Technical cout SAND2003-1028J.
19 //
20 // It is based on the Epetra, AztecOO, and ML packages defined in the Trilinos
21 // framework ( http://software.sandia.gov/trilinos/ ).
22 //
23 // The distribution of this software follows also the rules defined in Trilinos.
24 // This notice shall be marked on any reproduction of this software, in whole or
25 // in part.
26 //
27 // This program is distributed in the hope that it will be useful, but
28 // WITHOUT ANY WARRANTY; without even the implied warranty of
29 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
30 //
31 // Code Authors: U. Hetmaniuk (ulhetma@sandia.gov), R. Lehoucq (rblehou@sandia.gov)
32 //
33 //**************************************************************************
34 
35 #include "Davidson.h"
36 
37 
38 Davidson::Davidson(const Epetra_Comm &_Comm, const Epetra_Operator *KK,
39  const Epetra_Operator *PP, int _blk, int _numBlk,
40  double _tol, int _maxIter, int _verb) :
41  myVerify(_Comm),
42  callBLAS(),
43  callFortran(),
44  modalTool(_Comm),
45  mySort(),
46  MyComm(_Comm),
47  K(KK),
48  M(0),
49  Prec(PP),
50  MyWatch(_Comm),
51  tolEigenSolve(_tol),
52  maxIterEigenSolve(_maxIter),
53  blockSize(_blk),
54  numBlock(_numBlk),
55  normWeight(0),
56  verbose(_verb),
57  historyCount(0),
58  resHistory(0),
59  maxSpaceSize(0),
60  sumSpaceSize(0),
61  spaceSizeHistory(0),
62  memRequested(0.0),
63  highMem(0.0),
64  massOp(0),
65  numRestart(0),
66  outerIter(0),
67  precOp(0),
68  residual(0),
69  stifOp(0),
70  timeLocalProj(0.0),
71  timeLocalSolve(0.0),
72  timeLocalUpdate(0.0),
73  timeMassOp(0.0),
74  timeNorm(0.0),
75  timeOrtho(0.0),
76  timeOuterLoop(0.0),
77  timePostProce(0.0),
78  timePrecOp(0.0),
79  timeResidual(0.0),
80  timeRestart(0.0),
81  timeStifOp(0.0)
82  {
83 
84 }
85 
86 
87 Davidson::Davidson(const Epetra_Comm &_Comm, const Epetra_Operator *KK,
88  const Epetra_Operator *MM, const Epetra_Operator *PP, int _blk, int _numBlk,
89  double _tol, int _maxIter, int _verb, double *_weight) :
90  myVerify(_Comm),
91  callBLAS(),
92  callFortran(),
93  modalTool(_Comm),
94  mySort(),
95  MyComm(_Comm),
96  K(KK),
97  M(MM),
98  Prec(PP),
99  MyWatch(_Comm),
100  tolEigenSolve(_tol),
101  maxIterEigenSolve(_maxIter),
102  blockSize(_blk),
103  numBlock(_numBlk),
104  normWeight(_weight),
105  verbose(_verb),
106  historyCount(0),
107  resHistory(0),
108  maxSpaceSize(0),
109  sumSpaceSize(0),
110  spaceSizeHistory(0),
111  memRequested(0.0),
112  highMem(0.0),
113  massOp(0),
114  numRestart(0),
115  outerIter(0),
116  precOp(0),
117  residual(0),
118  stifOp(0),
119  timeLocalProj(0.0),
120  timeLocalSolve(0.0),
121  timeLocalUpdate(0.0),
122  timeMassOp(0.0),
123  timeNorm(0.0),
124  timeOrtho(0.0),
125  timeOuterLoop(0.0),
126  timePostProce(0.0),
127  timePrecOp(0.0),
128  timeResidual(0.0),
129  timeRestart(0.0),
130  timeStifOp(0.0)
131  {
132 
133 }
134 
135 
136 Davidson::~Davidson() {
137 
138  if (resHistory) {
139  delete[] resHistory;
140  resHistory = 0;
141  }
142 
143  if (spaceSizeHistory) {
144  delete[] spaceSizeHistory;
145  spaceSizeHistory = 0;
146  }
147 
148 }
149 
150 
151 int Davidson::solve(int numEigen, Epetra_MultiVector &Q, double *lambda) {
152 
153  return Davidson::reSolve(numEigen, Q, lambda);
154 
155 }
156 
157 
158 int Davidson::reSolve(int numEigen, Epetra_MultiVector &Q, double *lambda, int startingEV) {
159 
160  // Computes the smallest eigenvalues and the corresponding eigenvectors
161  // of the generalized eigenvalue problem
162  //
163  // K X = M X Lambda
164  //
165  // using a generalized Davidson algorithm
166  //
167  // Note that if M is not specified, then K X = X Lambda is solved.
168  //
169  // Input variables:
170  //
171  // numEigen (integer) = Number of eigenmodes requested
172  //
173  // Q (Epetra_MultiVector) = Converged eigenvectors
174  // The number of columns of Q must be at least numEigen + blockSize.
175  // The rows of Q are distributed across processors.
176  // At exit, the first numEigen columns contain the eigenvectors requested.
177  //
178  // lambda (array of doubles) = Converged eigenvalues
179  // At input, it must be of size numEigen + blockSize.
180  // At exit, the first numEigen locations contain the eigenvalues requested.
181  //
182  // startingEV (integer) = Number of existing converged eigenvectors
183  // We assume that the user has check the eigenvectors and
184  // their M-orthonormality.
185  //
186  // Return information on status of computation
187  //
188  // info >= 0 >> Number of converged eigenpairs at the end of computation
189  //
190  // // Failure due to input arguments
191  //
192  // info = - 1 >> The stiffness matrix K has not been specified.
193  // info = - 2 >> The maps for the matrix K and the matrix M differ.
194  // info = - 3 >> The maps for the matrix K and the preconditioner P differ.
195  // info = - 4 >> The maps for the vectors and the matrix K differ.
196  // info = - 5 >> Q is too small for the number of eigenvalues requested.
197  // info = - 6 >> Q is too small for the computation parameters.
198  //
199  // info = - 8 >> The number of blocks is too small for the number of eigenvalues.
200  //
201  // info = - 10 >> Failure during the mass orthonormalization
202  //
203  // info = - 30 >> MEMORY
204  //
205 
206  // Check the input parameters
207 
208  if (numEigen <= startingEV) {
209  return startingEV;
210  }
211 
212  int info = myVerify.inputArguments(numEigen, K, M, Prec, Q, minimumSpaceDimension(numEigen));
213  if (info < 0)
214  return info;
215 
216  int myPid = MyComm.MyPID();
217 
218  if (numBlock*blockSize < numEigen) {
219  if (myPid == 0) {
220  cerr << endl;
221  cerr << " !!! The space dimension (# of blocks x size of blocks) must be greater than ";
222  cerr << " the number of eigenvalues !!!\n";
223  cerr << " Number of blocks = " << numBlock << endl;
224  cerr << " Size of blocks = " << blockSize << endl;
225  cerr << " Number of eigenvalues = " << numEigen << endl;
226  cerr << endl;
227  }
228  return -8;
229  }
230 
231  // Get the weight for approximating the M-inverse norm
232  Epetra_Vector *vectWeight = 0;
233  if (normWeight) {
234  vectWeight = new Epetra_Vector(View, Q.Map(), normWeight);
235  }
236 
237  int knownEV = startingEV;
238  int localVerbose = verbose*(myPid==0);
239 
240  // Define local block vectors
241  //
242  // MX = Working vectors (storing M*X if M is specified, else pointing to X)
243  // KX = Working vectors (storing K*X)
244  //
245  // R = Residuals
246 
247  int xr = Q.MyLength();
248  int dimSearch = blockSize*numBlock;
249 
250  Epetra_MultiVector X(View, Q, 0, dimSearch + blockSize);
251  if (knownEV > 0) {
252  Epetra_MultiVector copyX(View, Q, knownEV, blockSize);
253  copyX.Random();
254  }
255  else {
256  X.Random();
257  }
258 
259  int tmp;
260  tmp = (M == 0) ? 2*blockSize*xr : 3*blockSize*xr;
261 
262  double *work1 = new (nothrow) double[tmp];
263  if (work1 == 0) {
264  if (vectWeight)
265  delete vectWeight;
266  info = -30;
267  return info;
268  }
269  memRequested += sizeof(double)*tmp/(1024.0*1024.0);
270 
271  highMem = (highMem > currentSize()) ? highMem : currentSize();
272 
273  double *tmpD = work1;
274 
275  Epetra_MultiVector KX(View, Q.Map(), tmpD, xr, blockSize);
276  tmpD = tmpD + xr*blockSize;
277 
278  Epetra_MultiVector MX(View, Q.Map(), (M) ? tmpD : X.Values(), xr, blockSize);
279  tmpD = (M) ? tmpD + xr*blockSize : tmpD;
280 
281  Epetra_MultiVector R(View, Q.Map(), tmpD, xr, blockSize);
282 
283  // Define arrays
284  //
285  // theta = Store the local eigenvalues (size: dimSearch)
286  // normR = Store the norm of residuals (size: blockSize)
287  //
288  // KK = Local stiffness matrix (size: dimSearch x dimSearch)
289  //
290  // S = Local eigenvectors (size: dimSearch x dimSearch)
291  //
292  // tmpKK = Local workspace (size: blockSize x blockSize)
293 
294  int lwork2 = blockSize + dimSearch + 2*dimSearch*dimSearch + blockSize*blockSize;
295  double *work2 = new (nothrow) double[lwork2];
296  if (work2 == 0) {
297  if (vectWeight)
298  delete vectWeight;
299  delete[] work1;
300  info = -30;
301  return info;
302  }
303 
304  memRequested += sizeof(double)*lwork2/(1024.0*1024.0);
305  highMem = (highMem > currentSize()) ? highMem : currentSize();
306 
307  tmpD = work2;
308 
309  double *theta = tmpD;
310  tmpD = tmpD + dimSearch;
311 
312  double *normR = tmpD;
313  tmpD = tmpD + blockSize;
314 
315  double *KK = tmpD;
316  tmpD = tmpD + dimSearch*dimSearch;
317  memset(KK, 0, dimSearch*dimSearch*sizeof(double));
318 
319  double *S = tmpD;
320  tmpD = tmpD + dimSearch*dimSearch;
321 
322  double *tmpKK = tmpD;
323 
324  // Define an array to store the residuals history
325  if (localVerbose > 2) {
326  resHistory = new (nothrow) double[maxIterEigenSolve*blockSize];
327  spaceSizeHistory = new (nothrow) int[maxIterEigenSolve];
328  if ((resHistory == 0) || (spaceSizeHistory == 0)) {
329  if (vectWeight)
330  delete vectWeight;
331  delete[] work1;
332  delete[] work2;
333  info = -30;
334  return info;
335  }
336  historyCount = 0;
337  }
338 
339  // Miscellaneous definitions
340 
341  bool reStart = false;
342  numRestart = 0;
343 
344  bool criticalExit = false;
345 
346  int bStart = 0;
347  int offSet = 0;
348  numBlock = (dimSearch/blockSize) - (knownEV/blockSize);
349 
350  int nFound = blockSize;
351  int i, j;
352 
353  if (localVerbose > 0) {
354  cout << endl;
355  cout << " *|* Problem: ";
356  if (M)
357  cout << "K*Q = M*Q D ";
358  else
359  cout << "K*Q = Q D ";
360  if (Prec)
361  cout << " with preconditioner";
362  cout << endl;
363  cout << " *|* Algorithm = Davidson algorithm (block version)" << endl;
364  cout << " *|* Size of blocks = " << blockSize << endl;
365  cout << " *|* Largest size of search space = " << numBlock*blockSize << endl;
366  cout << " *|* Number of requested eigenvalues = " << numEigen << endl;
367  cout.precision(2);
368  cout.setf(ios::scientific, ios::floatfield);
369  cout << " *|* Tolerance for convergence = " << tolEigenSolve << endl;
370  cout << " *|* Norm used for convergence: ";
371  if (vectWeight)
372  cout << "weighted L2-norm with user-provided weights" << endl;
373  else
374  cout << "L^2-norm" << endl;
375  if (startingEV > 0)
376  cout << " *|* Input converged eigenvectors = " << startingEV << endl;
377  cout << "\n -- Start iterations -- \n";
378  }
379 
380  int maxBlock = (dimSearch/blockSize) - (knownEV/blockSize);
381 
382  timeOuterLoop -= MyWatch.WallTime();
383  outerIter = 0;
384  while (outerIter <= maxIterEigenSolve) {
385 
386  highMem = (highMem > currentSize()) ? highMem : currentSize();
387 
388  int nb;
389  for (nb = bStart; nb < maxBlock; ++nb) {
390 
391  outerIter += 1;
392  if (outerIter > maxIterEigenSolve)
393  break;
394 
395  int localSize = nb*blockSize;
396 
397  Epetra_MultiVector Xcurrent(View, X, localSize + knownEV, blockSize);
398 
399  timeMassOp -= MyWatch.WallTime();
400  if (M)
401  M->Apply(Xcurrent, MX);
402  timeMassOp += MyWatch.WallTime();
403  massOp += blockSize;
404 
405  // Orthonormalize X against the known eigenvectors and the previous vectors
406  // Note: Use R as a temporary work space
407  timeOrtho -= MyWatch.WallTime();
408  if (nb == bStart) {
409  if (nFound > 0) {
410  if (knownEV == 0) {
411  info = modalTool.massOrthonormalize(Xcurrent, MX, M, Q, nFound, 2, R.Values());
412  }
413  else {
414  Epetra_MultiVector copyQ(View, X, 0, knownEV + localSize);
415  info = modalTool.massOrthonormalize(Xcurrent, MX, M, copyQ, nFound, 0, R.Values());
416  }
417  }
418  nFound = 0;
419  }
420  else {
421  Epetra_MultiVector copyQ(View, X, 0, knownEV + localSize);
422  info = modalTool.massOrthonormalize(Xcurrent, MX, M, copyQ, blockSize, 0, R.Values());
423  }
424  timeOrtho += MyWatch.WallTime();
425 
426  // Exit the code when the number of vectors exceeds the space dimension
427  if (info < 0) {
428  delete[] work1;
429  delete[] work2;
430  if (vectWeight)
431  delete vectWeight;
432  return -10;
433  }
434 
435  timeStifOp -= MyWatch.WallTime();
436  K->Apply(Xcurrent, KX);
437  timeStifOp += MyWatch.WallTime();
438  stifOp += blockSize;
439 
440  // Check the orthogonality properties of X
441  if (verbose > 2) {
442  if (knownEV + localSize == 0)
443  accuracyCheck(&Xcurrent, &MX, 0);
444  else {
445  Epetra_MultiVector copyQ(View, X, 0, knownEV + localSize);
446  accuracyCheck(&Xcurrent, &MX, &copyQ);
447  }
448  if (localVerbose > 0)
449  cout << endl;
450  } // if (verbose > 2)
451 
452  // Define the local stiffness matrix
453  // Note: S is used as a workspace
454  timeLocalProj -= MyWatch.WallTime();
455  for (j = 0; j <= nb; ++j) {
456  callBLAS.GEMM('T', 'N', blockSize, blockSize, xr,
457  1.0, X.Values()+(knownEV+j*blockSize)*xr, xr, KX.Values(), xr,
458  0.0, tmpKK, blockSize);
459  MyComm.SumAll(tmpKK, S, blockSize*blockSize);
460  int iC;
461  for (iC = 0; iC < blockSize; ++iC) {
462  double *Kpointer = KK + localSize*dimSearch + j*blockSize + iC*dimSearch;
463  memcpy(Kpointer, S + iC*blockSize, blockSize*sizeof(double));
464  }
465  }
466  timeLocalProj += MyWatch.WallTime();
467 
468  // Perform a spectral decomposition
469  timeLocalSolve -= MyWatch.WallTime();
470  int nevLocal = localSize + blockSize;
471  info = modalTool.directSolver(localSize+blockSize, KK, dimSearch, 0, 0,
472  nevLocal, S, dimSearch, theta, localVerbose, 10);
473  timeLocalSolve += MyWatch.WallTime();
474 
475  if (info != 0) {
476  // Stop as spectral decomposition has a critical failure
477  if (info < 0) {
478  criticalExit = true;
479  break;
480  }
481  // Restart as spectral decomposition failed
482  if (localVerbose > 0) {
483  cout << " Iteration " << outerIter;
484  cout << "- Failure for spectral decomposition - RESTART with new random search\n";
485  }
486  reStart = true;
487  numRestart += 1;
488  timeRestart -= MyWatch.WallTime();
489  Epetra_MultiVector Xinit(View, X, knownEV, blockSize);
490  Xinit.Random();
491  timeRestart += MyWatch.WallTime();
492  nFound = blockSize;
493  bStart = 0;
494  break;
495  } // if (info != 0)
496 
497  // Update the search space
498  // Note: Use KX as a workspace
499  timeLocalUpdate -= MyWatch.WallTime();
500  callBLAS.GEMM('N', 'N', xr, blockSize, localSize+blockSize, 1.0, X.Values()+knownEV*xr, xr,
501  S, dimSearch, 0.0, KX.Values(), xr);
502  timeLocalUpdate += MyWatch.WallTime();
503 
504  // Apply the mass matrix for the next block
505  timeMassOp -= MyWatch.WallTime();
506  if (M)
507  M->Apply(KX, MX);
508  timeMassOp += MyWatch.WallTime();
509  massOp += blockSize;
510 
511  // Apply the stiffness matrix for the next block
512  timeStifOp -= MyWatch.WallTime();
513  K->Apply(KX, R);
514  timeStifOp += MyWatch.WallTime();
515  stifOp += blockSize;
516 
517  // Form the residuals
518  timeResidual -= MyWatch.WallTime();
519  if (M) {
520  for (j = 0; j < blockSize; ++j) {
521  callBLAS.AXPY(xr, -theta[j], MX.Values() + j*xr, R.Values() + j*xr);
522  }
523  }
524  else {
525  // Note KX contains the updated block
526  for (j = 0; j < blockSize; ++j) {
527  callBLAS.AXPY(xr, -theta[j], KX.Values() + j*xr, R.Values() + j*xr);
528  }
529  }
530  timeResidual += MyWatch.WallTime();
531  residual += blockSize;
532 
533  // Compute the norm of residuals
534  timeNorm -= MyWatch.WallTime();
535  if (vectWeight) {
536  R.NormWeighted(*vectWeight, normR);
537  }
538  else {
539  R.Norm2(normR);
540  }
541  // Scale the norms of residuals with the eigenvalues
542  // Count the number of converged eigenvectors
543  nFound = 0;
544  for (j = 0; j < blockSize; ++j) {
545  normR[j] = (theta[j] == 0.0) ? normR[j] : normR[j]/theta[j];
546  if (normR[j] < tolEigenSolve)
547  nFound += 1;
548  } // for (j = 0; j < blockSize; ++j)
549  timeNorm += MyWatch.WallTime();
550 
551  // Store the residual history
552  if (localVerbose > 2) {
553  memcpy(resHistory + historyCount*blockSize, normR, blockSize*sizeof(double));
554  spaceSizeHistory[historyCount] = localSize + blockSize;
555  historyCount += 1;
556  }
557  maxSpaceSize = (maxSpaceSize > localSize+blockSize) ? maxSpaceSize : localSize+blockSize;
558  sumSpaceSize += localSize + blockSize;
559 
560  // Print information on current iteration
561  if (localVerbose > 0) {
562  cout << " Iteration " << outerIter << " - Number of converged eigenvectors ";
563  cout << knownEV + nFound << endl;
564  } // if (localVerbose > 0)
565 
566  if (localVerbose > 1) {
567  cout << endl;
568  cout.precision(2);
569  cout.setf(ios::scientific, ios::floatfield);
570  for (i=0; i<blockSize; ++i) {
571  cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i;
572  cout << " = " << normR[i] << endl;
573  }
574  cout << endl;
575  cout.precision(2);
576  for (i=0; i<nevLocal; ++i) {
577  cout << " Iteration " << outerIter << " - Ritz eigenvalue " << i;
578  cout.setf((fabs(theta[i]) < 0.01) ? ios::scientific : ios::fixed, ios::floatfield);
579  cout << " = " << theta[i] << endl;
580  }
581  cout << endl;
582  }
583 
584  // Exit the loop to treat the converged eigenvectors
585  if (nFound > 0) {
586  nb += 1;
587  offSet = 0;
588  break;
589  }
590 
591  // Apply the preconditioner on the residuals
592  // Note: Use KX as a workspace
593  if (maxBlock == 1) {
594  if (Prec) {
595  timePrecOp -= MyWatch.WallTime();
596  Prec->ApplyInverse(R, Xcurrent);
597  timePrecOp += MyWatch.WallTime();
598  precOp += blockSize;
599  }
600  else {
601  memcpy(Xcurrent.Values(), R.Values(), blockSize*xr*sizeof(double));
602  }
603  timeRestart -= MyWatch.WallTime();
604  Xcurrent.Update(1.0, KX, -1.0);
605  timeRestart += MyWatch.WallTime();
606  break;
607  } // if (maxBlock == 1)
608 
609  if (nb == maxBlock - 1) {
610  nb += 1;
611  break;
612  }
613 
614  Epetra_MultiVector Xnext(View, X, knownEV+localSize+blockSize, blockSize);
615  if (Prec) {
616  timePrecOp -= MyWatch.WallTime();
617  Prec->ApplyInverse(R, Xnext);
618  timePrecOp += MyWatch.WallTime();
619  precOp += blockSize;
620  }
621  else {
622  memcpy(Xnext.Values(), R.Values(), blockSize*xr*sizeof(double));
623  }
624 
625  } // for (nb = bStart; nb < maxBlock; ++nb)
626 
627  if (outerIter > maxIterEigenSolve)
628  break;
629 
630  if (reStart == true) {
631  reStart = false;
632  continue;
633  }
634 
635  if (criticalExit == true)
636  break;
637 
638  // Store the final converged eigenvectors
639  if (knownEV + nFound >= numEigen) {
640  for (j = 0; j < blockSize; ++j) {
641  if (normR[j] < tolEigenSolve) {
642  memcpy(X.Values() + knownEV*xr, KX.Values() + j*xr, xr*sizeof(double));
643  lambda[knownEV] = theta[j];
644  knownEV += 1;
645  }
646  }
647  if (localVerbose == 1) {
648  cout << endl;
649  cout.precision(2);
650  cout.setf(ios::scientific, ios::floatfield);
651  for (i=0; i<blockSize; ++i) {
652  cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i;
653  cout << " = " << normR[i] << endl;
654  }
655  cout << endl;
656  }
657  break;
658  } // if (knownEV + nFound >= numEigen)
659 
660  // Treat the particular case of 1 block
661  if (maxBlock == 1) {
662  if (nFound > 0) {
663  double *Xpointer = X.Values() + (knownEV+nFound)*xr;
664  nFound = 0;
665  for (j = 0; j < blockSize; ++j) {
666  if (normR[j] < tolEigenSolve) {
667  memcpy(X.Values() + knownEV*xr, KX.Values() + j*xr, xr*sizeof(double));
668  lambda[knownEV] = theta[j];
669  knownEV += 1;
670  nFound += 1;
671  }
672  else {
673  memcpy(Xpointer + (j-nFound)*xr, KX.Values() + j*xr, xr*sizeof(double));
674  }
675  }
676  Epetra_MultiVector Xnext(View, X, knownEV + blockSize - nFound, nFound);
677  Xnext.Random();
678  }
679  else {
680  nFound = blockSize;
681  }
682  continue;
683  }
684 
685  // Define the restarting block when maxBlock > 1
686  if (nFound > 0) {
687  int firstIndex = blockSize;
688  for (j = 0; j < blockSize; ++j) {
689  if (normR[j] >= tolEigenSolve) {
690  firstIndex = j;
691  break;
692  }
693  } // for (j = 0; j < blockSize; ++j)
694  while (firstIndex < nFound) {
695  for (j = firstIndex; j < blockSize; ++j) {
696  if (normR[j] < tolEigenSolve) {
697  // Swap the j-th and firstIndex-th position
698  callFortran.SWAP(nb*blockSize, S + j*dimSearch, 1, S + firstIndex*dimSearch, 1);
699  callFortran.SWAP(1, theta + j, 1, theta + firstIndex, 1);
700  callFortran.SWAP(1, normR + j, 1, normR + firstIndex, 1);
701  break;
702  }
703  } // for (j = firstIndex; j < blockSize; ++j)
704  for (j = 0; j < blockSize; ++j) {
705  if (normR[j] >= tolEigenSolve) {
706  firstIndex = j;
707  break;
708  }
709  } // for (j = 0; j < blockSize; ++j)
710  } // while (firstIndex < nFound)
711 
712  // Copy the converged eigenvalues
713  memcpy(lambda + knownEV, theta, nFound*sizeof(double));
714 
715  } // if (nFound > 0)
716 
717  // Define the restarting size
718  bStart = ((nb - offSet) > 2) ? (nb - offSet)/2 : 0;
719 
720  // Define the restarting space and local stiffness
721  timeRestart -= MyWatch.WallTime();
722  memset(KK, 0, nb*blockSize*dimSearch*sizeof(double));
723  for (j = 0; j < bStart*blockSize; ++j) {
724  KK[j + j*dimSearch] = theta[j + nFound];
725  }
726  // Form the restarting space
727  int oldCol = nb*blockSize;
728  int newCol = nFound + (bStart+1)*blockSize;
729  newCol = (newCol > oldCol) ? oldCol : newCol;
730  callFortran.GEQRF(oldCol, newCol, S, dimSearch, theta, R.Values(), xr*blockSize, &info);
731  callFortran.ORMQR('R', 'N', xr, oldCol, newCol, S, dimSearch, theta,
732  X.Values()+knownEV*xr, xr, R.Values(), blockSize*xr, &info);
733  timeRestart += MyWatch.WallTime();
734 
735  if (nFound == 0)
736  offSet += 1;
737 
738  knownEV += nFound;
739  maxBlock = (dimSearch/blockSize) - (knownEV/blockSize);
740 
741  // Put random vectors if the Rayleigh Ritz vectors are not enough
742  newCol = nFound + (bStart+1)*blockSize;
743  if (newCol > oldCol) {
744  Epetra_MultiVector Xnext(View, X, knownEV+blockSize-nFound, nFound);
745  Xnext.Random();
746  continue;
747  }
748 
749  nFound = 0;
750 
751  } // while (outerIter <= maxIterEigenSolve)
752  timeOuterLoop += MyWatch.WallTime();
753  highMem = (highMem > currentSize()) ? highMem : currentSize();
754 
755  // Clean memory
756  delete[] work1;
757  delete[] work2;
758  if (vectWeight)
759  delete vectWeight;
760 
761  // Sort the eigenpairs
762  timePostProce -= MyWatch.WallTime();
763  if ((info == 0) && (knownEV > 0)) {
764  mySort.sortScalars_Vectors(knownEV, lambda, Q.Values(), Q.MyLength());
765  }
766  timePostProce += MyWatch.WallTime();
767 
768  return (info == 0) ? knownEV : info;
769 
770 }
771 
772 
773 void Davidson::accuracyCheck(const Epetra_MultiVector *X, const Epetra_MultiVector *MX,
774  const Epetra_MultiVector *Q) const {
775 
776  cout.precision(2);
777  cout.setf(ios::scientific, ios::floatfield);
778  double tmp;
779 
780  int myPid = MyComm.MyPID();
781 
782  if (X) {
783  if (M) {
784  if (MX) {
785  tmp = myVerify.errorEquality(X, MX, M);
786  if (myPid == 0)
787  cout << " >> Difference between MX and M*X = " << tmp << endl;
788  }
789  tmp = myVerify.errorOrthonormality(X, M);
790  if (myPid == 0)
791  cout << " >> Error in X^T M X - I = " << tmp << endl;
792  }
793  else {
794  tmp = myVerify.errorOrthonormality(X, 0);
795  if (myPid == 0)
796  cout << " >> Error in X^T X - I = " << tmp << endl;
797  }
798  }
799 
800  if (Q == 0)
801  return;
802 
803  if (M) {
804  tmp = myVerify.errorOrthonormality(Q, M);
805  if (myPid == 0)
806  cout << " >> Error in Q^T M Q - I = " << tmp << endl;
807  if (X) {
808  tmp = myVerify.errorOrthogonality(Q, X, M);
809  if (myPid == 0)
810  cout << " >> Orthogonality Q^T M X up to " << tmp << endl;
811  }
812  }
813  else {
814  tmp = myVerify.errorOrthonormality(Q, 0);
815  if (myPid == 0)
816  cout << " >> Error in Q^T Q - I = " << tmp << endl;
817  if (X) {
818  tmp = myVerify.errorOrthogonality(Q, X, 0);
819  if (myPid == 0)
820  cout << " >> Orthogonality Q^T X up to " << tmp << endl;
821  }
822  }
823 
824 }
825 
826 
827 int Davidson::minimumSpaceDimension(int nev) const {
828 
829  int myPid = MyComm.MyPID();
830 
831  if ((myPid == 0) && (numBlock*blockSize < nev)) {
832  cerr << endl;
833  cerr << " !!! The space dimension (# of blocks x size of blocks) must be greater than ";
834  cerr << " the number of eigenvalues !!!\n";
835  cerr << " Number of blocks = " << numBlock << endl;
836  cerr << " Size of blocks = " << blockSize << endl;
837  cerr << " Number of eigenvalues = " << nev << endl;
838  cerr << endl;
839  return -1;
840  }
841 
842  return nev + blockSize;
843 
844 }
845 
846 
847 void Davidson::initializeCounters() {
848 
849  historyCount = 0;
850  if (resHistory) {
851  delete[] resHistory;
852  resHistory = 0;
853  }
854 
855  maxSpaceSize = 0;
856  sumSpaceSize = 0;
857  if (spaceSizeHistory) {
858  delete[] spaceSizeHistory;
859  spaceSizeHistory = 0;
860  }
861 
862  memRequested = 0.0;
863  highMem = 0.0;
864 
865  massOp = 0;
866  numRestart = 0;
867  outerIter = 0;
868  precOp = 0;
869  residual = 0;
870  stifOp = 0;
871 
872  timeLocalProj = 0.0;
873  timeLocalSolve = 0.0;
874  timeLocalUpdate = 0.0;
875  timeMassOp = 0.0;
876  timeNorm = 0.0;
877  timeOrtho = 0.0;
878  timeOuterLoop = 0.0;
879  timePostProce = 0.0;
880  timePrecOp = 0.0;
881  timeResidual = 0.0;
882  timeRestart = 0.0;
883  timeStifOp = 0.0;
884 
885 }
886 
887 
888 void Davidson::algorithmInfo() const {
889 
890  int myPid = MyComm.MyPID();
891 
892  if (myPid ==0) {
893  cout << " Algorithm: Davidson algorithm (block version)\n";
894  cout << " Block Size: " << blockSize << endl;
895  cout << " Number of Blocks kept: " << numBlock << endl;
896  }
897 
898 }
899 
900 
901 void Davidson::historyInfo() const {
902 
903  if (resHistory) {
904  int j;
905  cout << " Block Size: " << blockSize << endl;
906  cout << endl;
907  cout << " Residuals Search Space Dim.\n";
908  cout << endl;
909  cout.precision(4);
910  cout.setf(ios::scientific, ios::floatfield);
911  for (j = 0; j < historyCount; ++j) {
912  int ii;
913  for (ii = 0; ii < blockSize; ++ii) {
914  cout << resHistory[blockSize*j + ii] << " ";
915  cout.width(4);
916  cout << spaceSizeHistory[j] << endl;
917  }
918  }
919  cout << endl;
920  }
921 
922 }
923 
924 
925 void Davidson::memoryInfo() const {
926 
927  int myPid = MyComm.MyPID();
928 
929  double maxHighMem = 0.0;
930  double tmp = highMem;
931  MyComm.MaxAll(&tmp, &maxHighMem, 1);
932 
933  double maxMemRequested = 0.0;
934  tmp = memRequested;
935  MyComm.MaxAll(&tmp, &maxMemRequested, 1);
936 
937  if (myPid == 0) {
938  cout.precision(2);
939  cout.setf(ios::fixed, ios::floatfield);
940  cout << " Memory requested per processor by the eigensolver = (EST) ";
941  cout.width(6);
942  cout << maxMemRequested << " MB " << endl;
943  cout << endl;
944  cout << " High water mark in eigensolver = (EST) ";
945  cout.width(6);
946  cout << maxHighMem << " MB " << endl;
947  cout << endl;
948  }
949 
950 }
951 
952 
953 void Davidson::operationInfo() const {
954 
955  int myPid = MyComm.MyPID();
956 
957  if (myPid == 0) {
958  cout << " --- Operations ---\n\n";
959  cout << " Total number of mass matrix multiplications = ";
960  cout.width(9);
961  cout << massOp + modalTool.getNumProj_MassMult() + modalTool.getNumNorm_MassMult() << endl;
962  cout << " Mass multiplications in solver loop = ";
963  cout.width(9);
964  cout << massOp << endl;
965  cout << " Mass multiplications for orthogonalization = ";
966  cout.width(9);
967  cout << modalTool.getNumProj_MassMult() << endl;
968  cout << " Mass multiplications for normalization = ";
969  cout.width(9);
970  cout << modalTool.getNumNorm_MassMult() << endl;
971  cout << " Total number of stiffness matrix operations = ";
972  cout.width(9);
973  cout << stifOp << endl;
974  cout << " Total number of preconditioner applications = ";
975  cout.width(9);
976  cout << precOp << endl;
977  cout << " Total number of computed eigen-residuals = ";
978  cout.width(9);
979  cout << residual << endl;
980  cout << "\n";
981  cout << " Total number of outer iterations = ";
982  cout.width(9);
983  cout << outerIter << endl;
984  cout << " Number of restarts = ";
985  cout.width(9);
986  cout << numRestart << endl;
987  cout << endl;
988  cout << " Maximum size of search space = ";
989  cout.width(9);
990  cout << maxSpaceSize << endl;
991  cout << " Average size of search space = ";
992  cout.setf(ios::fixed, ios::floatfield);
993  cout.precision(2);
994  cout.width(9);
995  cout << ((double) sumSpaceSize)/((double) outerIter) << endl;
996  cout << endl;
997  }
998 
999 }
1000 
1001 
1002 void Davidson::timeInfo() const {
1003 
1004  int myPid = MyComm.MyPID();
1005 
1006  if (myPid == 0) {
1007  cout << " --- Timings ---\n\n";
1008  cout.setf(ios::fixed, ios::floatfield);
1009  cout.precision(2);
1010  cout << " Total time for outer loop = ";
1011  cout.width(9);
1012  cout << timeOuterLoop << " s" << endl;
1013  cout << " Time for mass matrix operations = ";
1014  cout.width(9);
1015  cout << timeMassOp << " s ";
1016  cout.width(5);
1017  cout << 100*timeMassOp/timeOuterLoop << " %\n";
1018  cout << " Time for stiffness matrix operations = ";
1019  cout.width(9);
1020  cout << timeStifOp << " s ";
1021  cout.width(5);
1022  cout << 100*timeStifOp/timeOuterLoop << " %\n";
1023  cout << " Time for preconditioner applications = ";
1024  cout.width(9);
1025  cout << timePrecOp << " s ";
1026  cout.width(5);
1027  cout << 100*timePrecOp/timeOuterLoop << " %\n";
1028  cout << " Time for orthogonalizations = ";
1029  cout.width(9);
1030  cout << timeOrtho << " s ";
1031  cout.width(5);
1032  cout << 100*timeOrtho/timeOuterLoop << " %\n";
1033  cout << " Projection step : ";
1034  cout.width(9);
1035  cout << modalTool.getTimeProj() << " s\n";
1036 // cout << " Q^T mult. :: ";
1037 // cout.width(9);
1038 // cout << modalTool.getTimeProj_QtMult() << " s\n";
1039 // cout << " Q mult. :: ";
1040 // cout.width(9);
1041 // cout << modalTool.getTimeProj_QMult() << " s\n";
1042 // cout << " Mass mult. :: ";
1043 // cout.width(9);
1044 // cout << modalTool.getTimeProj_MassMult() << " s\n";
1045  cout << " Normalization step : ";
1046  cout.width(9);
1047  cout << modalTool.getTimeNorm() << " s\n";
1048 // cout << " Mass mult. :: ";
1049 // cout.width(9);
1050 // cout << modalTool.getTimeNorm_MassMult() << " s\n";
1051  cout << " Time for local projection = ";
1052  cout.width(9);
1053  cout << timeLocalProj << " s ";
1054  cout.width(5);
1055  cout << 100*timeLocalProj/timeOuterLoop << " %\n";
1056  cout << " Time for local eigensolve = ";
1057  cout.width(9);
1058  cout << timeLocalSolve << " s ";
1059  cout.width(5);
1060  cout << 100*timeLocalSolve/timeOuterLoop << " %\n";
1061  cout << " Time for local update = ";
1062  cout.width(9);
1063  cout << timeLocalUpdate << " s ";
1064  cout.width(5);
1065  cout << 100*timeLocalUpdate/timeOuterLoop << " %\n";
1066  cout << " Time for residual computations = ";
1067  cout.width(9);
1068  cout << timeResidual << " s ";
1069  cout.width(5);
1070  cout << 100*timeResidual/timeOuterLoop << " %\n";
1071  cout << " Time for residuals norm computations = ";
1072  cout.width(9);
1073  cout << timeNorm << " s ";
1074  cout.width(5);
1075  cout << 100*timeNorm/timeOuterLoop << " %\n";
1076  cout << " Time for restarting space definition = ";
1077  cout.width(9);
1078  cout << timeRestart << " s ";
1079  cout.width(5);
1080  cout << 100*timeRestart/timeOuterLoop << " %\n";
1081  cout << "\n";
1082  cout << " Total time for post-processing = ";
1083  cout.width(9);
1084  cout << timePostProce << " s\n";
1085  cout << endl;
1086  } // if (myPid == 0)
1087 
1088 }
1089 
1090