Anasazi  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
BlockDACG.cpp
1 // @HEADER
2 // *****************************************************************************
3 // Anasazi: Block Eigensolvers Package
4 //
5 // Copyright 2004 NTESS and the Anasazi contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 // This software is a result of the research described in the report
11 //
12 // "A comparison of algorithms for modal analysis in the absence
13 // of a sparse direct method", P. Arbenz, R. Lehoucq, and U. Hetmaniuk,
14 // Sandia National Laboratories, Technical report SAND2003-1028J.
15 //
16 // It is based on the Epetra, AztecOO, and ML packages defined in the Trilinos
17 // framework ( http://trilinos.org/ ).
18 
19 #include "BlockDACG.h"
20 
21 
22 BlockDACG::BlockDACG(const Epetra_Comm &_Comm, const Epetra_Operator *KK,
23  const Epetra_Operator *PP, int _blk,
24  double _tol, int _maxIter, int _verb) :
25  myVerify(_Comm),
26  callBLAS(),
27  callFortran(),
28  modalTool(_Comm),
29  mySort(),
30  MyComm(_Comm),
31  K(KK),
32  M(0),
33  Prec(PP),
34  MyWatch(_Comm),
35  tolEigenSolve(_tol),
36  maxIterEigenSolve(_maxIter),
37  blockSize(_blk),
38  normWeight(0),
39  verbose(_verb),
40  historyCount(0),
41  resHistory(0),
42  memRequested(0.0),
43  highMem(0.0),
44  massOp(0),
45  numRestart(0),
46  outerIter(0),
47  precOp(0),
48  residual(0),
49  stifOp(0),
50  timeLocalProj(0.0),
51  timeLocalSolve(0.0),
52  timeLocalUpdate(0.0),
53  timeMassOp(0.0),
54  timeNorm(0.0),
55  timeOrtho(0.0),
56  timeOuterLoop(0.0),
57  timePostProce(0.0),
58  timePrecOp(0.0),
59  timeResidual(0.0),
60  timeRestart(0.0),
61  timeSearchP(0.0),
62  timeStifOp(0.0)
63  {
64 
65 }
66 
67 
68 BlockDACG::BlockDACG(const Epetra_Comm &_Comm, const Epetra_Operator *KK,
69  const Epetra_Operator *MM, const Epetra_Operator *PP, int _blk,
70  double _tol, int _maxIter, int _verb,
71  double *_weight) :
72  myVerify(_Comm),
73  callBLAS(),
74  callFortran(),
75  modalTool(_Comm),
76  mySort(),
77  MyComm(_Comm),
78  K(KK),
79  M(MM),
80  Prec(PP),
81  MyWatch(_Comm),
82  tolEigenSolve(_tol),
83  maxIterEigenSolve(_maxIter),
84  blockSize(_blk),
85  normWeight(_weight),
86  verbose(_verb),
87  historyCount(0),
88  resHistory(0),
89  memRequested(0.0),
90  highMem(0.0),
91  massOp(0),
92  numRestart(0),
93  outerIter(0),
94  precOp(0),
95  residual(0),
96  stifOp(0),
97  timeLocalProj(0.0),
98  timeLocalSolve(0.0),
99  timeLocalUpdate(0.0),
100  timeMassOp(0.0),
101  timeNorm(0.0),
102  timeOrtho(0.0),
103  timeOuterLoop(0.0),
104  timePostProce(0.0),
105  timePrecOp(0.0),
106  timeResidual(0.0),
107  timeRestart(0.0),
108  timeSearchP(0.0),
109  timeStifOp(0.0)
110  {
111 
112 }
113 
114 
115 BlockDACG::~BlockDACG() {
116 
117  if (resHistory) {
118  delete[] resHistory;
119  resHistory = 0;
120  }
121 
122 }
123 
124 
125 int BlockDACG::solve(int numEigen, Epetra_MultiVector &Q, double *lambda) {
126 
127  return BlockDACG::reSolve(numEigen, Q, lambda);
128 }
129 
130 
131 int BlockDACG::reSolve(int numEigen, Epetra_MultiVector &Q, double *lambda, int startingEV) {
132 
133  // Computes the smallest eigenvalues and the corresponding eigenvectors
134  // of the generalized eigenvalue problem
135  //
136  // K X = M X Lambda
137  //
138  // using a Block Deflation Accelerated Conjugate Gradient algorithm.
139  //
140  // Note that if M is not specified, then K X = X Lambda is solved.
141  //
142  // Ref: P. Arbenz & R. Lehoucq, "A comparison of algorithms for modal analysis in the
143  // absence of a sparse direct method", SNL, Technical Report SAND2003-1028J
144  // With the notations of this report, the coefficient beta is defined as
145  // diag( H^T_{k} G_{k} ) / diag( H^T_{k-1} G_{k-1} )
146  //
147  // Input variables:
148  //
149  // numEigen (integer) = Number of eigenmodes requested
150  //
151  // Q (Epetra_MultiVector) = Converged eigenvectors
152  // The number of columns of Q must be equal to numEigen + blockSize.
153  // The rows of Q are distributed across processors.
154  // At exit, the first numEigen columns contain the eigenvectors requested.
155  //
156  // lambda (array of doubles) = Converged eigenvalues
157  // At input, it must be of size numEigen + blockSize.
158  // At exit, the first numEigen locations contain the eigenvalues requested.
159  //
160  // startingEV (integer) = Number of existing converged eigenmodes
161  //
162  // Return information on status of computation
163  //
164  // info >= 0 >> Number of converged eigenpairs at the end of computation
165  //
166  // // Failure due to input arguments
167  //
168  // info = - 1 >> The stiffness matrix K has not been specified.
169  // info = - 2 >> The maps for the matrix K and the matrix M differ.
170  // info = - 3 >> The maps for the matrix K and the preconditioner P differ.
171  // info = - 4 >> The maps for the vectors and the matrix K differ.
172  // info = - 5 >> Q is too small for the number of eigenvalues requested.
173  // info = - 6 >> Q is too small for the computation parameters.
174  //
175  // info = - 10 >> Failure during the mass orthonormalization
176  //
177  // info = - 20 >> Error in LAPACK during the local eigensolve
178  //
179  // info = - 30 >> MEMORY
180  //
181 
182  // Check the input parameters
183 
184  if (numEigen <= startingEV) {
185  return startingEV;
186  }
187 
188  int info = myVerify.inputArguments(numEigen, K, M, Prec, Q, numEigen + blockSize);
189  if (info < 0)
190  return info;
191 
192  int myPid = MyComm.MyPID();
193 
194  // Get the weight for approximating the M-inverse norm
195  Epetra_Vector *vectWeight = 0;
196  if (normWeight) {
197  vectWeight = new Epetra_Vector(View, Q.Map(), normWeight);
198  }
199 
200  int knownEV = startingEV;
201  int localVerbose = verbose*(myPid==0);
202 
203  // Define local block vectors
204  //
205  // MX = Working vectors (storing M*X if M is specified, else pointing to X)
206  // KX = Working vectors (storing K*X)
207  //
208  // R = Residuals
209  //
210  // H = Preconditioned residuals
211  //
212  // P = Search directions
213  // MP = Working vectors (storing M*P if M is specified, else pointing to P)
214  // KP = Working vectors (storing K*P)
215 
216  int xr = Q.MyLength();
217  Epetra_MultiVector X(View, Q, numEigen, blockSize);
218  X.Random();
219 
220  int tmp;
221  tmp = (M == 0) ? 5*blockSize*xr : 7*blockSize*xr;
222 
223  double *work1 = new (nothrow) double[tmp];
224  if (work1 == 0) {
225  if (vectWeight)
226  delete vectWeight;
227  info = -30;
228  return info;
229  }
230  memRequested += sizeof(double)*tmp/(1024.0*1024.0);
231 
232  highMem = (highMem > currentSize()) ? highMem : currentSize();
233 
234  double *tmpD = work1;
235 
236  Epetra_MultiVector KX(View, Q.Map(), tmpD, xr, blockSize);
237  tmpD = tmpD + xr*blockSize;
238 
239  Epetra_MultiVector MX(View, Q.Map(), (M) ? tmpD : X.Values(), xr, blockSize);
240  tmpD = (M) ? tmpD + xr*blockSize : tmpD;
241 
242  Epetra_MultiVector R(View, Q.Map(), tmpD, xr, blockSize);
243  tmpD = tmpD + xr*blockSize;
244 
245  Epetra_MultiVector H(View, Q.Map(), tmpD, xr, blockSize);
246  tmpD = tmpD + xr*blockSize;
247 
248  Epetra_MultiVector P(View, Q.Map(), tmpD, xr, blockSize);
249  tmpD = tmpD + xr*blockSize;
250 
251  Epetra_MultiVector KP(View, Q.Map(), tmpD, xr, blockSize);
252  tmpD = tmpD + xr*blockSize;
253 
254  Epetra_MultiVector MP(View, Q.Map(), (M) ? tmpD : P.Values(), xr, blockSize);
255 
256  // Define arrays
257  //
258  // theta = Store the local eigenvalues (size: 2*blockSize)
259  // normR = Store the norm of residuals (size: blockSize)
260  //
261  // oldHtR = Store the previous H_i^T*R_i (size: blockSize)
262  // currentHtR = Store the current H_i^T*R_i (size: blockSize)
263  //
264  // MM = Local mass matrix (size: 2*blockSize x 2*blockSize)
265  // KK = Local stiffness matrix (size: 2*blockSize x 2*blockSize)
266  //
267  // S = Local eigenvectors (size: 2*blockSize x 2*blockSize)
268 
269  int lwork2;
270  lwork2 = 5*blockSize + 12*blockSize*blockSize;
271  double *work2 = new (nothrow) double[lwork2];
272  if (work2 == 0) {
273  if (vectWeight)
274  delete vectWeight;
275  delete[] work1;
276  info = -30;
277  return info;
278  }
279 
280  highMem = (highMem > currentSize()) ? highMem : currentSize();
281 
282  tmpD = work2;
283 
284  double *theta = tmpD;
285  tmpD = tmpD + 2*blockSize;
286 
287  double *normR = tmpD;
288  tmpD = tmpD + blockSize;
289 
290  double *oldHtR = tmpD;
291  tmpD = tmpD + blockSize;
292 
293  double *currentHtR = tmpD;
294  tmpD = tmpD + blockSize;
295  memset(currentHtR, 0, blockSize*sizeof(double));
296 
297  double *MM = tmpD;
298  tmpD = tmpD + 4*blockSize*blockSize;
299 
300  double *KK = tmpD;
301  tmpD = tmpD + 4*blockSize*blockSize;
302 
303  double *S = tmpD;
304 
305  memRequested += sizeof(double)*lwork2/(1024.0*1024.0);
306 
307  // Define an array to store the residuals history
308  if (localVerbose > 2) {
309  resHistory = new (nothrow) double[maxIterEigenSolve*blockSize];
310  if (resHistory == 0) {
311  if (vectWeight)
312  delete vectWeight;
313  delete[] work1;
314  delete[] work2;
315  info = -30;
316  return info;
317  }
318  historyCount = 0;
319  }
320 
321  // Miscellaneous definitions
322 
323  bool reStart = false;
324  numRestart = 0;
325 
326  int localSize;
327  int twoBlocks = 2*blockSize;
328  int nFound = blockSize;
329  int i, j;
330 
331  if (localVerbose > 0) {
332  cout << endl;
333  cout << " *|* Problem: ";
334  if (M)
335  cout << "K*Q = M*Q D ";
336  else
337  cout << "K*Q = Q D ";
338  if (Prec)
339  cout << " with preconditioner";
340  cout << endl;
341  cout << " *|* Algorithm = DACG (block version)" << endl;
342  cout << " *|* Size of blocks = " << blockSize << endl;
343  cout << " *|* Number of requested eigenvalues = " << numEigen << endl;
344  cout.precision(2);
345  cout.setf(ios::scientific, ios::floatfield);
346  cout << " *|* Tolerance for convergence = " << tolEigenSolve << endl;
347  cout << " *|* Norm used for convergence: ";
348  if (normWeight)
349  cout << "weighted L2-norm with user-provided weights" << endl;
350  else
351  cout << "L^2-norm" << endl;
352  if (startingEV > 0)
353  cout << " *|* Input converged eigenvectors = " << startingEV << endl;
354  cout << "\n -- Start iterations -- \n";
355  }
356 
357  timeOuterLoop -= MyWatch.WallTime();
358  for (outerIter = 1; outerIter <= maxIterEigenSolve; ++outerIter) {
359 
360  highMem = (highMem > currentSize()) ? highMem : currentSize();
361 
362  if ((outerIter == 1) || (reStart == true)) {
363 
364  reStart = false;
365  localSize = blockSize;
366 
367  if (nFound > 0) {
368 
369  Epetra_MultiVector X2(View, X, blockSize-nFound, nFound);
370  Epetra_MultiVector MX2(View, MX, blockSize-nFound, nFound);
371  Epetra_MultiVector KX2(View, KX, blockSize-nFound, nFound);
372 
373  // Apply the mass matrix to X
374  timeMassOp -= MyWatch.WallTime();
375  if (M)
376  M->Apply(X2, MX2);
377  timeMassOp += MyWatch.WallTime();
378  massOp += nFound;
379 
380  if (knownEV > 0) {
381  // Orthonormalize X against the known eigenvectors with Gram-Schmidt
382  // Note: Use R as a temporary work space
383  Epetra_MultiVector copyQ(View, Q, 0, knownEV);
384  timeOrtho -= MyWatch.WallTime();
385  info = modalTool.massOrthonormalize(X, MX, M, copyQ, nFound, 0, R.Values());
386  timeOrtho += MyWatch.WallTime();
387  // Exit the code if the orthogonalization did not succeed
388  if (info < 0) {
389  info = -10;
390  delete[] work1;
391  delete[] work2;
392  if (vectWeight)
393  delete vectWeight;
394  return info;
395  }
396  }
397 
398  // Apply the stiffness matrix to X
399  timeStifOp -= MyWatch.WallTime();
400  K->Apply(X2, KX2);
401  timeStifOp += MyWatch.WallTime();
402  stifOp += nFound;
403 
404  } // if (nFound > 0)
405 
406  } // if ((outerIter == 1) || (reStart == true))
407  else {
408 
409  // Apply the preconditioner on the residuals
410  if (Prec != 0) {
411  timePrecOp -= MyWatch.WallTime();
412  Prec->ApplyInverse(R, H);
413  timePrecOp += MyWatch.WallTime();
414  precOp += blockSize;
415  }
416  else {
417  memcpy(H.Values(), R.Values(), xr*blockSize*sizeof(double));
418  }
419 
420  // Compute the product H^T*R
421  timeSearchP -= MyWatch.WallTime();
422  memcpy(oldHtR, currentHtR, blockSize*sizeof(double));
423  H.Dot(R, currentHtR);
424  // Define the new search directions
425  if (localSize == blockSize) {
426  P.Scale(-1.0, H);
427  localSize = twoBlocks;
428  } // if (localSize == blockSize)
429  else {
430  bool hasZeroDot = false;
431  for (j = 0; j < blockSize; ++j) {
432  if (oldHtR[j] == 0.0) {
433  hasZeroDot = true;
434  break;
435  }
436  callBLAS.SCAL(xr, currentHtR[j]/oldHtR[j], P.Values() + j*xr);
437  }
438  if (hasZeroDot == true) {
439  // Restart the computation when there is a null dot product
440  if (localVerbose > 0) {
441  cout << endl;
442  cout << " !! Null dot product -- Restart the search space !!\n";
443  cout << endl;
444  }
445  if (blockSize == 1) {
446  X.Random();
447  nFound = blockSize;
448  }
449  else {
450  Epetra_MultiVector Xinit(View, X, j, blockSize-j);
451  Xinit.Random();
452  nFound = blockSize - j;
453  } // if (blockSize == 1)
454  reStart = true;
455  numRestart += 1;
456  info = 0;
457  continue;
458  }
459  callBLAS.AXPY(xr*blockSize, -1.0, H.Values(), P.Values());
460  } // if (localSize == blockSize)
461  timeSearchP += MyWatch.WallTime();
462 
463  // Apply the mass matrix on P
464  timeMassOp -= MyWatch.WallTime();
465  if (M)
466  M->Apply(P, MP);
467  timeMassOp += MyWatch.WallTime();
468  massOp += blockSize;
469 
470  if (knownEV > 0) {
471  // Orthogonalize P against the known eigenvectors
472  // Note: Use R as a temporary work space
473  Epetra_MultiVector copyQ(View, Q, 0, knownEV);
474  timeOrtho -= MyWatch.WallTime();
475  modalTool.massOrthonormalize(P, MP, M, copyQ, blockSize, 1, R.Values());
476  timeOrtho += MyWatch.WallTime();
477  }
478 
479  // Apply the stiffness matrix to P
480  timeStifOp -= MyWatch.WallTime();
481  K->Apply(P, KP);
482  timeStifOp += MyWatch.WallTime();
483  stifOp += blockSize;
484 
485  } // if ((outerIter == 1) || (reStart == true))
486 
487  // Form "local" mass and stiffness matrices
488  // Note: Use S as a temporary workspace
489  timeLocalProj -= MyWatch.WallTime();
490  modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, KX.Values(), xr,
491  KK, localSize, S);
492  modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, MX.Values(), xr,
493  MM, localSize, S);
494  if (localSize > blockSize) {
495  modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, KP.Values(), xr,
496  KK + blockSize*localSize, localSize, S);
497  modalTool.localProjection(blockSize, blockSize, xr, P.Values(), xr, KP.Values(), xr,
498  KK + blockSize*localSize + blockSize, localSize, S);
499  modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, MP.Values(), xr,
500  MM + blockSize*localSize, localSize, S);
501  modalTool.localProjection(blockSize, blockSize, xr, P.Values(), xr, MP.Values(), xr,
502  MM + blockSize*localSize + blockSize, localSize, S);
503  } // if (localSize > blockSize)
504  timeLocalProj += MyWatch.WallTime();
505 
506  // Perform a spectral decomposition
507  timeLocalSolve -= MyWatch.WallTime();
508  int nevLocal = localSize;
509  info = modalTool.directSolver(localSize, KK, localSize, MM, localSize, nevLocal,
510  S, localSize, theta, localVerbose,
511  (blockSize == 1) ? 1: 0);
512  timeLocalSolve += MyWatch.WallTime();
513 
514  if (info < 0) {
515  // Stop when spectral decomposition has a critical failure
516  break;
517  }
518 
519  // Check for restarting
520  if ((theta[0] < 0.0) || (nevLocal < blockSize)) {
521  if (localVerbose > 0) {
522  cout << " Iteration " << outerIter;
523  cout << "- Failure for spectral decomposition - RESTART with new random search\n";
524  }
525  if (blockSize == 1) {
526  X.Random();
527  nFound = blockSize;
528  }
529  else {
530  Epetra_MultiVector Xinit(View, X, 1, blockSize-1);
531  Xinit.Random();
532  nFound = blockSize - 1;
533  } // if (blockSize == 1)
534  reStart = true;
535  numRestart += 1;
536  info = 0;
537  continue;
538  } // if ((theta[0] < 0.0) || (nevLocal < blockSize))
539 
540  if ((localSize == twoBlocks) && (nevLocal == blockSize)) {
541  for (j = 0; j < nevLocal; ++j)
542  memcpy(S + j*blockSize, S + j*twoBlocks, blockSize*sizeof(double));
543  localSize = blockSize;
544  }
545 
546  // Check the direction of eigenvectors
547  // Note: This sign check is important for convergence
548  for (j = 0; j < nevLocal; ++j) {
549  double coeff = S[j + j*localSize];
550  if (coeff < 0.0)
551  callBLAS.SCAL(localSize, -1.0, S + j*localSize);
552  }
553 
554  // Compute the residuals
555  timeResidual -= MyWatch.WallTime();
556  callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KX.Values(), xr,
557  S, localSize, 0.0, R.Values(), xr);
558  if (localSize == twoBlocks) {
559  callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KP.Values(), xr,
560  S + blockSize, localSize, 1.0, R.Values(), xr);
561  }
562  for (j = 0; j < blockSize; ++j)
563  callBLAS.SCAL(localSize, theta[j], S + j*localSize);
564  callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, -1.0, MX.Values(), xr,
565  S, localSize, 1.0, R.Values(), xr);
566  if (localSize == twoBlocks) {
567  callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, -1.0, MP.Values(), xr,
568  S + blockSize, localSize, 1.0, R.Values(), xr);
569  }
570  for (j = 0; j < blockSize; ++j)
571  callBLAS.SCAL(localSize, 1.0/theta[j], S + j*localSize);
572  timeResidual += MyWatch.WallTime();
573 
574  // Compute the norms of the residuals
575  timeNorm -= MyWatch.WallTime();
576  if (vectWeight)
577  R.NormWeighted(*vectWeight, normR);
578  else
579  R.Norm2(normR);
580  // Scale the norms of residuals with the eigenvalues
581  // Count the converged eigenvectors
582  nFound = 0;
583  for (j = 0; j < blockSize; ++j) {
584  normR[j] = (theta[j] == 0.0) ? normR[j] : normR[j]/theta[j];
585  if (normR[j] < tolEigenSolve)
586  nFound += 1;
587  }
588  timeNorm += MyWatch.WallTime();
589 
590  // Store the residual history
591  if (localVerbose > 2) {
592  memcpy(resHistory + historyCount*blockSize, normR, blockSize*sizeof(double));
593  historyCount += 1;
594  }
595 
596  // Print information on current iteration
597  if (localVerbose > 0) {
598  cout << " Iteration " << outerIter << " - Number of converged eigenvectors ";
599  cout << knownEV + nFound << endl;
600  }
601 
602  if (localVerbose > 1) {
603  cout << endl;
604  cout.precision(2);
605  cout.setf(ios::scientific, ios::floatfield);
606  for (i=0; i<blockSize; ++i) {
607  cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i;
608  cout << " = " << normR[i] << endl;
609  }
610  cout << endl;
611  cout.precision(2);
612  for (i=0; i<blockSize; ++i) {
613  cout << " Iteration " << outerIter << " - Ritz eigenvalue " << i;
614  cout.setf((fabs(theta[i]) < 0.01) ? ios::scientific : ios::fixed, ios::floatfield);
615  cout << " = " << theta[i] << endl;
616  }
617  cout << endl;
618  }
619 
620  if (nFound == 0) {
621  // Update the spaces
622  // Note: Use H as a temporary work space
623  timeLocalUpdate -= MyWatch.WallTime();
624  memcpy(H.Values(), X.Values(), xr*blockSize*sizeof(double));
625  callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, H.Values(), xr, S, localSize,
626  0.0, X.Values(), xr);
627  memcpy(H.Values(), KX.Values(), xr*blockSize*sizeof(double));
628  callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, H.Values(), xr, S, localSize,
629  0.0, KX.Values(), xr);
630  if (M) {
631  memcpy(H.Values(), MX.Values(), xr*blockSize*sizeof(double));
632  callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, H.Values(), xr, S, localSize,
633  0.0, MX.Values(), xr);
634  }
635  if (localSize == twoBlocks) {
636  callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, P.Values(), xr,
637  S + blockSize, localSize, 1.0, X.Values(), xr);
638  callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KP.Values(), xr,
639  S + blockSize, localSize, 1.0, KX.Values(), xr);
640  if (M) {
641  callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, MP.Values(), xr,
642  S + blockSize, localSize, 1.0, MX.Values(), xr);
643  }
644  } // if (localSize == twoBlocks)
645  timeLocalUpdate += MyWatch.WallTime();
646  // When required, monitor some orthogonalities
647  if (verbose > 2) {
648  if (knownEV == 0) {
649  accuracyCheck(&X, &MX, &R, 0, (localSize>blockSize) ? &P : 0);
650  }
651  else {
652  Epetra_MultiVector copyQ(View, Q, 0, knownEV);
653  accuracyCheck(&X, &MX, &R, &copyQ, (localSize>blockSize) ? &P : 0);
654  }
655  } // if (verbose > 2)
656  continue;
657  } // if (nFound == 0)
658 
659  // Order the Ritz eigenvectors by putting the converged vectors at the beginning
660  int firstIndex = blockSize;
661  for (j = 0; j < blockSize; ++j) {
662  if (normR[j] >= tolEigenSolve) {
663  firstIndex = j;
664  break;
665  }
666  } // for (j = 0; j < blockSize; ++j)
667  while (firstIndex < nFound) {
668  for (j = firstIndex; j < blockSize; ++j) {
669  if (normR[j] < tolEigenSolve) {
670  // Swap the j-th and firstIndex-th position
671  callFortran.SWAP(localSize, S + j*localSize, 1, S + firstIndex*localSize, 1);
672  callFortran.SWAP(1, theta + j, 1, theta + firstIndex, 1);
673  callFortran.SWAP(1, normR + j, 1, normR + firstIndex, 1);
674  break;
675  }
676  } // for (j = firstIndex; j < blockSize; ++j)
677  for (j = 0; j < blockSize; ++j) {
678  if (normR[j] >= tolEigenSolve) {
679  firstIndex = j;
680  break;
681  }
682  } // for (j = 0; j < blockSize; ++j)
683  } // while (firstIndex < nFound)
684 
685  // Copy the converged eigenvalues
686  memcpy(lambda + knownEV, theta, nFound*sizeof(double));
687 
688  // Convergence test
689  if (knownEV + nFound >= numEigen) {
690  callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, X.Values(), xr,
691  S, localSize, 0.0, R.Values(), xr);
692  if (localSize > blockSize) {
693  callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, P.Values(), xr,
694  S + blockSize, localSize, 1.0, R.Values(), xr);
695  }
696  memcpy(Q.Values() + knownEV*xr, R.Values(), nFound*xr*sizeof(double));
697  knownEV += nFound;
698  if (localVerbose == 1) {
699  cout << endl;
700  cout.precision(2);
701  cout.setf(ios::scientific, ios::floatfield);
702  for (i=0; i<blockSize; ++i) {
703  cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i;
704  cout << " = " << normR[i] << endl;
705  }
706  cout << endl;
707  }
708  break;
709  }
710 
711  // Store the converged eigenvalues and eigenvectors
712  callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, X.Values(), xr,
713  S, localSize, 0.0, Q.Values() + knownEV*xr, xr);
714  if (localSize == twoBlocks) {
715  callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, P.Values(), xr,
716  S + blockSize, localSize, 1.0, Q.Values() + knownEV*xr, xr);
717  }
718  knownEV += nFound;
719 
720  // Define the restarting vectors
721  timeRestart -= MyWatch.WallTime();
722  int leftOver = (nevLocal < blockSize + nFound) ? nevLocal - nFound : blockSize;
723  double *Snew = S + nFound*localSize;
724  memcpy(H.Values(), X.Values(), blockSize*xr*sizeof(double));
725  callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, H.Values(), xr,
726  Snew, localSize, 0.0, X.Values(), xr);
727  memcpy(H.Values(), KX.Values(), blockSize*xr*sizeof(double));
728  callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, H.Values(), xr,
729  Snew, localSize, 0.0, KX.Values(), xr);
730  if (M) {
731  memcpy(H.Values(), MX.Values(), blockSize*xr*sizeof(double));
732  callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, H.Values(), xr,
733  Snew, localSize, 0.0, MX.Values(), xr);
734  }
735  if (localSize == twoBlocks) {
736  callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, P.Values(), xr,
737  Snew+blockSize, localSize, 1.0, X.Values(), xr);
738  callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, KP.Values(), xr,
739  Snew+blockSize, localSize, 1.0, KX.Values(), xr);
740  if (M) {
741  callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, MP.Values(), xr,
742  Snew+blockSize, localSize, 1.0, MX.Values(), xr);
743  }
744  } // if (localSize == twoBlocks)
745  if (nevLocal < blockSize + nFound) {
746  // Put new random vectors at the end of the block
747  Epetra_MultiVector Xtmp(View, X, leftOver, blockSize - leftOver);
748  Xtmp.Random();
749  }
750  else {
751  nFound = 0;
752  } // if (nevLocal < blockSize + nFound)
753  reStart = true;
754  timeRestart += MyWatch.WallTime();
755 
756  } // for (outerIter = 1; outerIter <= maxIterEigenSolve; ++outerIter)
757  timeOuterLoop += MyWatch.WallTime();
758  highMem = (highMem > currentSize()) ? highMem : currentSize();
759 
760  // Clean memory
761  delete[] work1;
762  delete[] work2;
763  if (vectWeight)
764  delete vectWeight;
765 
766  // Sort the eigenpairs
767  timePostProce -= MyWatch.WallTime();
768  if ((info == 0) && (knownEV > 0)) {
769  mySort.sortScalars_Vectors(knownEV, lambda, Q.Values(), Q.MyLength());
770  }
771  timePostProce += MyWatch.WallTime();
772 
773  return (info == 0) ? knownEV : info;
774 
775 }
776 
777 
778 void BlockDACG::accuracyCheck(const Epetra_MultiVector *X, const Epetra_MultiVector *MX,
779  const Epetra_MultiVector *R, const Epetra_MultiVector *Q,
780  const Epetra_MultiVector *P) const {
781 
782  cout.precision(2);
783  cout.setf(ios::scientific, ios::floatfield);
784  double tmp;
785 
786  int myPid = MyComm.MyPID();
787 
788  if (X) {
789  if (M) {
790  if (MX) {
791  tmp = myVerify.errorEquality(X, MX, M);
792  if (myPid == 0)
793  cout << " >> Difference between MX and M*X = " << tmp << endl;
794  }
795  tmp = myVerify.errorOrthonormality(X, M);
796  if (myPid == 0)
797  cout << " >> Error in X^T M X - I = " << tmp << endl;
798  }
799  else {
800  tmp = myVerify.errorOrthonormality(X, 0);
801  if (myPid == 0)
802  cout << " >> Error in X^T X - I = " << tmp << endl;
803  }
804  }
805 
806  if ((R) && (X)) {
807  tmp = myVerify.errorOrthogonality(X, R);
808  if (myPid == 0)
809  cout << " >> Orthogonality X^T R up to " << tmp << endl;
810  }
811 
812  if (Q == 0)
813  return;
814 
815  if (M) {
816  tmp = myVerify.errorOrthonormality(Q, M);
817  if (myPid == 0)
818  cout << " >> Error in Q^T M Q - I = " << tmp << endl;
819  if (X) {
820  tmp = myVerify.errorOrthogonality(Q, X, M);
821  if (myPid == 0)
822  cout << " >> Orthogonality Q^T M X up to " << tmp << endl;
823  }
824  if (P) {
825  tmp = myVerify.errorOrthogonality(Q, P, M);
826  if (myPid == 0)
827  cout << " >> Orthogonality Q^T M P up to " << tmp << endl;
828  }
829  }
830  else {
831  tmp = myVerify.errorOrthonormality(Q, 0);
832  if (myPid == 0)
833  cout << " >> Error in Q^T Q - I = " << tmp << endl;
834  if (X) {
835  tmp = myVerify.errorOrthogonality(Q, X, 0);
836  if (myPid == 0)
837  cout << " >> Orthogonality Q^T X up to " << tmp << endl;
838  }
839  if (P) {
840  tmp = myVerify.errorOrthogonality(Q, P, 0);
841  if (myPid == 0)
842  cout << " >> Orthogonality Q^T P up to " << tmp << endl;
843  }
844  }
845 
846 }
847 
848 
849 void BlockDACG::initializeCounters() {
850 
851  historyCount = 0;
852  if (resHistory) {
853  delete[] resHistory;
854  resHistory = 0;
855  }
856 
857  memRequested = 0.0;
858  highMem = 0.0;
859 
860  massOp = 0;
861  numRestart = 0;
862  outerIter = 0;
863  precOp = 0;
864  residual = 0;
865  stifOp = 0;
866 
867  timeLocalProj = 0.0;
868  timeLocalSolve = 0.0;
869  timeLocalUpdate = 0.0;
870  timeMassOp = 0.0;
871  timeNorm = 0.0;
872  timeOrtho = 0.0;
873  timeOuterLoop = 0.0;
874  timePostProce = 0.0;
875  timePrecOp = 0.0;
876  timeResidual = 0.0;
877  timeRestart = 0.0;
878  timeSearchP = 0.0;
879  timeStifOp = 0.0;
880 
881 
882 }
883 
884 
885 void BlockDACG::algorithmInfo() const {
886 
887  int myPid = MyComm.MyPID();
888 
889  if (myPid ==0) {
890  cout << " Algorithm: DACG (block version) with Cholesky-based local eigensolver\n";
891  cout << " Block Size: " << blockSize << endl;
892  }
893 
894 }
895 
896 
897 void BlockDACG::historyInfo() const {
898 
899  if (resHistory) {
900  int j;
901  cout << " Block Size: " << blockSize << endl;
902  cout << endl;
903  cout << " Residuals\n";
904  cout << endl;
905  cout.precision(4);
906  cout.setf(ios::scientific, ios::floatfield);
907  for (j = 0; j < historyCount; ++j) {
908  int ii;
909  for (ii = 0; ii < blockSize; ++ii)
910  cout << resHistory[blockSize*j + ii] << endl;
911  }
912  cout << endl;
913  }
914 
915 }
916 
917 
918 void BlockDACG::memoryInfo() const {
919 
920  int myPid = MyComm.MyPID();
921 
922  double maxHighMem = 0.0;
923  double tmp = highMem;
924  MyComm.MaxAll(&tmp, &maxHighMem, 1);
925 
926  double maxMemRequested = 0.0;
927  tmp = memRequested;
928  MyComm.MaxAll(&tmp, &maxMemRequested, 1);
929 
930  if (myPid == 0) {
931  cout.precision(2);
932  cout.setf(ios::fixed, ios::floatfield);
933  cout << " Memory requested per processor by the eigensolver = (EST) ";
934  cout.width(6);
935  cout << maxMemRequested << " MB " << endl;
936  cout << endl;
937  cout << " High water mark in eigensolver = (EST) ";
938  cout.width(6);
939  cout << maxHighMem << " MB " << endl;
940  cout << endl;
941  }
942 
943 }
944 
945 
946 void BlockDACG::operationInfo() const {
947 
948  int myPid = MyComm.MyPID();
949 
950  if (myPid == 0) {
951  cout << " --- Operations ---\n\n";
952  cout << " Total number of mass matrix multiplications = ";
953  cout.width(9);
954  cout << massOp + modalTool.getNumProj_MassMult() + modalTool.getNumNorm_MassMult() << endl;
955  cout << " Mass multiplications in eigensolver = ";
956  cout.width(9);
957  cout << massOp << endl;
958  cout << " Mass multiplications for orthogonalization = ";
959  cout.width(9);
960  cout << modalTool.getNumProj_MassMult() << endl;
961  cout << " Mass multiplications for normalization = ";
962  cout.width(9);
963  cout << modalTool.getNumNorm_MassMult() << endl;
964  cout << " Total number of stiffness matrix operations = ";
965  cout.width(9);
966  cout << stifOp << endl;
967  cout << " Total number of preconditioner applications = ";
968  cout.width(9);
969  cout << precOp << endl;
970  cout << " Total number of computed eigen-residuals = ";
971  cout.width(9);
972  cout << residual << endl;
973  cout << "\n";
974  cout << " Total number of outer iterations = ";
975  cout.width(9);
976  cout << outerIter << endl;
977  cout << " Number of restarts = ";
978  cout.width(9);
979  cout << numRestart << endl;
980  cout << "\n";
981  }
982 
983 }
984 
985 
986 void BlockDACG::timeInfo() const {
987 
988  int myPid = MyComm.MyPID();
989 
990  if (myPid == 0) {
991  cout << " --- Timings ---\n\n";
992  cout.setf(ios::fixed, ios::floatfield);
993  cout.precision(2);
994  cout << " Total time for outer loop = ";
995  cout.width(9);
996  cout << timeOuterLoop << " s" << endl;
997  cout << " Time for mass matrix operations = ";
998  cout.width(9);
999  cout << timeMassOp << " s ";
1000  cout.width(5);
1001  cout << 100*timeMassOp/timeOuterLoop << " %\n";
1002  cout << " Time for stiffness matrix operations = ";
1003  cout.width(9);
1004  cout << timeStifOp << " s ";
1005  cout.width(5);
1006  cout << 100*timeStifOp/timeOuterLoop << " %\n";
1007  cout << " Time for preconditioner applications = ";
1008  cout.width(9);
1009  cout << timePrecOp << " s ";
1010  cout.width(5);
1011  cout << 100*timePrecOp/timeOuterLoop << " %\n";
1012  cout << " Time for defining search directions = ";
1013  cout.width(9);
1014  cout << timeSearchP << " s ";
1015  cout.width(5);
1016  cout << 100*timeSearchP/timeOuterLoop << " %\n";
1017  cout << " Time for orthogonalizations = ";
1018  cout.width(9);
1019  cout << timeOrtho << " s ";
1020  cout.width(5);
1021  cout << 100*timeOrtho/timeOuterLoop << " %\n";
1022  cout << " Projection step : ";
1023  cout.width(9);
1024  cout << modalTool.getTimeProj() << " s\n";
1025  cout << " Q^T mult. :: ";
1026  cout.width(9);
1027  cout << modalTool.getTimeProj_QtMult() << " s\n";
1028  cout << " Q mult. :: ";
1029  cout.width(9);
1030  cout << modalTool.getTimeProj_QMult() << " s\n";
1031  cout << " Mass mult. :: ";
1032  cout.width(9);
1033  cout << modalTool.getTimeProj_MassMult() << " s\n";
1034  cout << " Normalization step : ";
1035  cout.width(9);
1036  cout << modalTool.getTimeNorm() << " s\n";
1037  cout << " Mass mult. :: ";
1038  cout.width(9);
1039  cout << modalTool.getTimeNorm_MassMult() << " s\n";
1040  cout << " Time for local projection = ";
1041  cout.width(9);
1042  cout << timeLocalProj << " s ";
1043  cout.width(5);
1044  cout << 100*timeLocalProj/timeOuterLoop << " %\n";
1045  cout << " Time for local eigensolve = ";
1046  cout.width(9);
1047  cout << timeLocalSolve << " s ";
1048  cout.width(5);
1049  cout << 100*timeLocalSolve/timeOuterLoop << " %\n";
1050  cout << " Time for local update = ";
1051  cout.width(9);
1052  cout << timeLocalUpdate << " s ";
1053  cout.width(5);
1054  cout << 100*timeLocalUpdate/timeOuterLoop << " %\n";
1055  cout << " Time for residual computations = ";
1056  cout.width(9);
1057  cout << timeResidual << " s ";
1058  cout.width(5);
1059  cout << 100*timeResidual/timeOuterLoop << " %\n";
1060  cout << " Time for residuals norm computations = ";
1061  cout.width(9);
1062  cout << timeNorm << " s ";
1063  cout.width(5);
1064  cout << 100*timeNorm/timeOuterLoop << " %\n";
1065  cout << " Time for restarting space definition = ";
1066  cout.width(9);
1067  cout << timeRestart << " s ";
1068  cout.width(5);
1069  cout << 100*timeRestart/timeOuterLoop << " %\n";
1070  cout << "\n";
1071  cout << " Total time for post-processing = ";
1072  cout.width(9);
1073  cout << timePostProce << " s\n";
1074  cout << endl;
1075  } // if (myPid == 0)
1076 
1077 }
1078 
1079