40 double _tol,
int _maxIter,
int _verb) :
52 maxIterEigenSolve(_maxIter),
89 double _tol,
int _maxIter,
int _verb,
double *_weight) :
101 maxIterEigenSolve(_maxIter),
121 timeLocalUpdate(0.0),
136 Davidson::~Davidson() {
143 if (spaceSizeHistory) {
144 delete[] spaceSizeHistory;
145 spaceSizeHistory = 0;
153 return Davidson::reSolve(numEigen, Q, lambda);
158 int Davidson::reSolve(
int numEigen,
Epetra_MultiVector &Q,
double *lambda,
int startingEV) {
208 if (numEigen <= startingEV) {
212 int info = myVerify.inputArguments(numEigen, K, M, Prec, Q, minimumSpaceDimension(numEigen));
216 int myPid = MyComm.MyPID();
218 if (numBlock*blockSize < numEigen) {
221 cerr <<
" !!! The space dimension (# of blocks x size of blocks) must be greater than ";
222 cerr <<
" the number of eigenvalues !!!\n";
223 cerr <<
" Number of blocks = " << numBlock << endl;
224 cerr <<
" Size of blocks = " << blockSize << endl;
225 cerr <<
" Number of eigenvalues = " << numEigen << endl;
237 int knownEV = startingEV;
238 int localVerbose = verbose*(myPid==0);
247 int xr = Q.MyLength();
248 int dimSearch = blockSize*numBlock;
260 tmp = (M == 0) ? 2*blockSize*xr : 3*blockSize*xr;
262 double *work1 =
new (nothrow)
double[tmp];
269 memRequested +=
sizeof(double)*tmp/(1024.0*1024.0);
271 highMem = (highMem > currentSize()) ? highMem : currentSize();
273 double *tmpD = work1;
276 tmpD = tmpD + xr*blockSize;
279 tmpD = (M) ? tmpD + xr*blockSize : tmpD;
294 int lwork2 = blockSize + dimSearch + 2*dimSearch*dimSearch + blockSize*blockSize;
295 double *work2 =
new (nothrow)
double[lwork2];
304 memRequested +=
sizeof(double)*lwork2/(1024.0*1024.0);
305 highMem = (highMem > currentSize()) ? highMem : currentSize();
309 double *theta = tmpD;
310 tmpD = tmpD + dimSearch;
312 double *normR = tmpD;
313 tmpD = tmpD + blockSize;
316 tmpD = tmpD + dimSearch*dimSearch;
317 memset(KK, 0, dimSearch*dimSearch*
sizeof(
double));
320 tmpD = tmpD + dimSearch*dimSearch;
322 double *tmpKK = tmpD;
325 if (localVerbose > 2) {
326 resHistory =
new (nothrow)
double[maxIterEigenSolve*blockSize];
327 spaceSizeHistory =
new (nothrow)
int[maxIterEigenSolve];
328 if ((resHistory == 0) || (spaceSizeHistory == 0)) {
341 bool reStart =
false;
344 bool criticalExit =
false;
348 numBlock = (dimSearch/blockSize) - (knownEV/blockSize);
350 int nFound = blockSize;
353 if (localVerbose > 0) {
355 cout <<
" *|* Problem: ";
357 cout <<
"K*Q = M*Q D ";
359 cout <<
"K*Q = Q D ";
361 cout <<
" with preconditioner";
363 cout <<
" *|* Algorithm = Davidson algorithm (block version)" << endl;
364 cout <<
" *|* Size of blocks = " << blockSize << endl;
365 cout <<
" *|* Largest size of search space = " << numBlock*blockSize << endl;
366 cout <<
" *|* Number of requested eigenvalues = " << numEigen << endl;
368 cout.setf(ios::scientific, ios::floatfield);
369 cout <<
" *|* Tolerance for convergence = " << tolEigenSolve << endl;
370 cout <<
" *|* Norm used for convergence: ";
372 cout <<
"weighted L2-norm with user-provided weights" << endl;
374 cout <<
"L^2-norm" << endl;
376 cout <<
" *|* Input converged eigenvectors = " << startingEV << endl;
377 cout <<
"\n -- Start iterations -- \n";
380 int maxBlock = (dimSearch/blockSize) - (knownEV/blockSize);
382 timeOuterLoop -= MyWatch.WallTime();
384 while (outerIter <= maxIterEigenSolve) {
386 highMem = (highMem > currentSize()) ? highMem : currentSize();
389 for (nb = bStart; nb < maxBlock; ++nb) {
392 if (outerIter > maxIterEigenSolve)
395 int localSize = nb*blockSize;
399 timeMassOp -= MyWatch.WallTime();
401 M->Apply(Xcurrent, MX);
402 timeMassOp += MyWatch.WallTime();
407 timeOrtho -= MyWatch.WallTime();
411 info = modalTool.massOrthonormalize(Xcurrent, MX, M, Q, nFound, 2, R.Values());
415 info = modalTool.massOrthonormalize(Xcurrent, MX, M, copyQ, nFound, 0, R.Values());
422 info = modalTool.massOrthonormalize(Xcurrent, MX, M, copyQ, blockSize, 0, R.Values());
424 timeOrtho += MyWatch.WallTime();
435 timeStifOp -= MyWatch.WallTime();
436 K->Apply(Xcurrent, KX);
437 timeStifOp += MyWatch.WallTime();
442 if (knownEV + localSize == 0)
443 accuracyCheck(&Xcurrent, &MX, 0);
446 accuracyCheck(&Xcurrent, &MX, ©Q);
448 if (localVerbose > 0)
454 timeLocalProj -= MyWatch.WallTime();
455 for (j = 0; j <= nb; ++j) {
456 callBLAS.GEMM(
'T',
'N', blockSize, blockSize, xr,
457 1.0, X.Values()+(knownEV+j*blockSize)*xr, xr, KX.Values(), xr,
458 0.0, tmpKK, blockSize);
459 MyComm.SumAll(tmpKK, S, blockSize*blockSize);
461 for (iC = 0; iC < blockSize; ++iC) {
462 double *Kpointer = KK + localSize*dimSearch + j*blockSize + iC*dimSearch;
463 memcpy(Kpointer, S + iC*blockSize, blockSize*
sizeof(
double));
466 timeLocalProj += MyWatch.WallTime();
469 timeLocalSolve -= MyWatch.WallTime();
470 int nevLocal = localSize + blockSize;
471 info = modalTool.directSolver(localSize+blockSize, KK, dimSearch, 0, 0,
472 nevLocal, S, dimSearch, theta, localVerbose, 10);
473 timeLocalSolve += MyWatch.WallTime();
482 if (localVerbose > 0) {
483 cout <<
" Iteration " << outerIter;
484 cout <<
"- Failure for spectral decomposition - RESTART with new random search\n";
488 timeRestart -= MyWatch.WallTime();
491 timeRestart += MyWatch.WallTime();
499 timeLocalUpdate -= MyWatch.WallTime();
500 callBLAS.GEMM(
'N',
'N', xr, blockSize, localSize+blockSize, 1.0, X.Values()+knownEV*xr, xr,
501 S, dimSearch, 0.0, KX.Values(), xr);
502 timeLocalUpdate += MyWatch.WallTime();
505 timeMassOp -= MyWatch.WallTime();
508 timeMassOp += MyWatch.WallTime();
512 timeStifOp -= MyWatch.WallTime();
514 timeStifOp += MyWatch.WallTime();
518 timeResidual -= MyWatch.WallTime();
520 for (j = 0; j < blockSize; ++j) {
521 callBLAS.AXPY(xr, -theta[j], MX.Values() + j*xr, R.Values() + j*xr);
526 for (j = 0; j < blockSize; ++j) {
527 callBLAS.AXPY(xr, -theta[j], KX.Values() + j*xr, R.Values() + j*xr);
530 timeResidual += MyWatch.WallTime();
531 residual += blockSize;
534 timeNorm -= MyWatch.WallTime();
536 R.NormWeighted(*vectWeight, normR);
544 for (j = 0; j < blockSize; ++j) {
545 normR[j] = (theta[j] == 0.0) ? normR[j] : normR[j]/theta[j];
546 if (normR[j] < tolEigenSolve)
549 timeNorm += MyWatch.WallTime();
552 if (localVerbose > 2) {
553 memcpy(resHistory + historyCount*blockSize, normR, blockSize*
sizeof(
double));
554 spaceSizeHistory[historyCount] = localSize + blockSize;
557 maxSpaceSize = (maxSpaceSize > localSize+blockSize) ? maxSpaceSize : localSize+blockSize;
558 sumSpaceSize += localSize + blockSize;
561 if (localVerbose > 0) {
562 cout <<
" Iteration " << outerIter <<
" - Number of converged eigenvectors ";
563 cout << knownEV + nFound << endl;
566 if (localVerbose > 1) {
569 cout.setf(ios::scientific, ios::floatfield);
570 for (i=0; i<blockSize; ++i) {
571 cout <<
" Iteration " << outerIter <<
" - Scaled Norm of Residual " << i;
572 cout <<
" = " << normR[i] << endl;
576 for (i=0; i<nevLocal; ++i) {
577 cout <<
" Iteration " << outerIter <<
" - Ritz eigenvalue " << i;
578 cout.setf((fabs(theta[i]) < 0.01) ? ios::scientific : ios::fixed, ios::floatfield);
579 cout <<
" = " << theta[i] << endl;
595 timePrecOp -= MyWatch.WallTime();
596 Prec->ApplyInverse(R, Xcurrent);
597 timePrecOp += MyWatch.WallTime();
601 memcpy(Xcurrent.Values(), R.Values(), blockSize*xr*
sizeof(double));
603 timeRestart -= MyWatch.WallTime();
604 Xcurrent.Update(1.0, KX, -1.0);
605 timeRestart += MyWatch.WallTime();
609 if (nb == maxBlock - 1) {
616 timePrecOp -= MyWatch.WallTime();
617 Prec->ApplyInverse(R, Xnext);
618 timePrecOp += MyWatch.WallTime();
622 memcpy(Xnext.Values(), R.Values(), blockSize*xr*
sizeof(double));
627 if (outerIter > maxIterEigenSolve)
630 if (reStart ==
true) {
635 if (criticalExit ==
true)
639 if (knownEV + nFound >= numEigen) {
640 for (j = 0; j < blockSize; ++j) {
641 if (normR[j] < tolEigenSolve) {
642 memcpy(X.Values() + knownEV*xr, KX.Values() + j*xr, xr*
sizeof(double));
643 lambda[knownEV] = theta[j];
647 if (localVerbose == 1) {
650 cout.setf(ios::scientific, ios::floatfield);
651 for (i=0; i<blockSize; ++i) {
652 cout <<
" Iteration " << outerIter <<
" - Scaled Norm of Residual " << i;
653 cout <<
" = " << normR[i] << endl;
663 double *Xpointer = X.Values() + (knownEV+nFound)*xr;
665 for (j = 0; j < blockSize; ++j) {
666 if (normR[j] < tolEigenSolve) {
667 memcpy(X.Values() + knownEV*xr, KX.Values() + j*xr, xr*
sizeof(double));
668 lambda[knownEV] = theta[j];
673 memcpy(Xpointer + (j-nFound)*xr, KX.Values() + j*xr, xr*
sizeof(double));
687 int firstIndex = blockSize;
688 for (j = 0; j < blockSize; ++j) {
689 if (normR[j] >= tolEigenSolve) {
694 while (firstIndex < nFound) {
695 for (j = firstIndex; j < blockSize; ++j) {
696 if (normR[j] < tolEigenSolve) {
698 callFortran.SWAP(nb*blockSize, S + j*dimSearch, 1, S + firstIndex*dimSearch, 1);
699 callFortran.SWAP(1, theta + j, 1, theta + firstIndex, 1);
700 callFortran.SWAP(1, normR + j, 1, normR + firstIndex, 1);
704 for (j = 0; j < blockSize; ++j) {
705 if (normR[j] >= tolEigenSolve) {
713 memcpy(lambda + knownEV, theta, nFound*
sizeof(
double));
718 bStart = ((nb - offSet) > 2) ? (nb - offSet)/2 : 0;
721 timeRestart -= MyWatch.WallTime();
722 memset(KK, 0, nb*blockSize*dimSearch*
sizeof(
double));
723 for (j = 0; j < bStart*blockSize; ++j) {
724 KK[j + j*dimSearch] = theta[j + nFound];
727 int oldCol = nb*blockSize;
728 int newCol = nFound + (bStart+1)*blockSize;
729 newCol = (newCol > oldCol) ? oldCol : newCol;
730 callFortran.GEQRF(oldCol, newCol, S, dimSearch, theta, R.Values(), xr*blockSize, &info);
731 callFortran.ORMQR(
'R',
'N', xr, oldCol, newCol, S, dimSearch, theta,
732 X.Values()+knownEV*xr, xr, R.Values(), blockSize*xr, &info);
733 timeRestart += MyWatch.WallTime();
739 maxBlock = (dimSearch/blockSize) - (knownEV/blockSize);
742 newCol = nFound + (bStart+1)*blockSize;
743 if (newCol > oldCol) {
752 timeOuterLoop += MyWatch.WallTime();
753 highMem = (highMem > currentSize()) ? highMem : currentSize();
762 timePostProce -= MyWatch.WallTime();
763 if ((info == 0) && (knownEV > 0)) {
764 mySort.sortScalars_Vectors(knownEV, lambda, Q.Values(), Q.MyLength());
766 timePostProce += MyWatch.WallTime();
768 return (info == 0) ? knownEV : info;
777 cout.setf(ios::scientific, ios::floatfield);
780 int myPid = MyComm.MyPID();
785 tmp = myVerify.errorEquality(X, MX, M);
787 cout <<
" >> Difference between MX and M*X = " << tmp << endl;
789 tmp = myVerify.errorOrthonormality(X, M);
791 cout <<
" >> Error in X^T M X - I = " << tmp << endl;
794 tmp = myVerify.errorOrthonormality(X, 0);
796 cout <<
" >> Error in X^T X - I = " << tmp << endl;
804 tmp = myVerify.errorOrthonormality(Q, M);
806 cout <<
" >> Error in Q^T M Q - I = " << tmp << endl;
808 tmp = myVerify.errorOrthogonality(Q, X, M);
810 cout <<
" >> Orthogonality Q^T M X up to " << tmp << endl;
814 tmp = myVerify.errorOrthonormality(Q, 0);
816 cout <<
" >> Error in Q^T Q - I = " << tmp << endl;
818 tmp = myVerify.errorOrthogonality(Q, X, 0);
820 cout <<
" >> Orthogonality Q^T X up to " << tmp << endl;
827 int Davidson::minimumSpaceDimension(
int nev)
const {
829 int myPid = MyComm.MyPID();
831 if ((myPid == 0) && (numBlock*blockSize < nev)) {
833 cerr <<
" !!! The space dimension (# of blocks x size of blocks) must be greater than ";
834 cerr <<
" the number of eigenvalues !!!\n";
835 cerr <<
" Number of blocks = " << numBlock << endl;
836 cerr <<
" Size of blocks = " << blockSize << endl;
837 cerr <<
" Number of eigenvalues = " << nev << endl;
842 return nev + blockSize;
847 void Davidson::initializeCounters() {
857 if (spaceSizeHistory) {
858 delete[] spaceSizeHistory;
859 spaceSizeHistory = 0;
873 timeLocalSolve = 0.0;
874 timeLocalUpdate = 0.0;
888 void Davidson::algorithmInfo()
const {
890 int myPid = MyComm.MyPID();
893 cout <<
" Algorithm: Davidson algorithm (block version)\n";
894 cout <<
" Block Size: " << blockSize << endl;
895 cout <<
" Number of Blocks kept: " << numBlock << endl;
901 void Davidson::historyInfo()
const {
905 cout <<
" Block Size: " << blockSize << endl;
907 cout <<
" Residuals Search Space Dim.\n";
910 cout.setf(ios::scientific, ios::floatfield);
911 for (j = 0; j < historyCount; ++j) {
913 for (ii = 0; ii < blockSize; ++ii) {
914 cout << resHistory[blockSize*j + ii] <<
" ";
916 cout << spaceSizeHistory[j] << endl;
925 void Davidson::memoryInfo()
const {
927 int myPid = MyComm.MyPID();
929 double maxHighMem = 0.0;
930 double tmp = highMem;
931 MyComm.MaxAll(&tmp, &maxHighMem, 1);
933 double maxMemRequested = 0.0;
935 MyComm.MaxAll(&tmp, &maxMemRequested, 1);
939 cout.setf(ios::fixed, ios::floatfield);
940 cout <<
" Memory requested per processor by the eigensolver = (EST) ";
942 cout << maxMemRequested <<
" MB " << endl;
944 cout <<
" High water mark in eigensolver = (EST) ";
946 cout << maxHighMem <<
" MB " << endl;
953 void Davidson::operationInfo()
const {
955 int myPid = MyComm.MyPID();
958 cout <<
" --- Operations ---\n\n";
959 cout <<
" Total number of mass matrix multiplications = ";
961 cout << massOp + modalTool.getNumProj_MassMult() + modalTool.getNumNorm_MassMult() << endl;
962 cout <<
" Mass multiplications in solver loop = ";
964 cout << massOp << endl;
965 cout <<
" Mass multiplications for orthogonalization = ";
967 cout << modalTool.getNumProj_MassMult() << endl;
968 cout <<
" Mass multiplications for normalization = ";
970 cout << modalTool.getNumNorm_MassMult() << endl;
971 cout <<
" Total number of stiffness matrix operations = ";
973 cout << stifOp << endl;
974 cout <<
" Total number of preconditioner applications = ";
976 cout << precOp << endl;
977 cout <<
" Total number of computed eigen-residuals = ";
979 cout << residual << endl;
981 cout <<
" Total number of outer iterations = ";
983 cout << outerIter << endl;
984 cout <<
" Number of restarts = ";
986 cout << numRestart << endl;
988 cout <<
" Maximum size of search space = ";
990 cout << maxSpaceSize << endl;
991 cout <<
" Average size of search space = ";
992 cout.setf(ios::fixed, ios::floatfield);
995 cout << ((double) sumSpaceSize)/((double) outerIter) << endl;
1002 void Davidson::timeInfo()
const {
1004 int myPid = MyComm.MyPID();
1007 cout <<
" --- Timings ---\n\n";
1008 cout.setf(ios::fixed, ios::floatfield);
1010 cout <<
" Total time for outer loop = ";
1012 cout << timeOuterLoop <<
" s" << endl;
1013 cout <<
" Time for mass matrix operations = ";
1015 cout << timeMassOp <<
" s ";
1017 cout << 100*timeMassOp/timeOuterLoop <<
" %\n";
1018 cout <<
" Time for stiffness matrix operations = ";
1020 cout << timeStifOp <<
" s ";
1022 cout << 100*timeStifOp/timeOuterLoop <<
" %\n";
1023 cout <<
" Time for preconditioner applications = ";
1025 cout << timePrecOp <<
" s ";
1027 cout << 100*timePrecOp/timeOuterLoop <<
" %\n";
1028 cout <<
" Time for orthogonalizations = ";
1030 cout << timeOrtho <<
" s ";
1032 cout << 100*timeOrtho/timeOuterLoop <<
" %\n";
1033 cout <<
" Projection step : ";
1035 cout << modalTool.getTimeProj() <<
" s\n";
1045 cout <<
" Normalization step : ";
1047 cout << modalTool.getTimeNorm() <<
" s\n";
1051 cout <<
" Time for local projection = ";
1053 cout << timeLocalProj <<
" s ";
1055 cout << 100*timeLocalProj/timeOuterLoop <<
" %\n";
1056 cout <<
" Time for local eigensolve = ";
1058 cout << timeLocalSolve <<
" s ";
1060 cout << 100*timeLocalSolve/timeOuterLoop <<
" %\n";
1061 cout <<
" Time for local update = ";
1063 cout << timeLocalUpdate <<
" s ";
1065 cout << 100*timeLocalUpdate/timeOuterLoop <<
" %\n";
1066 cout <<
" Time for residual computations = ";
1068 cout << timeResidual <<
" s ";
1070 cout << 100*timeResidual/timeOuterLoop <<
" %\n";
1071 cout <<
" Time for residuals norm computations = ";
1073 cout << timeNorm <<
" s ";
1075 cout << 100*timeNorm/timeOuterLoop <<
" %\n";
1076 cout <<
" Time for restarting space definition = ";
1078 cout << timeRestart <<
" s ";
1080 cout << 100*timeRestart/timeOuterLoop <<
" %\n";
1082 cout <<
" Total time for post-processing = ";
1084 cout << timePostProce <<
" s\n";