40 double _tol,
int _maxIter,
int _verb) :
52 maxIterEigenSolve(_maxIter),
85 double _tol,
int _maxIter,
int _verb,
98 maxIterEigenSolve(_maxIter),
114 timeLocalUpdate(0.0),
139 int LOBPCG::reSolve(
int numEigen,
Epetra_MultiVector &Q,
double *lambda,
int startingEV) {
192 if (numEigen <= startingEV) {
196 int info = myVerify.inputArguments(numEigen, K, M, Prec, Q, minimumSpaceDimension(numEigen));
200 int myPid = MyComm.MyPID();
208 int knownEV = startingEV;
209 int localVerbose = verbose*(myPid==0);
226 int xr = Q.MyLength();
231 tmp = (M == 0) ? 6*blockSize*xr : 9*blockSize*xr;
233 double *work1 =
new (nothrow)
double[tmp];
240 memRequested +=
sizeof(double)*tmp/(1024.0*1024.0);
242 highMem = (highMem > currentSize()) ? highMem : currentSize();
244 double *tmpD = work1;
247 tmpD = tmpD + xr*blockSize;
250 tmpD = (M) ? tmpD + xr*blockSize : tmpD;
253 tmpD = tmpD + xr*blockSize;
256 tmpD = tmpD + xr*blockSize;
259 tmpD = tmpD + xr*blockSize;
262 tmpD = (M) ? tmpD + xr*blockSize : tmpD;
265 tmpD = tmpD + xr*blockSize;
268 tmpD = tmpD + xr*blockSize;
283 lwork2 = 4*blockSize + 27*blockSize*blockSize;
284 double *work2 =
new (nothrow)
double[lwork2];
293 highMem = (highMem > currentSize()) ? highMem : currentSize();
297 double *theta = tmpD;
298 tmpD = tmpD + 3*blockSize;
300 double *normR = tmpD;
301 tmpD = tmpD + blockSize;
304 tmpD = tmpD + 9*blockSize*blockSize;
307 tmpD = tmpD + 9*blockSize*blockSize;
311 memRequested +=
sizeof(double)*lwork2/(1024.0*1024.0);
314 if (localVerbose > 2) {
315 resHistory =
new (nothrow)
double[maxIterEigenSolve*blockSize];
316 if (resHistory == 0) {
329 bool reStart =
false;
333 int twoBlocks = 2*blockSize;
334 int threeBlocks = 3*blockSize;
335 int nFound = blockSize;
338 if (localVerbose > 0) {
340 cout <<
" *|* Problem: ";
342 cout <<
"K*Q = M*Q D ";
344 cout <<
"K*Q = Q D ";
346 cout <<
" with preconditioner";
348 cout <<
" *|* Algorithm = LOBPCG" << endl;
349 cout <<
" *|* Size of blocks = " << blockSize << endl;
350 cout <<
" *|* Number of requested eigenvalues = " << numEigen << endl;
352 cout.setf(ios::scientific, ios::floatfield);
353 cout <<
" *|* Tolerance for convergence = " << tolEigenSolve << endl;
354 cout <<
" *|* Norm used for convergence: ";
356 cout <<
"weighted L2-norm with user-provided weights" << endl;
359 cout <<
"L^2-norm" << endl;
362 cout <<
" *|* Input converged eigenvectors = " << startingEV << endl;
363 cout <<
"\n -- Start iterations -- \n";
366 timeOuterLoop -= MyWatch.WallTime();
367 for (outerIter = 1; outerIter <= maxIterEigenSolve; ++outerIter) {
369 highMem = (highMem > currentSize()) ? highMem : currentSize();
371 if ((outerIter == 1) || (reStart ==
true)) {
374 localSize = blockSize;
383 timeMassOp -= MyWatch.WallTime();
386 timeMassOp += MyWatch.WallTime();
393 timeOrtho -= MyWatch.WallTime();
394 info = modalTool.massOrthonormalize(X, MX, M, copyQ, nFound, 1, R.Values());
395 timeOrtho += MyWatch.WallTime();
408 timeStifOp -= MyWatch.WallTime();
410 timeStifOp += MyWatch.WallTime();
420 timePrecOp -= MyWatch.WallTime();
421 Prec->ApplyInverse(R, H);
422 timePrecOp += MyWatch.WallTime();
426 memcpy(H.Values(), R.Values(), xr*blockSize*
sizeof(double));
430 timeMassOp -= MyWatch.WallTime();
433 timeMassOp += MyWatch.WallTime();
440 timeOrtho -= MyWatch.WallTime();
441 modalTool.massOrthonormalize(H, MH, M, copyQ, blockSize, 1, R.Values());
442 timeOrtho += MyWatch.WallTime();
446 timeStifOp -= MyWatch.WallTime();
448 timeStifOp += MyWatch.WallTime();
451 if (localSize == blockSize)
452 localSize += blockSize;
458 timeLocalProj -= MyWatch.WallTime();
459 modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, KX.Values(), xr,
461 modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, MX.Values(), xr,
463 if (localSize > blockSize) {
464 modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, KH.Values(), xr,
465 KK + blockSize*localSize, localSize, S);
466 modalTool.localProjection(blockSize, blockSize, xr, H.Values(), xr, KH.Values(), xr,
467 KK + blockSize*localSize + blockSize, localSize, S);
468 modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, MH.Values(), xr,
469 MM + blockSize*localSize, localSize, S);
470 modalTool.localProjection(blockSize, blockSize, xr, H.Values(), xr, MH.Values(), xr,
471 MM + blockSize*localSize + blockSize, localSize, S);
472 if (localSize > twoBlocks) {
473 modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, KP.Values(), xr,
474 KK + twoBlocks*localSize, localSize, S);
475 modalTool.localProjection(blockSize, blockSize, xr, H.Values(), xr, KP.Values(), xr,
476 KK + twoBlocks*localSize + blockSize, localSize, S);
477 modalTool.localProjection(blockSize, blockSize, xr, P.Values(), xr, KP.Values(), xr,
478 KK + twoBlocks*localSize + twoBlocks, localSize, S);
479 modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, MP.Values(), xr,
480 MM + twoBlocks*localSize, localSize, S);
481 modalTool.localProjection(blockSize, blockSize, xr, H.Values(), xr, MP.Values(), xr,
482 MM + twoBlocks*localSize + blockSize, localSize, S);
483 modalTool.localProjection(blockSize, blockSize, xr, P.Values(), xr, MP.Values(), xr,
484 MM + twoBlocks*localSize + twoBlocks, localSize, S);
487 timeLocalProj += MyWatch.WallTime();
490 timeLocalSolve -= MyWatch.WallTime();
491 int nevLocal = localSize;
492 info = modalTool.directSolver(localSize, KK, localSize, MM, localSize, nevLocal,
493 S, localSize, theta, localVerbose,
494 (blockSize == 1) ? 1 : 0);
495 timeLocalSolve += MyWatch.WallTime();
503 if ((theta[0] < 0.0) || (nevLocal < blockSize)) {
504 if (localVerbose > 0) {
505 cout <<
" Iteration " << outerIter;
506 cout <<
"- Failure for spectral decomposition - RESTART with new random search\n";
508 if (blockSize == 1) {
515 nFound = blockSize - 1;
523 if ((localSize == twoBlocks) && (nevLocal == blockSize)) {
524 for (j = 0; j < nevLocal; ++j)
525 memcpy(S + j*blockSize, S + j*twoBlocks, blockSize*
sizeof(
double));
526 localSize = blockSize;
529 if ((localSize == threeBlocks) && (nevLocal <= twoBlocks)) {
530 for (j = 0; j < nevLocal; ++j)
531 memcpy(S + j*twoBlocks, S + j*threeBlocks, twoBlocks*
sizeof(
double));
532 localSize = twoBlocks;
536 timeResidual -= MyWatch.WallTime();
537 callBLAS.GEMM(
'N',
'N', xr, blockSize, blockSize, 1.0, KX.Values(), xr,
538 S, localSize, 0.0, R.Values(), xr);
539 if (localSize >= twoBlocks) {
540 callBLAS.GEMM(
'N',
'N', xr, blockSize, blockSize, 1.0, KH.Values(), xr,
541 S + blockSize, localSize, 1.0, R.Values(), xr);
542 if (localSize == threeBlocks) {
543 callBLAS.GEMM(
'N',
'N', xr, blockSize, blockSize, 1.0, KP.Values(), xr,
544 S + twoBlocks, localSize, 1.0, R.Values(), xr);
547 for (j = 0; j < blockSize; ++j)
548 callBLAS.SCAL(localSize, theta[j], S + j*localSize);
549 callBLAS.GEMM(
'N',
'N', xr, blockSize, blockSize, -1.0, MX.Values(), xr,
550 S, localSize, 1.0, R.Values(), xr);
551 if (localSize >= twoBlocks) {
552 callBLAS.GEMM(
'N',
'N', xr, blockSize, blockSize, -1.0, MH.Values(), xr,
553 S + blockSize, localSize, 1.0, R.Values(), xr);
554 if (localSize == threeBlocks) {
555 callBLAS.GEMM(
'N',
'N', xr, blockSize, blockSize, -1.0, MP.Values(), xr,
556 S + twoBlocks, localSize, 1.0, R.Values(), xr);
559 for (j = 0; j < blockSize; ++j)
560 callBLAS.SCAL(localSize, 1.0/theta[j], S + j*localSize);
561 timeResidual += MyWatch.WallTime();
564 timeNorm -= MyWatch.WallTime();
566 R.NormWeighted(*vectWeight, normR);
573 for (j = 0; j < blockSize; ++j) {
574 normR[j] = (theta[j] == 0.0) ? normR[j] : normR[j]/theta[j];
575 if (normR[j] < tolEigenSolve)
578 timeNorm += MyWatch.WallTime();
581 if (localVerbose > 2) {
582 memcpy(resHistory + historyCount*blockSize, normR, blockSize*
sizeof(
double));
587 if (localVerbose > 0) {
588 cout <<
" Iteration " << outerIter <<
" - Number of converged eigenvectors ";
589 cout << knownEV + nFound << endl;
592 if (localVerbose > 1) {
595 cout.setf(ios::scientific, ios::floatfield);
596 for (i=0; i<blockSize; ++i) {
597 cout <<
" Iteration " << outerIter <<
" - Scaled Norm of Residual " << i;
598 cout <<
" = " << normR[i] << endl;
602 for (i=0; i<blockSize; ++i) {
603 cout <<
" Iteration " << outerIter <<
" - Ritz eigenvalue " << i;
604 cout.setf((fabs(theta[i]) < 0.01) ? ios::scientific : ios::fixed, ios::floatfield);
605 cout <<
" = " << theta[i] << endl;
613 timeLocalUpdate -= MyWatch.WallTime();
614 memcpy(R.Values(), X.Values(), xr*blockSize*
sizeof(double));
615 callBLAS.GEMM(
'N',
'N', xr, blockSize, blockSize, 1.0, R.Values(), xr, S, localSize,
616 0.0, X.Values(), xr);
617 memcpy(R.Values(), KX.Values(), xr*blockSize*
sizeof(double));
618 callBLAS.GEMM(
'N',
'N', xr, blockSize, blockSize, 1.0, R.Values(), xr, S, localSize,
619 0.0, KX.Values(), xr);
621 memcpy(R.Values(), MX.Values(), xr*blockSize*
sizeof(double));
622 callBLAS.GEMM(
'N',
'N', xr, blockSize, blockSize, 1.0, R.Values(), xr, S, localSize,
623 0.0, MX.Values(), xr);
625 if (localSize == twoBlocks) {
626 callBLAS.GEMM(
'N',
'N', xr, blockSize, blockSize, 1.0, H.Values(), xr,
627 S+blockSize, localSize, 0.0, P.Values(), xr);
628 callBLAS.AXPY(xr*blockSize, 1.0, P.Values(), X.Values());
629 callBLAS.GEMM(
'N',
'N', xr, blockSize, blockSize, 1.0, KH.Values(), xr,
630 S+blockSize, localSize, 0.0, KP.Values(), xr);
631 callBLAS.AXPY(xr*blockSize, 1.0, KP.Values(), KX.Values());
633 callBLAS.GEMM(
'N',
'N', xr, blockSize, blockSize, 1.0, MH.Values(), xr,
634 S+blockSize, localSize, 0.0, MP.Values(), xr);
635 callBLAS.AXPY(xr*blockSize, 1.0, MP.Values(), MX.Values());
638 if (localSize == threeBlocks) {
639 memcpy(R.Values(), P.Values(), xr*blockSize*
sizeof(double));
640 callBLAS.GEMM(
'N',
'N', xr, blockSize, blockSize, 1.0, R.Values(), xr,
641 S + twoBlocks, localSize, 0.0, P.Values(), xr);
642 callBLAS.GEMM(
'N',
'N', xr, blockSize, blockSize, 1.0, H.Values(), xr,
643 S + blockSize, localSize, 1.0, P.Values(), xr);
644 callBLAS.AXPY(xr*blockSize, 1.0, P.Values(), X.Values());
645 memcpy(R.Values(), KP.Values(), xr*blockSize*
sizeof(double));
646 callBLAS.GEMM(
'N',
'N', xr, blockSize, blockSize, 1.0, R.Values(), xr,
647 S + twoBlocks, localSize, 0.0, KP.Values(), xr);
648 callBLAS.GEMM(
'N',
'N', xr, blockSize, blockSize, 1.0, KH.Values(), xr,
649 S + blockSize, localSize, 1.0, KP.Values(), xr);
650 callBLAS.AXPY(xr*blockSize, 1.0, KP.Values(), KX.Values());
652 memcpy(R.Values(), MP.Values(), xr*blockSize*
sizeof(double));
653 callBLAS.GEMM(
'N',
'N', xr, blockSize, blockSize, 1.0, R.Values(), xr,
654 S + twoBlocks, localSize, 0.0, MP.Values(), xr);
655 callBLAS.GEMM(
'N',
'N', xr, blockSize, blockSize, 1.0, MH.Values(), xr,
656 S + blockSize, localSize, 1.0, MP.Values(), xr);
657 callBLAS.AXPY(xr*blockSize, 1.0, MP.Values(), MX.Values());
660 timeLocalUpdate += MyWatch.WallTime();
662 timeResidual -= MyWatch.WallTime();
663 memcpy(R.Values(), KX.Values(), blockSize*xr*
sizeof(double));
664 for (j = 0; j < blockSize; ++j)
665 callBLAS.AXPY(xr, -theta[j], MX.Values() + j*xr, R.Values() + j*xr);
666 timeResidual += MyWatch.WallTime();
670 accuracyCheck(&X, &MX, &R, 0, 0, 0);
674 accuracyCheck(&X, &MX, &R, ©Q, (localSize>blockSize) ? &H : 0,
675 (localSize>twoBlocks) ? &P : 0);
678 if (localSize < threeBlocks)
679 localSize += blockSize;
684 int firstIndex = blockSize;
685 for (j = 0; j < blockSize; ++j) {
686 if (normR[j] >= tolEigenSolve) {
691 while (firstIndex < nFound) {
692 for (j = firstIndex; j < blockSize; ++j) {
693 if (normR[j] < tolEigenSolve) {
695 callFortran.SWAP(localSize, S + j*localSize, 1, S + firstIndex*localSize, 1);
696 callFortran.SWAP(1, theta + j, 1, theta + firstIndex, 1);
697 callFortran.SWAP(1, normR + j, 1, normR + firstIndex, 1);
701 for (j = 0; j < blockSize; ++j) {
702 if (normR[j] >= tolEigenSolve) {
710 memcpy(lambda + knownEV, theta, nFound*
sizeof(
double));
713 if (knownEV + nFound >= numEigen) {
714 callBLAS.GEMM(
'N',
'N', xr, nFound, blockSize, 1.0, X.Values(), xr,
715 S, localSize, 0.0, R.Values(), xr);
716 if (localSize >= twoBlocks) {
717 callBLAS.GEMM(
'N',
'N', xr, nFound, blockSize, 1.0, H.Values(), xr,
718 S + blockSize, localSize, 1.0, R.Values(), xr);
719 if (localSize == threeBlocks) {
720 callBLAS.GEMM(
'N',
'N', xr, nFound, blockSize, 1.0, P.Values(), xr,
721 S + twoBlocks, localSize, 1.0, R.Values(), xr);
724 memcpy(Q.Values() + knownEV*xr, R.Values(), nFound*xr*
sizeof(double));
726 if (localVerbose == 1) {
729 cout.setf(ios::scientific, ios::floatfield);
730 for (i=0; i<blockSize; ++i) {
731 cout <<
" Iteration " << outerIter <<
" - Scaled Norm of Residual " << i;
732 cout <<
" = " << normR[i] << endl;
740 callBLAS.GEMM(
'N',
'N', xr, nFound, blockSize, 1.0, X.Values(), xr,
741 S, localSize, 0.0, Q.Values() + knownEV*xr, xr);
742 if (localSize >= twoBlocks) {
743 callBLAS.GEMM(
'N',
'N', xr, nFound, blockSize, 1.0, H.Values(), xr,
744 S + blockSize, localSize, 1.0, Q.Values() + knownEV*xr, xr);
745 if (localSize >= threeBlocks)
746 callBLAS.GEMM(
'N',
'N', xr, nFound, blockSize, 1.0, P.Values(), xr,
747 S + twoBlocks, localSize, 1.0, Q.Values() + knownEV*xr, xr);
752 timeRestart -= MyWatch.WallTime();
753 int leftOver = (nevLocal < blockSize + nFound) ? nevLocal - nFound : blockSize;
754 double *Snew = S + nFound*localSize;
755 memcpy(R.Values(), X.Values(), blockSize*xr*
sizeof(double));
756 callBLAS.GEMM(
'N',
'N', xr, leftOver, blockSize, 1.0, R.Values(), xr,
757 Snew, localSize, 0.0, X.Values(), xr);
758 memcpy(R.Values(), KX.Values(), blockSize*xr*
sizeof(double));
759 callBLAS.GEMM(
'N',
'N', xr, leftOver, blockSize, 1.0, R.Values(), xr,
760 Snew, localSize, 0.0, KX.Values(), xr);
762 memcpy(R.Values(), MX.Values(), blockSize*xr*
sizeof(double));
763 callBLAS.GEMM(
'N',
'N', xr, leftOver, blockSize, 1.0, R.Values(), xr,
764 Snew, localSize, 0.0, MX.Values(), xr);
766 if (localSize >= twoBlocks) {
767 callBLAS.GEMM(
'N',
'N', xr, leftOver, blockSize, 1.0, H.Values(), xr,
768 Snew+blockSize, localSize, 1.0, X.Values(), xr);
769 callBLAS.GEMM(
'N',
'N', xr, leftOver, blockSize, 1.0, KH.Values(), xr,
770 Snew+blockSize, localSize, 1.0, KX.Values(), xr);
772 callBLAS.GEMM(
'N',
'N', xr, leftOver, blockSize, 1.0, MH.Values(), xr,
773 Snew+blockSize, localSize, 1.0, MX.Values(), xr);
775 if (localSize >= threeBlocks) {
776 callBLAS.GEMM(
'N',
'N', xr, leftOver, blockSize, 1.0, P.Values(), xr,
777 Snew+twoBlocks, localSize, 1.0, X.Values(), xr);
778 callBLAS.GEMM(
'N',
'N', xr, leftOver, blockSize, 1.0, KP.Values(), xr,
779 Snew+twoBlocks, localSize, 1.0, KX.Values(), xr);
781 callBLAS.GEMM(
'N',
'N', xr, leftOver, blockSize, 1.0, MP.Values(), xr,
782 Snew+twoBlocks, localSize, 1.0, MX.Values(), xr);
786 if (nevLocal < blockSize + nFound) {
795 timeRestart += MyWatch.WallTime();
798 timeOuterLoop += MyWatch.WallTime();
799 highMem = (highMem > currentSize()) ? highMem : currentSize();
808 timePostProce -= MyWatch.WallTime();
809 if ((info == 0) && (knownEV > 0)) {
810 mySort.sortScalars_Vectors(knownEV, lambda, Q.Values(), Q.MyLength());
812 timePostProce += MyWatch.WallTime();
814 return (info == 0) ? knownEV : info;
824 cout.setf(ios::scientific, ios::floatfield);
827 int myPid = MyComm.MyPID();
832 tmp = myVerify.errorEquality(X, MX, M);
834 cout <<
" >> Difference between MX and M*X = " << tmp << endl;
836 tmp = myVerify.errorOrthonormality(X, M);
838 cout <<
" >> Error in X^T M X - I = " << tmp << endl;
841 tmp = myVerify.errorOrthonormality(X, 0);
843 cout <<
" >> Error in X^T X - I = " << tmp << endl;
848 tmp = myVerify.errorOrthogonality(X, R);
850 cout <<
" >> Orthogonality X^T R up to " << tmp << endl;
857 tmp = myVerify.errorOrthonormality(Q, M);
859 cout <<
" >> Error in Q^T M Q - I = " << tmp << endl;
861 tmp = myVerify.errorOrthogonality(Q, X, M);
863 cout <<
" >> Orthogonality Q^T M X up to " << tmp << endl;
866 tmp = myVerify.errorOrthogonality(Q, H, M);
868 cout <<
" >> Orthogonality Q^T M H up to " << tmp << endl;
871 tmp = myVerify.errorOrthogonality(Q, P, M);
873 cout <<
" >> Orthogonality Q^T M P up to " << tmp << endl;
877 tmp = myVerify.errorOrthonormality(Q, 0);
879 cout <<
" >> Error in Q^T Q - I = " << tmp << endl;
881 tmp = myVerify.errorOrthogonality(Q, X, 0);
883 cout <<
" >> Orthogonality Q^T X up to " << tmp << endl;
886 tmp = myVerify.errorOrthogonality(Q, H, 0);
888 cout <<
" >> Orthogonality Q^T H up to " << tmp << endl;
891 tmp = myVerify.errorOrthogonality(Q, P, 0);
893 cout <<
" >> Orthogonality Q^T P up to " << tmp << endl;
900 void LOBPCG::initializeCounters() {
919 timeLocalSolve = 0.0;
920 timeLocalUpdate = 0.0;
935 void LOBPCG::algorithmInfo()
const {
937 int myPid = MyComm.MyPID();
940 cout <<
" Algorithm: LOBPCG (block version) with Cholesky-based local eigensolver\n";
941 cout <<
" Block Size: " << blockSize << endl;
947 void LOBPCG::historyInfo()
const {
951 cout <<
" Block Size: " << blockSize << endl;
953 cout <<
" Residuals\n";
956 cout.setf(ios::scientific, ios::floatfield);
957 for (j = 0; j < historyCount; ++j) {
959 for (ii = 0; ii < blockSize; ++ii)
960 cout << resHistory[blockSize*j + ii] << endl;
968 void LOBPCG::memoryInfo()
const {
970 int myPid = MyComm.MyPID();
972 double maxHighMem = 0.0;
973 double tmp = highMem;
974 MyComm.MaxAll(&tmp, &maxHighMem, 1);
976 double maxMemRequested = 0.0;
978 MyComm.MaxAll(&tmp, &maxMemRequested, 1);
982 cout.setf(ios::fixed, ios::floatfield);
983 cout <<
" Memory requested per processor by the eigensolver = (EST) ";
985 cout << maxMemRequested <<
" MB " << endl;
987 cout <<
" High water mark in eigensolver = (EST) ";
989 cout << maxHighMem <<
" MB " << endl;
996 void LOBPCG::operationInfo()
const {
998 int myPid = MyComm.MyPID();
1001 cout <<
" --- Operations ---\n\n";
1002 cout <<
" Total number of mass matrix multiplications = ";
1004 cout << massOp + modalTool.getNumProj_MassMult() + modalTool.getNumNorm_MassMult() << endl;
1005 cout <<
" Mass multiplications in solver loop = ";
1007 cout << massOp << endl;
1008 cout <<
" Mass multiplications for orthogonalization = ";
1010 cout << modalTool.getNumProj_MassMult() << endl;
1011 cout <<
" Mass multiplications for normalization = ";
1013 cout << modalTool.getNumNorm_MassMult() << endl;
1014 cout <<
" Total number of stiffness matrix operations = ";
1016 cout << stifOp << endl;
1017 cout <<
" Total number of preconditioner applications = ";
1019 cout << precOp << endl;
1020 cout <<
" Total number of computed eigen-residuals = ";
1022 cout << residual << endl;
1024 cout <<
" Total number of outer iterations = ";
1026 cout << outerIter << endl;
1027 cout <<
" Number of restarts = ";
1029 cout << numRestart << endl;
1036 void LOBPCG::timeInfo()
const {
1038 int myPid = MyComm.MyPID();
1041 cout <<
" --- Timings ---\n\n";
1042 cout.setf(ios::fixed, ios::floatfield);
1044 cout <<
" Total time for outer loop = ";
1046 cout << timeOuterLoop <<
" s" << endl;
1047 cout <<
" Time for mass matrix operations = ";
1049 cout << timeMassOp <<
" s ";
1051 cout << 100*timeMassOp/timeOuterLoop <<
" %\n";
1052 cout <<
" Time for stiffness matrix operations = ";
1054 cout << timeStifOp <<
" s ";
1056 cout << 100*timeStifOp/timeOuterLoop <<
" %\n";
1057 cout <<
" Time for preconditioner applications = ";
1059 cout << timePrecOp <<
" s ";
1061 cout << 100*timePrecOp/timeOuterLoop <<
" %\n";
1062 cout <<
" Time for orthogonalizations = ";
1064 cout << timeOrtho <<
" s ";
1066 cout << 100*timeOrtho/timeOuterLoop <<
" %\n";
1067 cout <<
" Projection step : ";
1069 cout << modalTool.getTimeProj() <<
" s\n";
1070 cout <<
" Q^T mult. :: ";
1072 cout << modalTool.getTimeProj_QtMult() <<
" s\n";
1073 cout <<
" Q mult. :: ";
1075 cout << modalTool.getTimeProj_QMult() <<
" s\n";
1076 cout <<
" Mass mult. :: ";
1078 cout << modalTool.getTimeProj_MassMult() <<
" s\n";
1079 cout <<
" Normalization step : ";
1081 cout << modalTool.getTimeNorm() <<
" s\n";
1082 cout <<
" Mass mult. :: ";
1084 cout << modalTool.getTimeNorm_MassMult() <<
" s\n";
1085 cout <<
" Time for local projection = ";
1087 cout << timeLocalProj <<
" s ";
1089 cout << 100*timeLocalProj/timeOuterLoop <<
" %\n";
1090 cout <<
" Time for local eigensolve = ";
1092 cout << timeLocalSolve <<
" s ";
1094 cout << 100*timeLocalSolve/timeOuterLoop <<
" %\n";
1095 cout <<
" Time for local update = ";
1097 cout << timeLocalUpdate <<
" s ";
1099 cout << 100*timeLocalUpdate/timeOuterLoop <<
" %\n";
1100 cout <<
" Time for residual computations = ";
1102 cout << timeResidual <<
" s ";
1104 cout << 100*timeResidual/timeOuterLoop <<
" %\n";
1105 cout <<
" Time for residuals norm computations = ";
1107 cout << timeNorm <<
" s ";
1109 cout << 100*timeNorm/timeOuterLoop <<
" %\n";
1110 cout <<
" Time for restarting space definition = ";
1112 cout << timeRestart <<
" s ";
1114 cout << 100*timeRestart/timeOuterLoop <<
" %\n";
1116 cout <<
" Total time for post-processing = ";
1118 cout << timePostProce <<
" s\n";