35 #include "LOBPCG_light.h"
40 double _tol,
int _maxIter,
int _verb) :
52 maxIterEigenSolve(_maxIter),
85 double _tol,
int _maxIter,
int _verb,
98 maxIterEigenSolve(_maxIter),
114 timeLocalUpdate(0.0),
129 LOBPCG_light::~LOBPCG_light() {
139 int LOBPCG_light::reSolve(
int numEigen,
Epetra_MultiVector &Q,
double *lambda,
int startingEV) {
192 if (numEigen <= startingEV) {
196 int info = myVerify.inputArguments(numEigen, K, M, Prec, Q, minimumSpaceDimension(numEigen));
200 int myPid = MyComm.MyPID();
208 int knownEV = startingEV;
209 int localVerbose = verbose*(myPid==0);
219 int xr = Q.MyLength();
224 tmp = 3*blockSize*xr;
226 double *work1 =
new (nothrow)
double[tmp];
233 memRequested +=
sizeof(double)*tmp/(1024.0*1024.0);
235 highMem = (highMem > currentSize()) ? highMem : currentSize();
237 double *tmpD = work1;
240 tmpD = tmpD + xr*blockSize;
243 tmpD = tmpD + xr*blockSize;
246 tmpD = tmpD + xr*blockSize;
259 lwork2 = 4*blockSize + 27*blockSize*blockSize;
260 double *work2 =
new (nothrow)
double[lwork2];
269 highMem = (highMem > currentSize()) ? highMem : currentSize();
273 double *theta = tmpD;
274 tmpD = tmpD + 3*blockSize;
276 double *normR = tmpD;
277 tmpD = tmpD + blockSize;
280 tmpD = tmpD + 9*blockSize*blockSize;
283 tmpD = tmpD + 9*blockSize*blockSize;
287 memRequested +=
sizeof(double)*lwork2/(1024.0*1024.0);
290 if (localVerbose > 2) {
291 resHistory =
new (nothrow)
double[maxIterEigenSolve*blockSize];
292 if (resHistory == 0) {
305 bool reStart =
false;
309 int twoBlocks = 2*blockSize;
310 int threeBlocks = 3*blockSize;
311 int nFound = blockSize;
314 double minNorm = 1000*tolEigenSolve;
315 bool hasNextVectors =
false;
317 if (localVerbose > 0) {
319 cout <<
" *|* Problem: ";
321 cout <<
"K*Q = M*Q D ";
323 cout <<
"K*Q = Q D ";
325 cout <<
" with preconditioner";
327 cout <<
" *|* Algorithm = LOBPCG (Small memory requirements)" << endl;
328 cout <<
" *|* Size of blocks = " << blockSize << endl;
329 cout <<
" *|* Number of requested eigenvalues = " << numEigen << endl;
331 cout.setf(ios::scientific, ios::floatfield);
332 cout <<
" *|* Tolerance for convergence = " << tolEigenSolve << endl;
333 cout <<
" *|* Norm used for convergence: ";
335 cout <<
"weighted L2-norm with user-provided weights" << endl;
338 cout <<
"L^2-norm" << endl;
341 cout <<
" *|* Input converged eigenvectors = " << startingEV << endl;
342 cout <<
"\n -- Start iterations -- \n";
345 timeOuterLoop -= MyWatch.WallTime();
346 for (outerIter = 1; outerIter <= maxIterEigenSolve; ++outerIter) {
348 highMem = (highMem > currentSize()) ? highMem : currentSize();
350 if ((outerIter == 1) || (reStart ==
true)) {
353 localSize = blockSize;
355 hasNextVectors =
false;
356 minNorm = 1000*tolEigenSolve;
360 timeMassOp -= MyWatch.WallTime();
364 memcpy(P.Values(), X.Values(), xr*blockSize*
sizeof(double));
365 timeMassOp += MyWatch.WallTime();
368 if ((knownEV > 0) && (nFound > 0)) {
372 timeOrtho -= MyWatch.WallTime();
373 info = modalTool.massOrthonormalize(X, P, M, copyQ, nFound, 1, R.Values());
374 timeOrtho += MyWatch.WallTime();
391 timePrecOp -= MyWatch.WallTime();
392 Prec->ApplyInverse(R, H);
393 timePrecOp += MyWatch.WallTime();
397 memcpy(H.Values(), R.Values(), xr*blockSize*
sizeof(double));
401 timeMassOp -= MyWatch.WallTime();
405 memcpy(R.Values(), H.Values(), xr*blockSize*
sizeof(double));
406 timeMassOp += MyWatch.WallTime();
412 timeOrtho -= MyWatch.WallTime();
413 modalTool.massOrthonormalize(H, R, M, copyQ, blockSize, 1);
414 timeOrtho += MyWatch.WallTime();
417 if (localSize == blockSize)
418 localSize += blockSize;
424 if (localSize == blockSize) {
426 timeLocalProj -= MyWatch.WallTime();
427 modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, P.Values(), xr,
429 timeLocalProj += MyWatch.WallTime();
431 timeStifOp -= MyWatch.WallTime();
433 timeStifOp += MyWatch.WallTime();
435 timeLocalProj -= MyWatch.WallTime();
436 modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, H.Values(), xr,
438 timeLocalProj += MyWatch.WallTime();
442 timeLocalProj -= MyWatch.WallTime();
443 for (i = 0; i < blockSize; ++i) {
444 memset(KK + i*localSize, 0, i*
sizeof(
double));
445 KK[i + i*localSize] = theta[i];
446 memset(MM + i*localSize, 0, i*
sizeof(
double));
447 MM[i + i*localSize] = 1.0;
449 timeLocalProj += MyWatch.WallTime();
451 timeLocalProj -= MyWatch.WallTime();
452 modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, R.Values(), xr,
453 MM + blockSize*localSize, localSize, S);
454 modalTool.localProjection(blockSize, blockSize, xr, H.Values(), xr, R.Values(), xr,
455 MM + blockSize*localSize + blockSize, localSize, S);
456 timeLocalProj += MyWatch.WallTime();
458 timeStifOp -= MyWatch.WallTime();
460 timeStifOp += MyWatch.WallTime();
462 timeLocalProj -= MyWatch.WallTime();
463 modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, R.Values(), xr,
464 KK + blockSize*localSize, localSize, S);
465 modalTool.localProjection(blockSize, blockSize, xr, H.Values(), xr, R.Values(), xr,
466 KK + blockSize*localSize + blockSize, localSize, S);
467 timeLocalProj += MyWatch.WallTime();
468 if (localSize > twoBlocks) {
470 timeMassOp -= MyWatch.WallTime();
474 memcpy(R.Values(), P.Values(), xr*blockSize*
sizeof(double));
475 timeMassOp += MyWatch.WallTime();
477 timeLocalProj -= MyWatch.WallTime();
478 modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, R.Values(), xr,
479 MM + twoBlocks*localSize, localSize, S);
480 modalTool.localProjection(blockSize, blockSize, xr, H.Values(), xr, R.Values(), xr,
481 MM + twoBlocks*localSize + blockSize, localSize, S);
482 modalTool.localProjection(blockSize, blockSize, xr, P.Values(), xr, R.Values(), xr,
483 MM + twoBlocks*localSize + twoBlocks, localSize, S);
484 timeLocalProj += MyWatch.WallTime();
486 timeStifOp -= MyWatch.WallTime();
488 timeStifOp += MyWatch.WallTime();
490 timeLocalProj -= MyWatch.WallTime();
491 modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, R.Values(), xr,
492 KK + twoBlocks*localSize, localSize, S);
493 modalTool.localProjection(blockSize, blockSize, xr, H.Values(), xr, R.Values(), xr,
494 KK + twoBlocks*localSize + blockSize, localSize, S);
495 modalTool.localProjection(blockSize, blockSize, xr, P.Values(), xr, R.Values(), xr,
496 KK + twoBlocks*localSize + twoBlocks, localSize, S);
497 timeLocalProj += MyWatch.WallTime();
502 timeLocalSolve -= MyWatch.WallTime();
503 int nevLocal = localSize;
504 info = modalTool.directSolver(localSize, KK, localSize, MM, localSize, nevLocal,
505 S, localSize, theta, localVerbose,
506 (blockSize == 1) ? 1 : 0);
507 timeLocalSolve += MyWatch.WallTime();
515 if ((theta[0] < 0.0) || (nevLocal < blockSize)) {
516 if (localVerbose > 0) {
517 cout <<
" Iteration " << outerIter;
518 cout <<
"- Failure for spectral decomposition - RESTART with new random search\n";
520 if (blockSize == 1) {
527 nFound = blockSize - 1;
535 if ((localSize == twoBlocks) && (nevLocal == blockSize)) {
536 for (j = 0; j < nevLocal; ++j)
537 memcpy(S + j*blockSize, S + j*twoBlocks, blockSize*
sizeof(
double));
538 localSize = blockSize;
541 if ((localSize == threeBlocks) && (nevLocal <= twoBlocks)) {
542 for (j = 0; j < nevLocal; ++j)
543 memcpy(S + j*twoBlocks, S + j*threeBlocks, twoBlocks*
sizeof(
double));
544 localSize = twoBlocks;
548 if (localSize == twoBlocks) {
549 if (minNorm < 10*tolEigenSolve) {
550 timeRestart -= MyWatch.WallTime();
551 int numVec = (numEigen > knownEV + blockSize) ? blockSize : numEigen - knownEV - 1;
552 double *pointerS = S + blockSize*localSize;
553 double *pointerQ = Q.Values() + knownEV*xr;
554 callBLAS.
GEMM(
'N',
'N', xr, numVec, blockSize, 1.0, X.Values(), xr,
555 pointerS, localSize, 0.0, pointerQ, xr);
556 callBLAS.GEMM(
'N',
'N', xr, numVec, blockSize, 1.0, H.Values(), xr,
557 pointerS + blockSize, localSize, 1.0, pointerQ, xr);
558 hasNextVectors =
true;
559 timeRestart += MyWatch.WallTime();
561 timeLocalUpdate -= MyWatch.WallTime();
562 callBLAS.GEMM(
'N',
'N', xr, blockSize, blockSize, 1.0, H.Values(), xr,
563 S + blockSize, localSize, 0.0, P.Values(), xr);
564 memcpy(R.Values(), X.Values(), xr*blockSize*
sizeof(double));
565 callBLAS.GEMM(
'N',
'N', xr, blockSize, blockSize, 1.0, R.Values(), xr,
566 S, localSize, 0.0, X.Values(), xr);
567 X.Update(1.0, P, 1.0);
568 timeLocalUpdate += MyWatch.WallTime();
570 if (localSize == threeBlocks) {
571 if (minNorm < 10*tolEigenSolve) {
572 timeRestart -= MyWatch.WallTime();
573 int numVec = (numEigen > knownEV + blockSize) ? blockSize : numEigen - knownEV - 1;
574 double *pointerS = S + blockSize*localSize;
575 double *pointerQ = Q.Values() + knownEV*xr;
576 callBLAS.
GEMM(
'N',
'N', xr, numVec, blockSize, 1.0, X.Values(), xr,
577 pointerS, localSize, 0.0, pointerQ, xr);
579 callBLAS.GEMM(
'N',
'N', xr, numVec, twoBlocks, 1.0, H.Values(), xr,
580 pointerS + blockSize, localSize, 1.0, pointerQ, xr);
581 hasNextVectors =
true;
582 timeRestart += MyWatch.WallTime();
584 timeLocalUpdate -= MyWatch.WallTime();
585 memcpy(R.Values(), P.Values(), xr*blockSize*
sizeof(double));
586 callBLAS.GEMM(
'N',
'N', xr, blockSize, blockSize, 1.0, H.Values(), xr,
587 S + blockSize, localSize, 0.0, P.Values(), xr);
588 callBLAS.GEMM(
'N',
'N', xr, blockSize, blockSize, 1.0, R.Values(), xr,
589 S + twoBlocks, localSize, 1.0, P.Values(), xr);
590 memcpy(R.Values(), X.Values(), xr*blockSize*
sizeof(double));
591 callBLAS.GEMM(
'N',
'N', xr, blockSize, blockSize, 1.0, R.Values(), xr,
592 S, localSize, 0.0, X.Values(), xr);
593 X.Update(1.0, P, 1.0);
594 timeLocalUpdate += MyWatch.WallTime();
598 if (localSize == blockSize) {
599 timeResidual -= MyWatch.WallTime();
600 callBLAS.GEMM(
'N',
'N', xr, blockSize, blockSize, 1.0, H.Values(), xr, S, blockSize,
601 0.0, R.Values(), xr);
602 timeResidual += MyWatch.WallTime();
603 timeLocalUpdate -= MyWatch.WallTime();
604 memcpy(H.Values(), X.Values(), xr*blockSize*
sizeof(double));
605 callBLAS.GEMM(
'N',
'N', xr, blockSize, blockSize, 1.0, H.Values(), xr, S, blockSize,
606 0.0, X.Values(), xr);
607 timeLocalUpdate += MyWatch.WallTime();
609 timeResidual -= MyWatch.WallTime();
610 for (j = 0; j < blockSize; ++j)
611 callBLAS.SCAL(localSize, theta[j], S + j*localSize);
612 callBLAS.GEMM(
'N',
'N', xr, blockSize, blockSize, -1.0, P.Values(), xr, S, blockSize,
613 1.0, R.Values(), xr);
614 timeResidual += MyWatch.WallTime();
618 timeStifOp -= MyWatch.WallTime();
620 timeStifOp += MyWatch.WallTime();
622 timeMassOp -= MyWatch.WallTime();
626 memcpy(H.Values(), X.Values(), xr*blockSize*
sizeof(double));
627 timeMassOp += MyWatch.WallTime();
629 timeResidual -= MyWatch.WallTime();
630 for (j = 0; j < blockSize; ++j)
631 callBLAS.AXPY(xr, -theta[j], H.Values() + j*xr, R.Values() + j*xr);
632 timeResidual += MyWatch.WallTime();
636 timeNorm -= MyWatch.WallTime();
638 R.NormWeighted(*vectWeight, normR);
645 for (j = 0; j < blockSize; ++j) {
646 normR[j] = (theta[j] == 0.0) ? normR[j] : normR[j]/theta[j];
647 if (normR[j] < tolEigenSolve)
649 minNorm = (normR[j] < minNorm) ? normR[j] : minNorm;
651 timeNorm += MyWatch.WallTime();
654 if (localVerbose > 2) {
655 memcpy(resHistory + historyCount*blockSize, normR, blockSize*
sizeof(
double));
660 if (localVerbose > 0) {
661 cout <<
" Iteration " << outerIter <<
" - Number of converged eigenvectors ";
662 cout << knownEV + nFound << endl;
665 if (localVerbose > 1) {
668 cout.setf(ios::scientific, ios::floatfield);
669 for (i=0; i<blockSize; ++i) {
670 cout <<
" Iteration " << outerIter <<
" - Scaled Norm of Residual " << i;
671 cout <<
" = " << normR[i] << endl;
675 for (i=0; i<blockSize; ++i) {
676 cout <<
" Iteration " << outerIter <<
" - Ritz eigenvalue " << i;
677 cout.setf((fabs(theta[i]) < 0.01) ? ios::scientific : ios::fixed, ios::floatfield);
678 cout <<
" = " << theta[i] << endl;
687 accuracyCheck(&X, 0, &R, 0, 0, 0);
691 accuracyCheck(&X, 0, &R, ©Q, (localSize>blockSize) ? &H : 0,
692 (localSize>twoBlocks) ? &P : 0);
695 if (localSize < threeBlocks)
696 localSize += blockSize;
701 if (knownEV + nFound >= numEigen) {
702 for (j = 0; j < blockSize; ++j) {
703 if (normR[j] < tolEigenSolve) {
704 lambda[knownEV] = theta[j];
705 memcpy(Q.Values() + knownEV*xr, X.Values() + j*xr, xr*
sizeof(double));
709 if (localVerbose == 1) {
712 cout.setf(ios::scientific, ios::floatfield);
713 for (i=0; i<blockSize; ++i) {
714 cout <<
" Iteration " << outerIter <<
" - Scaled Norm of Residual " << i;
715 cout <<
" = " << normR[i] << endl;
723 if (hasNextVectors ==
true) {
725 for (j = 0; j < blockSize; ++j) {
726 if (normR[j] < tolEigenSolve) {
727 lambda[knownEV] = theta[j];
728 callFortran.SWAP(xr, X.Values() + j*xr, 1, Q.Values() + knownEV*xr, 1);
735 for (j = 0; j < blockSize; ++j) {
736 if (normR[j] < tolEigenSolve) {
737 lambda[knownEV] = theta[j];
738 memcpy(Q.Values() + knownEV*xr, X.Values() + j*xr, xr*
sizeof(double));
743 timeRestart -= MyWatch.WallTime();
745 for (j = 0; j < blockSize; ++j) {
746 if (normR[j] >= tolEigenSolve) {
748 memcpy(X.Values() + (j-nFound)*xr, X.Values() + j*xr, xr*
sizeof(double));
758 timeRestart += MyWatch.WallTime();
763 timeOuterLoop += MyWatch.WallTime();
764 highMem = (highMem > currentSize()) ? highMem : currentSize();
773 timePostProce -= MyWatch.WallTime();
774 if ((info == 0) && (knownEV > 0)) {
775 mySort.sortScalars_Vectors(knownEV, lambda, Q.Values(), Q.MyLength());
777 timePostProce += MyWatch.WallTime();
779 return (info == 0) ? knownEV : info;
789 cout.setf(ios::scientific, ios::floatfield);
792 int myPid = MyComm.MyPID();
797 tmp = myVerify.errorEquality(X, MX, M);
799 cout <<
" >> Difference between MX and M*X = " << tmp << endl;
801 tmp = myVerify.errorOrthonormality(X, M);
803 cout <<
" >> Error in X^T M X - I = " << tmp << endl;
806 tmp = myVerify.errorOrthonormality(X, 0);
808 cout <<
" >> Error in X^T X - I = " << tmp << endl;
813 tmp = myVerify.errorOrthogonality(X, R);
815 cout <<
" >> Orthogonality X^T R up to " << tmp << endl;
822 tmp = myVerify.errorOrthonormality(Q, M);
824 cout <<
" >> Error in Q^T M Q - I = " << tmp << endl;
826 tmp = myVerify.errorOrthogonality(Q, X, M);
828 cout <<
" >> Orthogonality Q^T M X up to " << tmp << endl;
831 tmp = myVerify.errorOrthogonality(Q, H, M);
833 cout <<
" >> Orthogonality Q^T M H up to " << tmp << endl;
836 tmp = myVerify.errorOrthogonality(Q, P, M);
838 cout <<
" >> Orthogonality Q^T M P up to " << tmp << endl;
842 tmp = myVerify.errorOrthonormality(Q, 0);
844 cout <<
" >> Error in Q^T Q - I = " << tmp << endl;
846 tmp = myVerify.errorOrthogonality(Q, X, 0);
848 cout <<
" >> Orthogonality Q^T X up to " << tmp << endl;
851 tmp = myVerify.errorOrthogonality(Q, H, 0);
853 cout <<
" >> Orthogonality Q^T H up to " << tmp << endl;
856 tmp = myVerify.errorOrthogonality(Q, P, 0);
858 cout <<
" >> Orthogonality Q^T P up to " << tmp << endl;
865 void LOBPCG_light::initializeCounters() {
884 timeLocalSolve = 0.0;
885 timeLocalUpdate = 0.0;
900 void LOBPCG_light::algorithmInfo()
const {
902 int myPid = MyComm.MyPID();
905 cout <<
" Algorithm: LOBPCG (block version) with storage for 4 blocks of vectors\n";
906 cout <<
" Block Size: " << blockSize << endl;
912 void LOBPCG_light::historyInfo()
const {
916 cout <<
" Block Size: " << blockSize << endl;
918 cout <<
" Residuals\n";
921 cout.setf(ios::scientific, ios::floatfield);
922 for (j = 0; j < historyCount; ++j) {
924 for (ii = 0; ii < blockSize; ++ii)
925 cout << resHistory[blockSize*j + ii] << endl;
933 void LOBPCG_light::memoryInfo()
const {
935 int myPid = MyComm.MyPID();
937 double maxHighMem = 0.0;
938 double tmp = highMem;
939 MyComm.MaxAll(&tmp, &maxHighMem, 1);
941 double maxMemRequested = 0.0;
943 MyComm.MaxAll(&tmp, &maxMemRequested, 1);
947 cout.setf(ios::fixed, ios::floatfield);
948 cout <<
" Memory requested per processor by the eigensolver = (EST) ";
950 cout << maxMemRequested <<
" MB " << endl;
952 cout <<
" High water mark in eigensolver = (EST) ";
954 cout << maxHighMem <<
" MB " << endl;
961 void LOBPCG_light::operationInfo()
const {
963 int myPid = MyComm.MyPID();
966 cout <<
" --- Operations ---\n\n";
967 cout <<
" Total number of mass matrix multiplications = ";
969 cout << massOp + modalTool.getNumProj_MassMult() + modalTool.getNumNorm_MassMult() << endl;
970 cout <<
" Mass multiplications in solver loop = ";
972 cout << massOp << endl;
973 cout <<
" Mass multiplications for orthogonalization = ";
975 cout << modalTool.getNumProj_MassMult() << endl;
976 cout <<
" Mass multiplications for normalization = ";
978 cout << modalTool.getNumNorm_MassMult() << endl;
979 cout <<
" Total number of stiffness matrix operations = ";
981 cout << stifOp << endl;
982 cout <<
" Total number of preconditioner applications = ";
984 cout << precOp << endl;
985 cout <<
" Total number of computed eigen-residuals = ";
987 cout << residual << endl;
989 cout <<
" Total number of outer iterations = ";
991 cout << outerIter << endl;
992 cout <<
" Number of restarts = ";
994 cout << numRestart << endl;
1001 void LOBPCG_light::timeInfo()
const {
1003 int myPid = MyComm.MyPID();
1006 cout <<
" --- Timings ---\n\n";
1007 cout.setf(ios::fixed, ios::floatfield);
1009 cout <<
" Total time for outer loop = ";
1011 cout << timeOuterLoop <<
" s" << endl;
1012 cout <<
" Time for mass matrix operations = ";
1014 cout << timeMassOp <<
" s ";
1016 cout << 100*timeMassOp/timeOuterLoop <<
" %\n";
1017 cout <<
" Time for stiffness matrix operations = ";
1019 cout << timeStifOp <<
" s ";
1021 cout << 100*timeStifOp/timeOuterLoop <<
" %\n";
1022 cout <<
" Time for preconditioner applications = ";
1024 cout << timePrecOp <<
" s ";
1026 cout << 100*timePrecOp/timeOuterLoop <<
" %\n";
1027 cout <<
" Time for orthogonalizations = ";
1029 cout << timeOrtho <<
" s ";
1031 cout << 100*timeOrtho/timeOuterLoop <<
" %\n";
1032 cout <<
" Projection step : ";
1034 cout << modalTool.getTimeProj() <<
" s\n";
1035 cout <<
" Q^T mult. :: ";
1037 cout << modalTool.getTimeProj_QtMult() <<
" s\n";
1038 cout <<
" Q mult. :: ";
1040 cout << modalTool.getTimeProj_QMult() <<
" s\n";
1041 cout <<
" Mass mult. :: ";
1043 cout << modalTool.getTimeProj_MassMult() <<
" s\n";
1044 cout <<
" Normalization step : ";
1046 cout << modalTool.getTimeNorm() <<
" s\n";
1047 cout <<
" Mass mult. :: ";
1049 cout << modalTool.getTimeNorm_MassMult() <<
" s\n";
1050 cout <<
" Time for local projection = ";
1052 cout << timeLocalProj <<
" s ";
1054 cout << 100*timeLocalProj/timeOuterLoop <<
" %\n";
1055 cout <<
" Time for local eigensolve = ";
1057 cout << timeLocalSolve <<
" s ";
1059 cout << 100*timeLocalSolve/timeOuterLoop <<
" %\n";
1060 cout <<
" Time for local update = ";
1062 cout << timeLocalUpdate <<
" s ";
1064 cout << 100*timeLocalUpdate/timeOuterLoop <<
" %\n";
1065 cout <<
" Time for residual computations = ";
1067 cout << timeResidual <<
" s ";
1069 cout << 100*timeResidual/timeOuterLoop <<
" %\n";
1070 cout <<
" Time for residuals norm computations = ";
1072 cout << timeNorm <<
" s ";
1074 cout << 100*timeNorm/timeOuterLoop <<
" %\n";
1075 cout <<
" Time for restarting space definition = ";
1077 cout << timeRestart <<
" s ";
1079 cout << 100*timeRestart/timeOuterLoop <<
" %\n";
1081 cout <<
" Total time for post-processing = ";
1083 cout << timePostProce <<
" s\n";
void GEMM(const char TRANSA, const char TRANSB, const int M, const int N, const int K, const float ALPHA, const float *A, const int LDA, const float *B, const int LDB, const float BETA, float *C, const int LDC) const