19 #include "ModifiedARPACKm3.h"
23 double _tol,
int _maxIter,
int _verb) :
34 maxIterEigenSolve(_maxIter),
58 double _tol,
int _maxIter,
int _verb,
double *_weight) :
69 maxIterEigenSolve(_maxIter),
91 ModifiedARPACKm3::~ModifiedARPACKm3() {
103 return ModifiedARPACKm3::reSolve(numEigen, Q, lambda, 0, 0);
111 return ModifiedARPACKm3::reSolve(numEigen, Q, lambda, startingEV, 0);
177 if (numEigen <= startingEV) {
181 int info = myVerify.inputArguments(numEigen, K, M, 0, Q, minimumSpaceDimension(numEigen));
185 int myPid = MyComm.MyPID();
187 int localSize = Q.MyLength();
188 int NCV = Q.NumVectors();
191 if (NCV > Q.GlobalLength()) {
192 if (numEigen >= Q.GlobalLength()) {
194 cerr <<
" !! The number of requested eigenvalues must be smaller than the dimension";
195 cerr <<
" of the matrix !!\n";
199 NCV = Q.GlobalLength();
208 int localVerbose = verbose*(myPid == 0);
217 highMem = (highMem > currentSize()) ? highMem : currentSize();
222 int *wI =
new (nothrow)
int[lwI];
228 memRequested +=
sizeof(int)*lwI/(1024.0*1024.0);
231 int *ipntr = wI + 11;
233 int lworkl = NCV*(NCV+8);
234 int lwD = lworkl + 4*localSize;
235 double *wD =
new (nothrow)
double[lwD];
242 memRequested +=
sizeof(double)*(4*localSize+lworkl)/(1024.0*1024.0);
244 double *pointer = wD;
246 double *workl = pointer;
247 pointer = pointer + lworkl;
249 double *resid = pointer;
250 pointer = pointer + localSize;
252 double *workd = pointer;
254 double *v = Q.Values();
256 highMem = (highMem > currentSize()) ? highMem : currentSize();
258 if (startingEV > 0) {
260 memset(resid, 0, localSize*
sizeof(
double));
261 for (
int jj = 0; jj < startingEV; ++jj)
262 for (
int ii = 0; ii < localSize; ++ii)
263 resid[ii] += v[ii + jj*localSize];
268 iparam[3-1] = maxIterEigenSolve;
283 int loopZ = (NCV > 10) ? 10 : NCV;
285 int lwD2 = localSize + 2*NCV-1 + NCV;
286 lwD2 += (M) ? 3*loopZ*localSize : 2*loopZ*localSize;
287 double *wD2 =
new (nothrow)
double[lwD2];
295 memRequested +=
sizeof(double)*lwD2/(1024.0*1024.0);
300 double *vTmp = pointer;
301 pointer = pointer + localSize;
305 double *dd = pointer;
306 pointer = pointer + NCV;
308 double *ee = pointer;
309 pointer = pointer + NCV-1;
311 double *vz = pointer;
312 pointer = pointer + loopZ*localSize;
315 double *kvz = pointer;
316 pointer = pointer + loopZ*localSize;
319 double *mvz = (M) ? pointer : vz;
320 pointer = (M) ? pointer + loopZ*localSize : pointer;
323 double *normR = pointer;
330 highMem = (highMem > currentSize()) ? highMem : currentSize();
333 if (localVerbose > 2) {
334 resHistory =
new (nothrow)
double[maxIterEigenSolve*NCV];
335 if (resHistory == 0) {
346 highMem = (highMem > currentSize()) ? highMem : currentSize();
348 if (localVerbose > 0) {
350 cout <<
" *|* Problem: ";
352 cout <<
"K*Q = M*Q D ";
354 cout <<
"K*Q = Q D ";
356 cout <<
" *|* Algorithm = ARPACK (Mode 3, modified such that user checks convergence)" << endl;
357 cout <<
" *|* Number of requested eigenvalues = " << numEigen << endl;
359 cout.setf(ios::scientific, ios::floatfield);
360 cout <<
" *|* Tolerance for convergence = " << tolEigenSolve << endl;
362 cout <<
" *|* User-defined starting vector (Combination of " << startingEV <<
" vectors)\n";
363 cout <<
" *|* Norm used for convergence: ";
365 cout <<
"weighted L2-norm with user-provided weights" << endl;
367 cout <<
"L^2-norm" << endl;
369 cout <<
" *|* Size of orthogonal subspace = " << orthoVec->NumVectors() << endl;
370 cout <<
"\n -- Start iterations -- \n";
377 timeOuterLoop -= MyWatch.WallTime();
380 highMem = (highMem > currentSize()) ? highMem : currentSize();
384 callFortran.PSAUPD(MPIComm->
Comm(), &ido,
'G', localSize,
"LM", numEigen, tolEigenSolve,
385 resid, NCV, v, localSize, iparam, ipntr, workd, workl, lworkl, &info, 0);
387 callFortran.SAUPD(&ido,
'G', localSize,
"LM", numEigen, tolEigenSolve, resid, NCV, v,
388 localSize, iparam, ipntr, workd, workl, lworkl, &info, 0);
390 callFortran.SAUPD(&ido,
'G', localSize,
"LM", numEigen, tolEigenSolve, resid, NCV, v,
391 localSize, iparam, ipntr, workd, workl, lworkl, &info, 0);
396 v3.ResetView(workd + ipntr[0] - 1);
398 timeMassOp -= MyWatch.WallTime();
402 memcpy(v1.Values(), v3.Values(), localSize*
sizeof(double));
403 timeMassOp += MyWatch.WallTime();
405 if ((orthoVec) && (verbose > 3)) {
407 double maxDot = myVerify.errorOrthogonality(orthoVec, &v1, 0);
409 cout <<
" Maximum Euclidean dot product against orthogonal space (Before Solve) = ";
410 cout << maxDot << endl;
414 v2.ResetView(workd + ipntr[1] - 1);
415 timeStifOp -= MyWatch.WallTime();
416 K->ApplyInverse(v1, v2);
417 timeStifOp += MyWatch.WallTime();
426 memcpy(Mv2.Values(), v2.Values(), localSize*
sizeof(double));
427 modalTool.massOrthonormalize(v2, Mv2, M, *orthoVec, 1, 1);
429 if ((orthoVec) && (verbose > 3)) {
431 double maxDot = myVerify.errorOrthogonality(orthoVec, &v2, M);
433 cout <<
" Maximum M-dot product against orthogonal space (After Solve) = ";
434 cout << maxDot << endl;
442 v1.ResetView(workd + ipntr[2] - 1);
443 v2.ResetView(workd + ipntr[1] - 1);
444 if ((orthoVec) && (verbose > 3)) {
446 double maxDot = myVerify.errorOrthogonality(orthoVec, &v1, 0);
448 cout <<
" Maximum Euclidean dot product against orthogonal space (Before Solve) = ";
449 cout << maxDot << endl;
452 timeStifOp -= MyWatch.WallTime();
453 K->ApplyInverse(v1, v2);
454 timeStifOp += MyWatch.WallTime();
463 memcpy(Mv2.Values(), v2.Values(), localSize*
sizeof(double));
464 modalTool.massOrthonormalize(v2, Mv2, M, *orthoVec, 1, 1);
466 if ((orthoVec) && (verbose > 3)) {
468 double maxDot = myVerify.errorOrthogonality(orthoVec, &v2, M);
470 cout <<
" Maximum M-dot product against orthogonal space (After Solve) = ";
471 cout << maxDot << endl;
479 v1.ResetView(workd + ipntr[0] - 1);
480 v2.ResetView(workd + ipntr[1] - 1);
481 timeMassOp -= MyWatch.WallTime();
485 memcpy(v2.Values(), v1.Values(), localSize*
sizeof(double));
486 timeMassOp += MyWatch.WallTime();
492 timeResidual -= MyWatch.WallTime();
494 memcpy(dd, workl + NCV + ipntr[4] - 1, NCV*
sizeof(
double));
496 memcpy(ee, workl + ipntr[4], (NCV-1)*
sizeof(
double));
499 workt = workl + 4*NCV + NCV*NCV;
500 callFortran.STEQR(
'I', NCV, dd, ee, zz, NCV, workt, &info);
502 if (localVerbose > 0) {
504 cerr <<
" Error with DSTEQR, info = " << info << endl;
513 for (jz = 0; jz < NCV; jz += loopZ) {
514 int colZ = (jz + loopZ < NCV) ? loopZ : NCV - jz;
515 callBLAS.GEMM(
'N',
'N', localSize, colZ, NCV, 1.0, v, localSize,
516 zz + jz*NCV, NCV, 0.0, vz, localSize);
519 M->Apply(approxEV, MapproxEV);
520 K->Apply(approxEV, KapproxEV);
521 for (ii = 0; ii < colZ; ++ii) {
522 callBLAS.AXPY(localSize, -1.0/dd[ii+jz], MapproxEV.Values() + ii*localSize,
523 KapproxEV.Values() + ii*localSize);
527 KapproxEV.NormWeighted(*vectWeight, normR + jz);
530 KapproxEV.Norm2(normR + jz);
533 for (ii = 0; ii < colZ; ++ii) {
534 normR[ii+jz] = normR[ii+jz]*dd[ii+jz];
537 for (ii=0; ii<colZ; ++ii) {
538 if (normR[ii+jz] < tolEigenSolve)
542 timeResidual += MyWatch.WallTime();
545 if (localVerbose > 0) {
546 cout <<
" Iteration " << outerIter;
547 cout <<
" - Number of converged eigenvalues " << iparam[4] << endl;
549 if (localVerbose > 2) {
550 memcpy(resHistory + historyCount, normR, NCV*
sizeof(
double));
553 if (localVerbose > 1) {
555 cout.setf(ios::scientific, ios::floatfield);
556 for (ii=0; ii < NCV; ++ii) {
557 cout <<
" Iteration " << outerIter;
558 cout <<
" - Scaled Norm of Residual " << ii <<
" = " << normR[ii] << endl;
562 for (ii = 0; ii < NCV; ++ii) {
563 cout <<
" Iteration " << outerIter <<
" - Ritz eigenvalue " << ii;
564 cout.setf((fabs(dd[ii]) > 100) ? ios::scientific : ios::fixed, ios::floatfield);
565 cout <<
" = " << 1.0/dd[ii] << endl;
572 timeOuterLoop += MyWatch.WallTime();
573 highMem = (highMem > currentSize()) ? highMem : currentSize();
578 cerr <<
" Error with DSAUPD, info = " << info << endl;
584 timePostProce -= MyWatch.WallTime();
586 double *pointer = workl + 4*NCV + NCV*NCV;
587 for (ii=0; ii < localSize; ii += 3) {
588 int nRow = (ii + 3 < localSize) ? 3 : localSize - ii;
589 for (jj=0; jj<NCV; ++jj)
590 memcpy(pointer + jj*nRow, v + ii + jj*localSize, nRow*
sizeof(
double));
591 callBLAS.GEMM(
'N',
'N', nRow, NCV, NCV, 1.0, pointer, nRow, zz, NCV,
592 0.0, Q.Values() + ii, localSize);
596 for (ii=0; ii < NCV; ++ii) {
597 if (normR[ii] < tolEigenSolve) {
598 lambda[knownEV] = 1.0/dd[ii];
599 memcpy(Q.Values()+knownEV*localSize, Q.Values()+ii*localSize, localSize*
sizeof(double));
601 if (knownEV == Q.NumVectors())
607 mySort.sortScalars_Vectors(knownEV, lambda, Q.Values(), localSize);
609 timePostProce += MyWatch.WallTime();
613 orthoOp = iparam[11-1];
622 return (info == 0) ? knownEV : info;
627 void ModifiedARPACKm3::initializeCounters() {
653 void ModifiedARPACKm3::algorithmInfo()
const {
655 int myPid = MyComm.MyPID();
658 cout <<
" Algorithm: ARPACK (Mode 3, modified such that user checks convergence)\n";
664 void ModifiedARPACKm3::memoryInfo()
const {
666 int myPid = MyComm.MyPID();
668 double maxHighMem = 0.0;
669 double tmp = highMem;
670 MyComm.MaxAll(&tmp, &maxHighMem, 1);
672 double maxMemRequested = 0.0;
674 MyComm.MaxAll(&tmp, &maxMemRequested, 1);
678 cout.setf(ios::fixed, ios::floatfield);
679 cout <<
" Memory requested per processor by the eigensolver = (EST) ";
681 cout << maxMemRequested <<
" MB " << endl;
683 cout <<
" High water mark in eigensolver = (EST) ";
685 cout << maxHighMem <<
" MB " << endl;
692 void ModifiedARPACKm3::historyInfo()
const {
696 cout <<
" Residuals\n";
699 cout.setf(ios::scientific, ios::floatfield);
700 for (j = 0; j < historyCount; ++j) {
701 cout << resHistory[j] << endl;
709 void ModifiedARPACKm3::operationInfo()
const {
711 int myPid = MyComm.MyPID();
714 cout <<
" --- Operations ---\n\n";
715 cout <<
" Total number of mass matrix multiplications = ";
717 cout << massOp << endl;
718 cout <<
" Total number of orthogonalizations = ";
720 cout << orthoOp << endl;
721 cout <<
" Total number of stiffness matrix operations = ";
723 cout << stifOp << endl;
724 cout <<
" Total number of computed eigen-residuals = ";
726 cout << numResidual << endl;
728 cout <<
" Total number of outer iterations = ";
730 cout << outerIter << endl;
737 void ModifiedARPACKm3::timeInfo()
const {
739 int myPid = MyComm.MyPID();
742 cout <<
" --- Timings ---\n\n";
743 cout.setf(ios::fixed, ios::floatfield);
745 cout <<
" Total time for outer loop = ";
747 cout << timeOuterLoop <<
" s" << endl;
748 cout <<
" Time for mass matrix operations = ";
750 cout << timeMassOp <<
" s ";
752 cout << 100*timeMassOp/timeOuterLoop <<
" %\n";
753 cout <<
" Time for stiffness matrix solves = ";
755 cout << timeStifOp <<
" s ";
757 cout << 100*timeStifOp/timeOuterLoop <<
" %\n";
758 cout <<
" Time for residual computations = ";
760 cout << timeResidual <<
" s ";
762 cout << 100*timeResidual/timeOuterLoop <<
" %\n";
764 cout <<
" Total time for post-processing = ";
766 cout << timePostProce <<
" s\n";