19 #include "ModalTools.h"
31 timeProj_MassMult(0.0),
32 timeNorm_MassMult(0.0),
38 callLAPACK.LAMCH(
'E', eps);
43 int ModalTools::makeSimpleLumpedMass(
const Epetra_Operator *M,
double *normWeight)
const {
62 double *zVal =
new double[row];
74 for (i = 0; i < row; ++i)
78 MyComm.SumAll(normWeight, &rho, 1);
83 if (MyComm.MyPID() == 0)
84 cerr <<
"\n !!! The mass matrix gives a negative mass: " << rho <<
" !!! \n\n";
96 for (i = 0; i < row; ++i)
106 int howMany,
int type,
double *WS,
double kappa) {
152 timeProj -= MyWatch.WallTime();
156 int xr = X.MyLength();
157 int qc = Q.NumVectors();
161 Epetra_MultiVector MXX(View, X.Map(), (M) ? MX.Values() + xr*(MX.NumVectors()-howMany)
162 : X.Values() + xr*(X.NumVectors()-howMany),
168 double *oldDot =
new double[xc];
172 double *qTmx =
new double[2*qc*xc];
175 timeQtMult -= MyWatch.WallTime();
176 callBLAS.GEMM(
'T',
'N', qc, xc, xr, 1.0, Q.Values(), xr, MXX.Values(), xr,
177 0.0, qTmx + qc*xc, qc);
178 MyComm.SumAll(qTmx + qc*xc, qTmx, qc*xc);
179 timeQtMult += MyWatch.WallTime();
182 timeQMult -= MyWatch.WallTime();
183 callBLAS.GEMM(
'N',
'N', xr, xc, qc, -1.0, Q.Values(), xr, qTmx, qc,
184 1.0, XX.Values(), xr);
185 timeQMult += MyWatch.WallTime();
189 if ((qc >= xc) || (WS == 0)) {
190 timeProj_MassMult -= MyWatch.WallTime();
192 timeProj_MassMult += MyWatch.WallTime();
193 numProj_MassMult += xc;
197 timeProj_MassMult -= MyWatch.WallTime();
199 timeProj_MassMult += MyWatch.WallTime();
200 numProj_MassMult += qc;
201 callBLAS.GEMM(
'N',
'N', xr, xc, qc, -1.0, MQ.Values(), xr, qTmx, qc,
202 1.0, MXX.Values(), xr);
208 for (j = 0; j < xc; ++j) {
210 MXX(j)->Dot(*(XX(j)), &newDot);
212 if (kappa*newDot < oldDot[j]) {
215 timeQtMult -= MyWatch.WallTime();
216 callBLAS.GEMM(
'T',
'N', qc, xc, xr, 1.0, Q.Values(), xr, MXX.Values(), xr,
217 0.0, qTmx + qc*xc, qc);
218 MyComm.SumAll(qTmx + qc*xc, qTmx, qc*xc);
219 timeQtMult += MyWatch.WallTime();
221 timeQMult -= MyWatch.WallTime();
222 callBLAS.GEMM(
'N',
'N', xr, xc, qc, -1.0, Q.Values(), xr, qTmx, qc,
223 1.0, XX.Values(), xr);
224 timeQMult += MyWatch.WallTime();
228 if ((qc >= xc) || (WS == 0)) {
229 timeProj_MassMult -= MyWatch.WallTime();
231 timeProj_MassMult += MyWatch.WallTime();
232 numProj_MassMult += xc;
236 timeProj_MassMult -= MyWatch.WallTime();
238 timeProj_MassMult += MyWatch.WallTime();
239 numProj_MassMult += qc;
240 callBLAS.GEMM(
'N',
'N', xr, xc, qc, -1.0, MQ.Values(), xr, qTmx, qc,
241 1.0, MXX.Values(), xr);
253 timeProj += MyWatch.WallTime();
256 timeNorm -= MyWatch.WallTime();
260 int xc = X.NumVectors();
261 int xr = X.MyLength();
262 int globalSize = X.GlobalLength();
263 int shift = (type == 2) ? 0 : Q.NumVectors();
264 int mxc = (M) ? MX.NumVectors() : X.NumVectors();
266 bool allocated =
false;
273 double *MXX = (M) ? MX.Values() : X.Values();
274 double *product =
new double[2*xc];
278 for (j = 0; j < howMany; ++j) {
280 int numX = xc - howMany + j;
281 int numMX = mxc - howMany + j;
284 if (numX + shift >= globalSize) {
296 for (numTrials = 0; numTrials < 10; ++numTrials) {
298 double *Xj = X.Values() + xr*numX;
299 double *MXj = MXX + xr*numMX;
302 dTmp = callBLAS.
DOT(xr, Xj, MXj);
303 MyComm.SumAll(&dTmp, &oldDot, 1);
305 memcpy(oldMXj, MXj, xr*
sizeof(
double));
311 callBLAS.GEMV(
'T', xr, numX, 1.0, X.Values(), xr, MXj, 0.0, product + xc);
312 MyComm.SumAll(product + xc, product, numX);
313 callBLAS.GEMV(
'N', xr, numX, -1.0, X.Values(), xr, product, 1.0, Xj);
316 callBLAS.GEMV(
'N', xr, numX, -1.0, MXX, xr, product, 1.0, MXj);
321 timeNorm_MassMult -= MyWatch.WallTime();
323 timeNorm_MassMult += MyWatch.WallTime();
324 numNorm_MassMult += 1;
329 dTmp = callBLAS.DOT(xr, Xj, MXj);
330 MyComm.SumAll(&dTmp, &dot, 1);
332 if (kappa*dot < oldDot) {
333 callBLAS.GEMV(
'T', xr, numX, 1.0, X.Values(), xr, MXj, 0.0, product + xc);
334 MyComm.SumAll(product + xc, product, numX);
335 callBLAS.GEMV(
'N', xr, numX, -1.0, X.Values(), xr, product, 1.0, Xj);
338 callBLAS.GEMV(
'N', xr, numX, -1.0, MXX, xr, product, 1.0, MXj);
343 timeNorm_MassMult -= MyWatch.WallTime();
345 timeNorm_MassMult += MyWatch.WallTime();
346 numNorm_MassMult += 1;
354 dTmp = callBLAS.DOT(xr, Xj, oldMXj);
355 MyComm.SumAll(&dTmp, &norm, 1);
357 if (norm > oldDot*eps*eps) {
358 norm = 1.0/sqrt(norm);
359 callBLAS.SCAL(xr, norm, Xj);
361 callBLAS.SCAL(xr, norm, MXj);
371 timeNorm_MassMult -= MyWatch.WallTime();
373 timeNorm_MassMult += MyWatch.WallTime();
374 numNorm_MassMult += 1;
377 massOrthonormalize(XXj, MXXj, M, Q, 1, 1, WS, kappa);
382 if (rankDef ==
true) {
397 if (allocated ==
true) {
402 timeNorm += MyWatch.WallTime();
409 void ModalTools::localProjection(
int numRow,
int numCol,
int dotLength,
410 double *U,
int ldU,
double *MatV,
int ldV,
411 double *UtMatV,
int ldUtMatV,
double *work)
const {
437 int offSet = numRow*numCol;
439 callBLAS.GEMM(
'T',
'N', numRow, numCol, dotLength, 1.0, U, ldU, MatV, ldV,
440 0.0, work + offSet, numRow);
441 MyComm.SumAll(work + offSet, work, offSet);
443 double *source = work;
444 double *result = UtMatV;
445 int howMany = numRow*
sizeof(double);
447 for (j = 0; j < numCol; ++j) {
448 memcpy(result, source, howMany);
450 source = source + numRow;
451 result = result + ldUtMatV;
457 int ModalTools::directSolver(
int size,
double *KK,
int ldK,
double *MM,
int ldM,
458 int &nev,
double *EV,
int ldEV,
double *theta,
int verbose,
int esType)
const {
518 int NB = 5 + callFortran.LAENV(1,
"dsytrd",
"u", size, -1, -1, -1, 6, 1);
520 double *work =
new double[lwork];
533 kp =
new double[size*size];
534 mp =
new double[size*size];
535 tt =
new double[size];
536 U =
new double[size*size];
542 tmpI =
sizeof(double);
543 for (rank = size; rank > 0; --rank) {
544 memset(kp, 0, size*size*tmpI);
545 for (i = 0; i < rank; ++i) {
546 memcpy(kp + i*size, KK + i*ldK, (i+1)*tmpI);
547 memcpy(mp + i*size, MM + i*ldM, (i+1)*tmpI);
551 callFortran.SYGV(1,
'V',
'U', rank, kp, size, mp, size, tt, work, lwork, &info);
556 cerr <<
" In DSYGV, argument " << -info <<
"has an illegal value.\n";
567 for (i = 0; i < size; ++i) {
568 memcpy(mp + i*size, MM + i*ldM, (i+1)*tmpI);
569 for (j = 0; j < i; ++j)
570 mp[i + j*size] = mp[j + i*size];
572 callBLAS.GEMM(
'N',
'N', size, rank, size, 1.0, mp, size, kp, size, 0.0, U, size);
573 callBLAS.GEMM(
'T',
'N', rank, rank, size, 1.0, kp, size, U, size, 0.0, mp, rank);
574 double maxNorm = 0.0;
575 double maxOrth = 0.0;
576 for (i = 0; i < rank; ++i) {
577 for (j = i; j < rank; ++j) {
579 maxNorm = (fabs(mp[j+i*rank]-1.0) > maxNorm) ? fabs(mp[j+i*rank]-1.0) : maxNorm;
582 maxOrth = (fabs(mp[j+i*rank]) > maxOrth) ? fabs(mp[j+i*rank]) : maxOrth;
587 cout <<
" >> Local eigensolve >> Size: " << rank;
589 cout.setf(ios::scientific, ios::floatfield);
590 cout <<
" Normalization error: " << maxNorm;
591 cout <<
" Orthogonality error: " << maxOrth;
594 if ((maxNorm <= tol) && (maxOrth <= tol))
602 memset(EV, 0, nev*ldEV*tmpI);
603 nev = (rank < nev) ? rank : nev;
604 memcpy(theta, tt, nev*tmpI);
606 for (i = 0; i < nev; ++i) {
607 memcpy(EV + i*ldEV, kp + i*size, tmpI);
617 kp =
new double[size*size];
618 mp =
new double[size*size];
619 tt =
new double[size];
622 tmpI =
sizeof(double);
623 for (i = 0; i < size; ++i) {
624 memcpy(kp + i*size, KK + i*ldK, (i+1)*tmpI);
625 memcpy(mp + i*size, MM + i*ldM, (i+1)*tmpI);
630 callFortran.SYGV(1,
'V',
'U', size, kp, size, mp, size, tt, work, lwork, &info);
636 cerr <<
" In DSYGV, argument " << -info <<
"has an illegal value.\n";
647 cerr <<
" In DSYGV, DPOTRF or DSYEV returned an error code (" << info <<
").\n";
655 memcpy(theta, tt, nev*tmpI);
657 for (i = 0; i < nev; ++i) {
658 memcpy(EV + i*ldEV, kp + i*size, tmpI);
668 kp =
new double[size*size];
669 tt =
new double[size];
672 tmpI =
sizeof(double);
673 for (i=0; i<size; ++i) {
674 memcpy(kp + i*size, KK + i*ldK, (i+1)*tmpI);
678 callFortran.SYEV(
'V',
'U', size, kp, size, tt, work, lwork, &info);
685 cerr <<
" In DSYEV, argument " << -info <<
" has an illegal value\n";
687 cerr <<
" In DSYEV, the algorithm failed to converge (" << info <<
").\n";
695 memcpy(theta, tt, nev*tmpI);
697 for (i = 0; i < nev; ++i) {
698 memcpy(EV + i*ldEV, kp + i*size, tmpI);
virtual const Epetra_Map & OperatorDomainMap() const =0
virtual int Apply(const Epetra_MultiVector &X, Epetra_MultiVector &Y) const =0
float DOT(const int N, const float *X, const float *Y, const int INCX=1, const int INCY=1) const