Anasazi  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
AnasaziLOBPCG.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Anasazi: Block Eigensolvers Package
4 //
5 // Copyright 2004 NTESS and the Anasazi contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 
15 /*
16  LOBPCG contains local storage of up to 10*blockSize_ vectors, representing 10 entities
17  X,H,P,R
18  KX,KH,KP (product of K and the above)
19  MX,MH,MP (product of M and the above, not allocated if we don't have an M matrix)
20  If full orthogonalization is enabled, one extra multivector of blockSize_ vectors is required to
21  compute the local update of X and P.
22 
23  A solver is bound to an eigenproblem at declaration.
24  Other solver parameters (e.g., block size, auxiliary vectors) can be changed dynamically.
25 
26  The orthogonalization manager is used to project away from the auxiliary vectors.
27  If full orthogonalization is enabled, the orthogonalization manager is also used to construct an M orthonormal basis.
28  The orthogonalization manager is subclass of MatOrthoManager, which LOBPCG assumes to be defined by the M inner product.
29  LOBPCG will not work correctly if the orthomanager uses a different inner product.
30  */
31 
32 
33 #ifndef ANASAZI_LOBPCG_HPP
34 #define ANASAZI_LOBPCG_HPP
35 
36 #include "AnasaziTypes.hpp"
37 
38 #include "AnasaziEigensolver.hpp"
41 #include "Teuchos_ScalarTraits.hpp"
42 
44 #include "AnasaziSolverUtils.hpp"
45 
46 #include "Teuchos_LAPACK.hpp"
47 #include "Teuchos_BLAS.hpp"
50 #include "Teuchos_TimeMonitor.hpp"
51 
72 namespace Anasazi {
73 
75 
76 
81  template <class ScalarType, class MultiVector>
82  struct LOBPCGState {
89 
96 
103 
113 
116 
119 
120  LOBPCGState() :
121  V(Teuchos::null),KV(Teuchos::null),MV(Teuchos::null),
122  X(Teuchos::null),KX(Teuchos::null),MX(Teuchos::null),
123  P(Teuchos::null),KP(Teuchos::null),MP(Teuchos::null),
124  H(Teuchos::null),KH(Teuchos::null),MH(Teuchos::null),
125  R(Teuchos::null),T(Teuchos::null) {};
126  };
127 
129 
131 
132 
146  class LOBPCGRitzFailure : public AnasaziError {public:
147  LOBPCGRitzFailure(const std::string& what_arg) : AnasaziError(what_arg)
148  {}};
149 
162  class LOBPCGInitFailure : public AnasaziError {public:
163  LOBPCGInitFailure(const std::string& what_arg) : AnasaziError(what_arg)
164  {}};
165 
182  class LOBPCGOrthoFailure : public AnasaziError {public:
183  LOBPCGOrthoFailure(const std::string& what_arg) : AnasaziError(what_arg)
184  {}};
185 
187 
188 
189  template <class ScalarType, class MV, class OP>
190  class LOBPCG : public Eigensolver<ScalarType,MV,OP> {
191  public:
192 
194 
195 
205  const Teuchos::RCP<OutputManager<ScalarType> > &printer,
208  Teuchos::ParameterList &params
209  );
210 
212  virtual ~LOBPCG() {};
213 
215 
217 
218 
245  void iterate();
246 
272  void initialize(LOBPCGState<ScalarType,MV>& newstate);
273 
277  void initialize();
278 
298  bool isInitialized() const;
299 
311 
313 
315 
316 
318  int getNumIters() const;
319 
321  void resetNumIters();
322 
331 
337  std::vector<Value<ScalarType> > getRitzValues();
338 
346  std::vector<int> getRitzIndex();
347 
348 
354  std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> getResNorms();
355 
356 
362  std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> getRes2Norms();
363 
364 
372  std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> getRitzRes2Norms();
373 
374 
383  int getCurSubspaceDim() const;
384 
388  int getMaxSubspaceDim() const;
389 
391 
393 
394 
397 
400 
403 
404 
413  void setBlockSize(int blockSize);
414 
415 
417  int getBlockSize() const;
418 
419 
431  void setAuxVecs(const Teuchos::Array<Teuchos::RCP<const MV> > &auxvecs);
432 
435 
437 
439 
440 
446  void setFullOrtho(bool fullOrtho);
447 
449  bool getFullOrtho() const;
450 
452  bool hasP();
453 
455 
457 
458 
460  void currentStatus(std::ostream &os);
461 
463 
464  private:
465  //
466  //
467  //
468  void setupViews();
469  //
470  // Convenience typedefs
471  //
472  typedef SolverUtils<ScalarType,MV,OP> Utils;
473  typedef MultiVecTraits<ScalarType,MV> MVT;
476  typedef typename SCT::magnitudeType MagnitudeType;
477  const MagnitudeType ONE;
478  const MagnitudeType ZERO;
479  const MagnitudeType NANVAL;
480  //
481  // Internal structs
482  //
483  struct CheckList {
484  bool checkX, checkMX, checkKX;
485  bool checkH, checkMH;
486  bool checkP, checkMP, checkKP;
487  bool checkR, checkQ;
488  CheckList() : checkX(false),checkMX(false),checkKX(false),
489  checkH(false),checkMH(false),
490  checkP(false),checkMP(false),checkKP(false),
491  checkR(false),checkQ(false) {};
492  };
493  //
494  // Internal methods
495  //
496  std::string accuracyCheck(const CheckList &chk, const std::string &where) const;
497  //
498  // Classes inputed through constructor that define the eigenproblem to be solved.
499  //
505  //
506  // Information obtained from the eigenproblem
507  //
511  bool hasM_;
512  //
513  // Internal timers
514  //
515  Teuchos::RCP<Teuchos::Time> timerOp_, timerMOp_, timerPrec_,
516  timerSort_,
517  timerLocalProj_, timerDS_,
518  timerLocalUpdate_, timerCompRes_,
519  timerOrtho_, timerInit_;
520  //
521  // Counters
522  //
523  // Number of operator applications
524  int count_ApplyOp_, count_ApplyM_, count_ApplyPrec_;
525 
526  //
527  // Algorithmic parameters.
528  //
529  // blockSize_ is the solver block size
530  int blockSize_;
531  //
532  // fullOrtho_ dictates whether the orthogonalization procedures specified by Hetmaniuk and Lehoucq should
533  // be activated (see citations at the top of this file)
534  bool fullOrtho_;
535 
536  //
537  // Current solver state
538  //
539  // initialized_ specifies that the basis vectors have been initialized and the iterate() routine
540  // is capable of running; _initialize is controlled by the initialize() member method
541  // For the implications of the state of initialized_, please see documentation for initialize()
542  bool initialized_;
543  //
544  // nevLocal_ reflects how much of the current basis is valid (0 <= nevLocal_ <= 3*blockSize_)
545  // this tells us how many of the values in theta_ are valid Ritz values
546  int nevLocal_;
547  //
548  // hasP_ tells us whether there is valid data in P (and KP,MP)
549  bool hasP_;
550  //
551  // State Multivecs
552  // V_, KV_ MV_ and R_ are primary pointers to allocated multivectors
553  Teuchos::RCP<MV> V_, KV_, MV_, R_;
554  // the rest are multivector views into V_, KV_ and MV_
555  Teuchos::RCP<MV> X_, KX_, MX_,
556  H_, KH_, MH_,
557  P_, KP_, MP_;
558 
559  //
560  // if fullOrtho_ == true, then we must produce the following on every iteration:
561  // [newX newP] = [X H P] [CX;CP]
562  // the structure of [CX;CP] when using full orthogonalization does not allow us to
563  // do this in situ, and R_ does not have enough storage for newX and newP. therefore,
564  // we must allocate additional storage for this.
565  // otherwise, when not using full orthogonalization, the structure
566  // [newX newP] = [X H P] [CX1 0 ]
567  // [CX2 CP2] allows us to work using only R as work space
568  // [CX3 CP3]
569  Teuchos::RCP<MV> tmpmvec_;
570  //
571  // auxiliary vectors
573  int numAuxVecs_;
574  //
575  // Number of iterations that have been performed.
576  int iter_;
577  //
578  // Current eigenvalues, residual norms
579  std::vector<MagnitudeType> theta_, Rnorms_, R2norms_;
580  //
581  // are the residual norms current with the residual?
582  bool Rnorms_current_, R2norms_current_;
583 
584  };
585 
586 
587 
588 
590  // Constructor
591  template <class ScalarType, class MV, class OP>
595  const Teuchos::RCP<OutputManager<ScalarType> > &printer,
598  Teuchos::ParameterList &params
599  ) :
600  ONE(Teuchos::ScalarTraits<MagnitudeType>::one()),
601  ZERO(Teuchos::ScalarTraits<MagnitudeType>::zero()),
602  NANVAL(Teuchos::ScalarTraits<MagnitudeType>::nan()),
603  // problem, tools
604  problem_(problem),
605  sm_(sorter),
606  om_(printer),
607  tester_(tester),
608  orthman_(ortho),
609  // timers, counters
610 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
611  timerOp_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Operation Op*x")),
612  timerMOp_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Operation M*x")),
613  timerPrec_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Operation Prec*x")),
614  timerSort_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Sorting eigenvalues")),
615  timerLocalProj_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Local projection")),
616  timerDS_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Direct solve")),
617  timerLocalUpdate_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Local update")),
618  timerCompRes_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Computing residuals")),
619  timerOrtho_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Orthogonalization")),
620  timerInit_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Initialization")),
621 #endif
622  count_ApplyOp_(0),
623  count_ApplyM_(0),
624  count_ApplyPrec_(0),
625  // internal data
626  blockSize_(0),
627  fullOrtho_(params.get("Full Ortho", true)),
628  initialized_(false),
629  nevLocal_(0),
630  hasP_(false),
631  auxVecs_( Teuchos::Array<Teuchos::RCP<const MV> >(0) ),
632  numAuxVecs_(0),
633  iter_(0),
634  Rnorms_current_(false),
635  R2norms_current_(false)
636  {
637  TEUCHOS_TEST_FOR_EXCEPTION(problem_ == Teuchos::null,std::invalid_argument,
638  "Anasazi::LOBPCG::constructor: user passed null problem pointer.");
639  TEUCHOS_TEST_FOR_EXCEPTION(sm_ == Teuchos::null,std::invalid_argument,
640  "Anasazi::LOBPCG::constructor: user passed null sort manager pointer.");
641  TEUCHOS_TEST_FOR_EXCEPTION(om_ == Teuchos::null,std::invalid_argument,
642  "Anasazi::LOBPCG::constructor: user passed null output manager pointer.");
643  TEUCHOS_TEST_FOR_EXCEPTION(tester_ == Teuchos::null,std::invalid_argument,
644  "Anasazi::LOBPCG::constructor: user passed null status test pointer.");
645  TEUCHOS_TEST_FOR_EXCEPTION(orthman_ == Teuchos::null,std::invalid_argument,
646  "Anasazi::LOBPCG::constructor: user passed null orthogonalization manager pointer.");
647  TEUCHOS_TEST_FOR_EXCEPTION(problem_->isProblemSet() == false, std::invalid_argument,
648  "Anasazi::LOBPCG::constructor: problem is not set.");
649  TEUCHOS_TEST_FOR_EXCEPTION(problem_->isHermitian() == false, std::invalid_argument,
650  "Anasazi::LOBPCG::constructor: problem is not Hermitian; LOBPCG requires Hermitian problem.");
651 
652  // get the problem operators
653  Op_ = problem_->getOperator();
654  TEUCHOS_TEST_FOR_EXCEPTION(Op_ == Teuchos::null, std::invalid_argument,
655  "Anasazi::LOBPCG::constructor: problem provides no operator.");
656  MOp_ = problem_->getM();
657  Prec_ = problem_->getPrec();
658  hasM_ = (MOp_ != Teuchos::null);
659 
660  // set the block size and allocate data
661  int bs = params.get("Block Size", problem_->getNEV());
662  setBlockSize(bs);
663  }
664 
665 
667  // Set the block size and make necessary adjustments.
668  template <class ScalarType, class MV, class OP>
670  {
671  // time spent here counts towards timerInit_
672 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
673  Teuchos::TimeMonitor lcltimer( *timerInit_ );
674 #endif
675 
676  // This routine only allocates space; it doesn't not perform any computation
677  // if size is decreased, take the first newBS vectors of all and leave state as is
678  // otherwise, grow/allocate space and set solver to unitialized
679 
681  // grab some Multivector to Clone
682  // in practice, getInitVec() should always provide this, but it is possible to use a
683  // Eigenproblem with nothing in getInitVec() by manually initializing with initialize();
684  // in case of that strange scenario, we will try to Clone from R_ because it is smaller
685  // than V_, and we don't want to keep V_ around longer than necessary
686  if (blockSize_ > 0) {
687  tmp = R_;
688  }
689  else {
690  tmp = problem_->getInitVec();
691  TEUCHOS_TEST_FOR_EXCEPTION(tmp == Teuchos::null,std::logic_error,
692  "Anasazi::LOBPCG::setBlockSize(): eigenproblem did not specify initial vectors to clone from.");
693  }
694 
695  TEUCHOS_TEST_FOR_EXCEPTION(newBS <= 0 || static_cast<ptrdiff_t>(newBS) > MVT::GetGlobalLength(*tmp),
696  std::invalid_argument, "Anasazi::LOBPCG::setBlockSize(): block size must be strictly positive.");
697  if (newBS == blockSize_) {
698  // do nothing
699  return;
700  }
701  else if (newBS < blockSize_ && initialized_) {
702  //
703  // shrink vectors
704 
705  // release views so we can modify the bases
706  X_ = Teuchos::null;
707  KX_ = Teuchos::null;
708  MX_ = Teuchos::null;
709  H_ = Teuchos::null;
710  KH_ = Teuchos::null;
711  MH_ = Teuchos::null;
712  P_ = Teuchos::null;
713  KP_ = Teuchos::null;
714  MP_ = Teuchos::null;
715 
716  // make new indices vectors
717  std::vector<int> newind(newBS), oldind(newBS);
718  for (int i=0; i<newBS; i++) {
719  newind[i] = i;
720  oldind[i] = i;
721  }
722 
723  Teuchos::RCP<MV> newV, newMV, newKV, newR;
725  // allocate R and newV
726  newR = MVT::Clone(*tmp,newBS);
727  newV = MVT::Clone(*tmp,newBS*3);
728  newKV = MVT::Clone(*tmp,newBS*3);
729  if (hasM_) {
730  newMV = MVT::Clone(*tmp,newBS*3);
731  }
732 
733  //
734  // if we are initialized, we want to pull the data from V_ into newV:
735  // bs | bs | bs
736  // newV = [newX | **** |newP ]
737  // newKV = [newKX| **** |newKP]
738  // newMV = [newMX| **** |newMP]
739  // where
740  // oldbs | oldbs | oldbs
741  // V_ = [newX *** | ******* | newP ***]
742  // KV_ = [newKX *** | ******* | newKP ***]
743  // MV_ = [newMX *** | ******* | newMP ***]
744  //
745  // we don't care to copy the data corresponding to H
746  // we will not copy the M data if !hasM_, because it doesn't exist
747  //
748 
749  // these are shrink operations which preserve their data
750  theta_.resize(3*newBS);
751  Rnorms_.resize(newBS);
752  R2norms_.resize(newBS);
753 
754  // copy residual vectors: oldind,newind currently contains [0,...,newBS-1]
755  src = MVT::CloneView(*R_,newind);
756  MVT::SetBlock(*src,newind,*newR);
757  // free old memory and point to new memory
758  R_ = newR;
759 
760  // copy in order: newX newKX newMX, then newP newKP newMP
761  // for X: [0,bs-1] <-- [0,bs-1]
762  src = MVT::CloneView(*V_,oldind);
763  MVT::SetBlock(*src,newind,*newV);
764  src = MVT::CloneView(*KV_,oldind);
765  MVT::SetBlock(*src,newind,*newKV);
766  if (hasM_) {
767  src = MVT::CloneView(*MV_,oldind);
768  MVT::SetBlock(*src,newind,*newMV);
769  }
770  // for P: [2*bs, 3*bs-1] <-- [2*oldbs, 2*oldbs+bs-1]
771  for (int i=0; i<newBS; i++) {
772  newind[i] += 2*newBS;
773  oldind[i] += 2*blockSize_;
774  }
775  src = MVT::CloneView(*V_,oldind);
776  MVT::SetBlock(*src,newind,*newV);
777  src = MVT::CloneView(*KV_,oldind);
778  MVT::SetBlock(*src,newind,*newKV);
779  if (hasM_) {
780  src = MVT::CloneView(*MV_,oldind);
781  MVT::SetBlock(*src,newind,*newMV);
782  }
783 
784  // release temp view
785  src = Teuchos::null;
786 
787  // release old allocations and point at new memory
788  V_ = newV;
789  KV_ = newKV;
790  if (hasM_) {
791  MV_ = newMV;
792  }
793  else {
794  MV_ = V_;
795  }
796  }
797  else {
798  // newBS > blockSize_ or not initialized
799  // this is also the scenario for our initial call to setBlockSize(), in the constructor
800  initialized_ = false;
801  hasP_ = false;
802 
803  // release views
804  X_ = Teuchos::null;
805  KX_ = Teuchos::null;
806  MX_ = Teuchos::null;
807  H_ = Teuchos::null;
808  KH_ = Teuchos::null;
809  MH_ = Teuchos::null;
810  P_ = Teuchos::null;
811  KP_ = Teuchos::null;
812  MP_ = Teuchos::null;
813 
814  // free allocated storage
815  R_ = Teuchos::null;
816  V_ = Teuchos::null;
817 
818  // allocate scalar vectors
819  theta_.resize(3*newBS,NANVAL);
820  Rnorms_.resize(newBS,NANVAL);
821  R2norms_.resize(newBS,NANVAL);
822 
823  // clone multivectors off of tmp
824  R_ = MVT::Clone(*tmp,newBS);
825  V_ = MVT::Clone(*tmp,newBS*3);
826  KV_ = MVT::Clone(*tmp,newBS*3);
827  if (hasM_) {
828  MV_ = MVT::Clone(*tmp,newBS*3);
829  }
830  else {
831  MV_ = V_;
832  }
833  }
834 
835  // allocate tmp space
836  tmpmvec_ = Teuchos::null;
837  if (fullOrtho_) {
838  tmpmvec_ = MVT::Clone(*tmp,newBS);
839  }
840 
841  // set new block size
842  blockSize_ = newBS;
843 
844  // setup new views
845  setupViews();
846  }
847 
848 
850  // Setup views into V,KV,MV
851  template <class ScalarType, class MV, class OP>
853  {
854  std::vector<int> ind(blockSize_);
855 
856  for (int i=0; i<blockSize_; i++) {
857  ind[i] = i;
858  }
859  X_ = MVT::CloneViewNonConst(*V_,ind);
860  KX_ = MVT::CloneViewNonConst(*KV_,ind);
861  if (hasM_) {
862  MX_ = MVT::CloneViewNonConst(*MV_,ind);
863  }
864  else {
865  MX_ = X_;
866  }
867 
868  for (int i=0; i<blockSize_; i++) {
869  ind[i] += blockSize_;
870  }
871  H_ = MVT::CloneViewNonConst(*V_,ind);
872  KH_ = MVT::CloneViewNonConst(*KV_,ind);
873  if (hasM_) {
874  MH_ = MVT::CloneViewNonConst(*MV_,ind);
875  }
876  else {
877  MH_ = H_;
878  }
879 
880  for (int i=0; i<blockSize_; i++) {
881  ind[i] += blockSize_;
882  }
883  P_ = MVT::CloneViewNonConst(*V_,ind);
884  KP_ = MVT::CloneViewNonConst(*KV_,ind);
885  if (hasM_) {
886  MP_ = MVT::CloneViewNonConst(*MV_,ind);
887  }
888  else {
889  MP_ = P_;
890  }
891  }
892 
893 
895  // Set the auxiliary vectors
896  template <class ScalarType, class MV, class OP>
898  typedef typename Teuchos::Array<Teuchos::RCP<const MV> >::iterator tarcpmv;
899 
900  // set new auxiliary vectors
901  auxVecs_ = auxvecs;
902 
903  numAuxVecs_ = 0;
904  for (tarcpmv i=auxVecs_.begin(); i != auxVecs_.end(); i++) {
905  numAuxVecs_ += MVT::GetNumberVecs(**i);
906  }
907 
908  // If the solver has been initialized, X and P are not necessarily orthogonal to new auxiliary vectors
909  if (numAuxVecs_ > 0 && initialized_) {
910  initialized_ = false;
911  hasP_ = false;
912  }
913 
914  if (om_->isVerbosity( Debug ) ) {
915  // Check almost everything here
916  CheckList chk;
917  chk.checkQ = true;
918  om_->print( Debug, accuracyCheck(chk, ": in setAuxVecs()") );
919  }
920  }
921 
922 
924  /* Initialize the state of the solver
925  *
926  * POST-CONDITIONS:
927  *
928  * initialized_ == true
929  * X is orthonormal, orthogonal to auxVecs_
930  * KX = Op*X
931  * MX = M*X if hasM_
932  * theta_ contains Ritz values of X
933  * R = KX - MX*diag(theta_)
934  * if hasP() == true,
935  * P orthogonal to auxVecs_
936  * if fullOrtho_ == true,
937  * P orthonormal and orthogonal to X
938  * KP = Op*P
939  * MP = M*P
940  */
941  template <class ScalarType, class MV, class OP>
943  {
944  // NOTE: memory has been allocated by setBlockSize(). Use SetBlock below; do not Clone
945  // NOTE: Overall time spent in this routine is counted to timerInit_; portions will also be counted towards other primitives
946 
947 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
948  Teuchos::TimeMonitor inittimer( *timerInit_ );
949 #endif
950 
951  std::vector<int> bsind(blockSize_);
952  for (int i=0; i<blockSize_; i++) bsind[i] = i;
953 
954  // in LOBPCG, X (the subspace iterate) is primary
955  // the order of dependence follows like so.
956  // --init-> X
957  // --op apply-> MX,KX
958  // --ritz analysis-> theta
959  // --optional-> P,MP,KP
960  //
961  // if the user specifies all data for a level, we will accept it.
962  // otherwise, we will generate the whole level, and all subsequent levels.
963  //
964  // the data members are ordered based on dependence, and the levels are
965  // partitioned according to the amount of work required to produce the
966  // items in a level.
967  //
968  // inconsitent multivectors widths and lengths will not be tolerated, and
969  // will be treated with exceptions.
970 
971  // set up X, KX, MX: get them from "state" if user specified them
972 
973  //----------------------------------------
974  // set up X, MX, KX
975  //----------------------------------------
976  if (newstate.X != Teuchos::null) {
977  TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetGlobalLength(*newstate.X) != MVT::GetGlobalLength(*X_),
978  std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.X not correct." );
979  // newstate.X must have blockSize_ vectors; any more will be ignored
980  TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.X) < blockSize_,
981  std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.X must have at least block size vectors.");
982 
983  // put X data in X_
984  MVT::SetBlock(*newstate.X,bsind,*X_);
985 
986  // put MX data in MX_
987  if (hasM_) {
988  if (newstate.MX != Teuchos::null) {
989  TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetGlobalLength(*newstate.MX) != MVT::GetGlobalLength(*MX_),
990  std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.MX not correct." );
991  // newstate.MX must have blockSize_ vectors; any more will be ignored
992  TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.MX) < blockSize_,
993  std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.MX must have at least block size vectors.");
994  MVT::SetBlock(*newstate.MX,bsind,*MX_);
995  }
996  else {
997  // user didn't specify MX, compute it
998  {
999 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
1000  Teuchos::TimeMonitor lcltimer( *timerMOp_ );
1001 #endif
1002  OPT::Apply(*MOp_,*X_,*MX_);
1003  count_ApplyM_ += blockSize_;
1004  }
1005  // we generated MX; we will generate R as well
1006  newstate.R = Teuchos::null;
1007  }
1008  }
1009 
1010  // put data in KX
1011  if (newstate.KX != Teuchos::null) {
1012  TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetGlobalLength(*newstate.KX) != MVT::GetGlobalLength(*KX_),
1013  std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.KX not correct." );
1014  // newstate.KX must have blockSize_ vectors; any more will be ignored
1015  TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.KX) < blockSize_,
1016  std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.KX must have at least block size vectors.");
1017  MVT::SetBlock(*newstate.KX,bsind,*KX_);
1018  }
1019  else {
1020  // user didn't specify KX, compute it
1021  {
1022 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
1023  Teuchos::TimeMonitor lcltimer( *timerOp_ );
1024 #endif
1025  OPT::Apply(*Op_,*X_,*KX_);
1026  count_ApplyOp_ += blockSize_;
1027  }
1028  // we generated KX; we will generate R as well
1029  newstate.R = Teuchos::null;
1030  }
1031  }
1032  else {
1033  // user did not specify X
1034  // we will initialize X, compute KX and MX, and compute R
1035  //
1036  // clear state so we won't use any data from it below
1037  newstate.P = Teuchos::null;
1038  newstate.KP = Teuchos::null;
1039  newstate.MP = Teuchos::null;
1040  newstate.R = Teuchos::null;
1041  newstate.T = Teuchos::null;
1042 
1043  // generate a basis and projectAndNormalize
1044  Teuchos::RCP<const MV> ivec = problem_->getInitVec();
1045  TEUCHOS_TEST_FOR_EXCEPTION(ivec == Teuchos::null,std::logic_error,
1046  "Anasazi::LOBPCG::initialize(): Eigenproblem did not specify initial vectors to clone from.");
1047 
1048  int initSize = MVT::GetNumberVecs(*ivec);
1049  if (initSize > blockSize_) {
1050  // we need only the first blockSize_ vectors from ivec; get a view of them
1051  initSize = blockSize_;
1052  std::vector<int> ind(blockSize_);
1053  for (int i=0; i<blockSize_; i++) ind[i] = i;
1054  ivec = MVT::CloneView(*ivec,ind);
1055  }
1056 
1057  // assign ivec to first part of X_
1058  if (initSize > 0) {
1059  std::vector<int> ind(initSize);
1060  for (int i=0; i<initSize; i++) ind[i] = i;
1061  MVT::SetBlock(*ivec,ind,*X_);
1062  }
1063  // fill the rest of X_ with random
1064  if (blockSize_ > initSize) {
1065  std::vector<int> ind(blockSize_ - initSize);
1066  for (int i=0; i<blockSize_ - initSize; i++) ind[i] = initSize + i;
1067  Teuchos::RCP<MV> rX = MVT::CloneViewNonConst(*X_,ind);
1068  MVT::MvRandom(*rX);
1069  rX = Teuchos::null;
1070  }
1071 
1072  // put data in MX
1073  if (hasM_) {
1074 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
1075  Teuchos::TimeMonitor lcltimer( *timerMOp_ );
1076 #endif
1077  OPT::Apply(*MOp_,*X_,*MX_);
1078  count_ApplyM_ += blockSize_;
1079  }
1080 
1081  // remove auxVecs from X_ and normalize it
1082  if (numAuxVecs_ > 0) {
1083 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
1084  Teuchos::TimeMonitor lcltimer( *timerOrtho_ );
1085 #endif
1087  int rank = orthman_->projectAndNormalizeMat(*X_,auxVecs_,dummy,Teuchos::null,MX_);
1088  TEUCHOS_TEST_FOR_EXCEPTION(rank != blockSize_, LOBPCGInitFailure,
1089  "Anasazi::LOBPCG::initialize(): Couldn't generate initial basis of full rank.");
1090  }
1091  else {
1092 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
1093  Teuchos::TimeMonitor lcltimer( *timerOrtho_ );
1094 #endif
1095  int rank = orthman_->normalizeMat(*X_,Teuchos::null,MX_);
1096  TEUCHOS_TEST_FOR_EXCEPTION(rank != blockSize_, LOBPCGInitFailure,
1097  "Anasazi::LOBPCG::initialize(): Couldn't generate initial basis of full rank.");
1098  }
1099 
1100  // put data in KX
1101  {
1102 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
1103  Teuchos::TimeMonitor lcltimer( *timerOp_ );
1104 #endif
1105  OPT::Apply(*Op_,*X_,*KX_);
1106  count_ApplyOp_ += blockSize_;
1107  }
1108  } // end if (newstate.X != Teuchos::null)
1109 
1110 
1111  //----------------------------------------
1112  // set up Ritz values
1113  //----------------------------------------
1114  theta_.resize(3*blockSize_,NANVAL);
1115  if (newstate.T != Teuchos::null) {
1116  TEUCHOS_TEST_FOR_EXCEPTION( (signed int)(newstate.T->size()) < blockSize_,
1117  std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.T must contain at least block size Ritz values.");
1118  for (int i=0; i<blockSize_; i++) {
1119  theta_[i] = (*newstate.T)[i];
1120  }
1121  nevLocal_ = blockSize_;
1122  }
1123  else {
1124  // get ritz vecs/vals
1125  Teuchos::SerialDenseMatrix<int,ScalarType> KK(blockSize_,blockSize_),
1126  MM(blockSize_,blockSize_),
1127  S(blockSize_,blockSize_);
1128  {
1129 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
1130  Teuchos::TimeMonitor lcltimer( *timerLocalProj_ );
1131 #endif
1132  // project K
1133  MVT::MvTransMv(ONE,*X_,*KX_,KK);
1134  // project M
1135  MVT::MvTransMv(ONE,*X_,*MX_,MM);
1136  nevLocal_ = blockSize_;
1137  }
1138 
1139  // solve the projected problem
1140  {
1141 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
1142  Teuchos::TimeMonitor lcltimer( *timerDS_ );
1143 #endif
1144  Utils::directSolver(blockSize_, KK, Teuchos::rcpFromRef(MM), S, theta_, nevLocal_, 1);
1145  TEUCHOS_TEST_FOR_EXCEPTION(nevLocal_ != blockSize_,LOBPCGInitFailure,
1146  "Anasazi::LOBPCG::initialize(): Initial Ritz analysis did not produce enough Ritz pairs to initialize algorithm.");
1147  }
1148 
1149  // We only have blockSize_ ritz pairs, ergo we do not need to select.
1150  // However, we still require them to be ordered correctly
1151  {
1152 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
1153  Teuchos::TimeMonitor lcltimer( *timerSort_ );
1154 #endif
1155 
1156  std::vector<int> order(blockSize_);
1157  //
1158  // sort the first blockSize_ values in theta_
1159  sm_->sort(theta_, Teuchos::rcpFromRef(order), blockSize_); // don't catch exception
1160  //
1161  // apply the same ordering to the primitive ritz vectors
1162  Utils::permuteVectors(order,S);
1163  }
1164 
1165  // update the solution, use R for storage
1166  {
1167 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
1168  Teuchos::TimeMonitor lcltimer( *timerLocalUpdate_ );
1169 #endif
1170  // X <- X*S
1171  MVT::MvAddMv( ONE, *X_, ZERO, *X_, *R_ );
1172  MVT::MvTimesMatAddMv( ONE, *R_, S, ZERO, *X_ );
1173  // KX <- KX*S
1174  MVT::MvAddMv( ONE, *KX_, ZERO, *KX_, *R_ );
1175  MVT::MvTimesMatAddMv( ONE, *R_, S, ZERO, *KX_ );
1176  if (hasM_) {
1177  // MX <- MX*S
1178  MVT::MvAddMv( ONE, *MX_, ZERO, *MX_, *R_ );
1179  MVT::MvTimesMatAddMv( ONE, *R_, S, ZERO, *MX_ );
1180  }
1181  }
1182  }
1183 
1184  //----------------------------------------
1185  // compute R
1186  //----------------------------------------
1187  if (newstate.R != Teuchos::null) {
1188  TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetGlobalLength(*newstate.R) != MVT::GetGlobalLength(*R_),
1189  std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.R not correct." );
1190  TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.R) < blockSize_,
1191  std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.R must have blockSize number of vectors." );
1192  MVT::SetBlock(*newstate.R,bsind,*R_);
1193  }
1194  else {
1195 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
1196  Teuchos::TimeMonitor lcltimer( *timerCompRes_ );
1197 #endif
1198  // form R <- KX - MX*T
1199  MVT::MvAddMv(ZERO,*KX_,ONE,*KX_,*R_);
1200  Teuchos::SerialDenseMatrix<int,ScalarType> T(blockSize_,blockSize_);
1201  for (int i=0; i<blockSize_; i++) T(i,i) = theta_[i];
1202  MVT::MvTimesMatAddMv(-ONE,*MX_,T,ONE,*R_);
1203  }
1204 
1205  // R has been updated; mark the norms as out-of-date
1206  Rnorms_current_ = false;
1207  R2norms_current_ = false;
1208 
1209  // put data in P,KP,MP: P is not used to set theta
1210  if (newstate.P != Teuchos::null) {
1211  TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.P) < blockSize_ ,
1212  std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.P must have blockSize number of vectors." );
1213  TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetGlobalLength(*newstate.P) != MVT::GetGlobalLength(*P_),
1214  std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.P not correct." );
1215  hasP_ = true;
1216 
1217  // set P_
1218  MVT::SetBlock(*newstate.P,bsind,*P_);
1219 
1220  // set/compute KP_
1221  if (newstate.KP != Teuchos::null) {
1222  TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.KP) < blockSize_,
1223  std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.KP must have blockSize number of vectors." );
1224  TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetGlobalLength(*newstate.KP) != MVT::GetGlobalLength(*KP_),
1225  std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.KP not correct." );
1226  MVT::SetBlock(*newstate.KP,bsind,*KP_);
1227  }
1228  else {
1229 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
1230  Teuchos::TimeMonitor lcltimer( *timerOp_ );
1231 #endif
1232  OPT::Apply(*Op_,*P_,*KP_);
1233  count_ApplyOp_ += blockSize_;
1234  }
1235 
1236  // set/compute MP_
1237  if (hasM_) {
1238  if (newstate.MP != Teuchos::null) {
1239  TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.MP) < blockSize_,
1240  std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.MP must have blockSize number of vectors." );
1241  TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetGlobalLength(*newstate.MP) != MVT::GetGlobalLength(*MP_),
1242  std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.MP not correct." );
1243  MVT::SetBlock(*newstate.MP,bsind,*MP_);
1244  }
1245  else {
1246 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
1247  Teuchos::TimeMonitor lcltimer( *timerMOp_ );
1248 #endif
1249  OPT::Apply(*MOp_,*P_,*MP_);
1250  count_ApplyM_ += blockSize_;
1251  }
1252  }
1253  }
1254  else {
1255  hasP_ = false;
1256  }
1257 
1258  // finally, we are initialized
1259  initialized_ = true;
1260 
1261  if (om_->isVerbosity( Debug ) ) {
1262  // Check almost everything here
1263  CheckList chk;
1264  chk.checkX = true;
1265  chk.checkKX = true;
1266  chk.checkMX = true;
1267  chk.checkP = true;
1268  chk.checkKP = true;
1269  chk.checkMP = true;
1270  chk.checkR = true;
1271  chk.checkQ = true;
1272  om_->print( Debug, accuracyCheck(chk, ": after initialize()") );
1273  }
1274 
1275  }
1276 
1277  template <class ScalarType, class MV, class OP>
1279  {
1281  initialize(empty);
1282  }
1283 
1284 
1286  // Instruct the solver to use full orthogonalization
1287  template <class ScalarType, class MV, class OP>
1289  {
1290  if ( fullOrtho_ == true || initialized_ == false || fullOrtho == fullOrtho_ ) {
1291  // state is already orthogonalized or solver is not initialized
1292  fullOrtho_ = fullOrtho;
1293  }
1294  else {
1295  // solver is initialized, state is not fully orthogonalized, and user has requested full orthogonalization
1296  // ergo, we must throw away data in P
1297  fullOrtho_ = true;
1298  hasP_ = false;
1299  }
1300 
1301  // the user has called setFullOrtho, so the class has been instantiated
1302  // ergo, the data has already been allocated, i.e., setBlockSize() has been called
1303  // if it is already allocated, it should be the proper size
1304  if (fullOrtho_ && tmpmvec_ == Teuchos::null) {
1305  // allocated the workspace
1306  tmpmvec_ = MVT::Clone(*X_,blockSize_);
1307  }
1308  else if (fullOrtho_==false) {
1309  // free the workspace
1310  tmpmvec_ = Teuchos::null;
1311  }
1312  }
1313 
1314 
1316  // Perform LOBPCG iterations until the StatusTest tells us to stop.
1317  template <class ScalarType, class MV, class OP>
1319  {
1320  //
1321  // Allocate/initialize data structures
1322  //
1323  if (initialized_ == false) {
1324  initialize();
1325  }
1326 
1327  //
1328  // Miscellaneous definitions
1329  const int oneBlock = blockSize_;
1330  const int twoBlocks = 2*blockSize_;
1331  const int threeBlocks = 3*blockSize_;
1332 
1333  std::vector<int> indblock1(blockSize_), indblock2(blockSize_), indblock3(blockSize_);
1334  for (int i=0; i<blockSize_; i++) {
1335  indblock1[i] = i;
1336  indblock2[i] = i + blockSize_;
1337  indblock3[i] = i + 2*blockSize_;
1338  }
1339 
1340  //
1341  // Define dense projected/local matrices
1342  // KK = Local stiffness matrix (size: 3*blockSize_ x 3*blockSize_)
1343  // MM = Local mass matrix (size: 3*blockSize_ x 3*blockSize_)
1344  // S = Local eigenvectors (size: 3*blockSize_ x 3*blockSize_)
1345  Teuchos::SerialDenseMatrix<int,ScalarType> KK( threeBlocks, threeBlocks ),
1346  MM( threeBlocks, threeBlocks ),
1347  S( threeBlocks, threeBlocks );
1348 
1349  while (tester_->checkStatus(this) != Passed) {
1350 
1351  // Print information on current status
1352  if (om_->isVerbosity(Debug)) {
1353  currentStatus( om_->stream(Debug) );
1354  }
1355  else if (om_->isVerbosity(IterationDetails)) {
1356  currentStatus( om_->stream(IterationDetails) );
1357  }
1358 
1359  // increment iteration counter
1360  iter_++;
1361 
1362  // Apply the preconditioner on the residuals: H <- Prec*R
1363  if (Prec_ != Teuchos::null) {
1364 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
1365  Teuchos::TimeMonitor lcltimer( *timerPrec_ );
1366 #endif
1367  OPT::Apply( *Prec_, *R_, *H_ ); // don't catch the exception
1368  count_ApplyPrec_ += blockSize_;
1369  }
1370  else {
1371  MVT::MvAddMv(ONE,*R_,ZERO,*R_,*H_);
1372  }
1373 
1374  // Apply the mass matrix on H
1375  if (hasM_) {
1376 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
1377  Teuchos::TimeMonitor lcltimer( *timerMOp_ );
1378 #endif
1379  OPT::Apply( *MOp_, *H_, *MH_); // don't catch the exception
1380  count_ApplyM_ += blockSize_;
1381  }
1382 
1383  // orthogonalize H against the auxiliary vectors
1384  // optionally: orthogonalize H against X and P ([X P] is already orthonormal)
1386  Q = auxVecs_;
1387  if (fullOrtho_) {
1388  // X and P are not contiguous, so there is not much point in putting them under
1389  // a single multivector view
1390  Q.push_back(X_);
1391  if (hasP_) {
1392  Q.push_back(P_);
1393  }
1394  }
1395  {
1396 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
1397  Teuchos::TimeMonitor lcltimer( *timerOrtho_ );
1398 #endif
1400  Teuchos::tuple<Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > >(Teuchos::null);
1401  int rank = orthman_->projectAndNormalizeMat(*H_,Q,dummyC,Teuchos::null,MH_);
1402  // our views are currently in place; it is safe to throw an exception
1404  "Anasazi::LOBPCG::iterate(): unable to compute orthonormal basis for H.");
1405  }
1406 
1407  if (om_->isVerbosity( Debug ) ) {
1408  CheckList chk;
1409  chk.checkH = true;
1410  chk.checkMH = true;
1411  om_->print( Debug, accuracyCheck(chk, ": after ortho H") );
1412  }
1413  else if (om_->isVerbosity( OrthoDetails ) ) {
1414  CheckList chk;
1415  chk.checkH = true;
1416  chk.checkMH = true;
1417  om_->print( OrthoDetails, accuracyCheck(chk,": after ortho H") );
1418  }
1419 
1420  // Apply the stiffness matrix to H
1421  {
1422 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
1423  Teuchos::TimeMonitor lcltimer( *timerOp_ );
1424 #endif
1425  OPT::Apply( *Op_, *H_, *KH_); // don't catch the exception
1426  count_ApplyOp_ += blockSize_;
1427  }
1428 
1429  if (hasP_) {
1430  nevLocal_ = threeBlocks;
1431  }
1432  else {
1433  nevLocal_ = twoBlocks;
1434  }
1435 
1436  //
1437  // we need bases: [X H P] and [H P] (only need the latter if fullOrtho == false)
1438  // we need to perform the following operations:
1439  // X' [KX KH KP]
1440  // X' [MX MH MP]
1441  // H' [KH KP]
1442  // H' [MH MP]
1443  // P' [KP]
1444  // P' [MP]
1445  // [X H P] CX
1446  // [X H P] CP if fullOrtho
1447  // [H P] CP if !fullOrtho
1448  //
1449  // since M[X H P] is potentially the same memory as [X H P], and
1450  // because we are not allowed to have overlapping non-const views of
1451  // a multivector, we will now abandon our non-const views in favor of
1452  // const views
1453  //
1454  X_ = Teuchos::null;
1455  KX_ = Teuchos::null;
1456  MX_ = Teuchos::null;
1457  H_ = Teuchos::null;
1458  KH_ = Teuchos::null;
1459  MH_ = Teuchos::null;
1460  P_ = Teuchos::null;
1461  KP_ = Teuchos::null;
1462  MP_ = Teuchos::null;
1463  Teuchos::RCP<const MV> cX, cH, cXHP, cHP, cK_XHP, cK_HP, cM_XHP, cM_HP, cP, cK_P, cM_P;
1464  {
1465  cX = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indblock1);
1466  cH = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indblock2);
1467 
1468  std::vector<int> indXHP(nevLocal_);
1469  for (int i=0; i<nevLocal_; i++) {
1470  indXHP[i] = i;
1471  }
1472  cXHP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indXHP);
1473  cK_XHP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(KV_),indXHP);
1474  if (hasM_) {
1475  cM_XHP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(MV_),indXHP);
1476  }
1477  else {
1478  cM_XHP = cXHP;
1479  }
1480 
1481  std::vector<int> indHP(nevLocal_-blockSize_);
1482  for (int i=blockSize_; i<nevLocal_; i++) {
1483  indHP[i-blockSize_] = i;
1484  }
1485  cHP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indHP);
1486  cK_HP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(KV_),indHP);
1487  if (hasM_) {
1488  cM_HP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(MV_),indHP);
1489  }
1490  else {
1491  cM_HP = cHP;
1492  }
1493 
1494  if (nevLocal_ == threeBlocks) {
1495  cP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indblock3);
1496  cK_P = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(KV_),indblock3);
1497  if (hasM_) {
1498  cM_P = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(MV_),indblock3);
1499  }
1500  else {
1501  cM_P = cP;
1502  }
1503  }
1504  }
1505 
1506  //
1507  //----------------------------------------
1508  // Form "local" mass and stiffness matrices
1509  //----------------------------------------
1510  {
1511  // We will form only the block upper triangular part of
1512  // [X H P]' K [X H P] and [X H P]' M [X H P]
1513  // Get the necessary views into KK and MM:
1514  // [--K1--] [--M1--]
1515  // KK = [ -K2-] MM = [ -M2-]
1516  // [ K3] [ M3]
1517  //
1518  // It is okay to declare a zero-area view of a Teuchos::SerialDenseMatrix
1519  //
1521  K1(Teuchos::View,KK,blockSize_,nevLocal_ ,0*blockSize_,0*blockSize_),
1522  K2(Teuchos::View,KK,blockSize_,nevLocal_-1*blockSize_,1*blockSize_,1*blockSize_),
1523  K3(Teuchos::View,KK,blockSize_,nevLocal_-2*blockSize_,2*blockSize_,2*blockSize_),
1524  M1(Teuchos::View,MM,blockSize_,nevLocal_ ,0*blockSize_,0*blockSize_),
1525  M2(Teuchos::View,MM,blockSize_,nevLocal_-1*blockSize_,1*blockSize_,1*blockSize_),
1526  M3(Teuchos::View,MM,blockSize_,nevLocal_-2*blockSize_,2*blockSize_,2*blockSize_);
1527  {
1528 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
1529  Teuchos::TimeMonitor lcltimer( *timerLocalProj_ );
1530 #endif
1531  MVT::MvTransMv( ONE, *cX, *cK_XHP, K1 );
1532  MVT::MvTransMv( ONE, *cX, *cM_XHP, M1 );
1533  MVT::MvTransMv( ONE, *cH, *cK_HP , K2 );
1534  MVT::MvTransMv( ONE, *cH, *cM_HP , M2 );
1535  if (nevLocal_ == threeBlocks) {
1536  MVT::MvTransMv( ONE, *cP, *cK_P, K3 );
1537  MVT::MvTransMv( ONE, *cP, *cM_P, M3 );
1538  }
1539  }
1540  }
1541  // below, we only need bases [X H P] and [H P] and friends
1542  // furthermore, we only need [H P] and friends if fullOrtho == false
1543  // clear the others now
1544  cX = Teuchos::null;
1545  cH = Teuchos::null;
1546  cP = Teuchos::null;
1547  cK_P = Teuchos::null;
1548  cM_P = Teuchos::null;
1549  if (fullOrtho_ == true) {
1550  cHP = Teuchos::null;
1551  cK_HP = Teuchos::null;
1552  cM_HP = Teuchos::null;
1553  }
1554 
1555  //
1556  //---------------------------------------------------
1557  // Perform a spectral decomposition of (KK,MM)
1558  //---------------------------------------------------
1559  //
1560  // Get pointers to relevant part of KK, MM and S for Rayleigh-Ritz analysis
1561  Teuchos::SerialDenseMatrix<int,ScalarType> lclKK(Teuchos::View,KK,nevLocal_,nevLocal_),
1562  lclMM(Teuchos::View,MM,nevLocal_,nevLocal_),
1563  lclS(Teuchos::View, S,nevLocal_,nevLocal_);
1564  {
1565 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
1566  Teuchos::TimeMonitor lcltimer( *timerDS_ );
1567 #endif
1568  int localSize = nevLocal_;
1569  Utils::directSolver(localSize, lclKK, Teuchos::rcpFromRef(lclMM), lclS, theta_, nevLocal_, 0);
1570  // localSize tells directSolver() how big KK,MM are
1571  // however, directSolver() may choose to use only the principle submatrices of KK,MM
1572  // because of loss of MM-orthogonality in the projected eigenvectors
1573  // nevLocal_ tells us how much it used, telling us the effective localSize
1574  // (i.e., how much of KK,MM used by directSolver)
1575  // we will not tolerate any indefiniteness, and will throw an exception if it was
1576  // detected by directSolver
1577  //
1578  if (nevLocal_ != localSize) {
1579  // before throwing the exception, and thereby leaving iterate(), setup the views again
1580  // first, clear the const views
1581  cXHP = Teuchos::null;
1582  cK_XHP = Teuchos::null;
1583  cM_XHP = Teuchos::null;
1584  cHP = Teuchos::null;
1585  cK_HP = Teuchos::null;
1586  cM_HP = Teuchos::null;
1587  setupViews();
1588  }
1589  TEUCHOS_TEST_FOR_EXCEPTION(nevLocal_ != localSize, LOBPCGRitzFailure,
1590  "Anasazi::LOBPCG::iterate(): indefiniteness detected in projected mass matrix." );
1591  }
1592 
1593  //
1594  //---------------------------------------------------
1595  // Sort the ritz values using the sort manager
1596  //---------------------------------------------------
1599  {
1600 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
1601  Teuchos::TimeMonitor lcltimer( *timerSort_ );
1602 #endif
1603 
1604  std::vector<int> order(nevLocal_);
1605  //
1606  // Sort the first nevLocal_ values in theta_
1607  sm_->sort(theta_, Teuchos::rcpFromRef(order), nevLocal_); // don't catch exception
1608  //
1609  // Sort the primitive ritz vectors
1610  Utils::permuteVectors(order,lclS);
1611  }
1612 
1613  //
1614  //----------------------------------------
1615  // Compute coefficients for X and P under [X H P]
1616  //----------------------------------------
1617  // Before computing X,P, optionally perform orthogonalization per Hetmaniuk,Lehoucq paper
1618  // CX will be the coefficients of [X,H,P] for new X, CP for new P
1619  // The paper suggests orthogonalizing CP against CX and orthonormalizing CP, w.r.t. MM
1620  // Here, we will also orthonormalize CX.
1621  // This is accomplished using the Cholesky factorization of [CX CP]^H lclMM [CX CP]
1623  if (fullOrtho_) {
1624  // build orthonormal basis for ( 0 ) that is MM orthogonal to ( S11 )
1625  // ( S21 ) ( S21 )
1626  // ( S31 ) ( S31 )
1627  // Do this using Cholesky factorization of ( S11 0 )
1628  // ( S21 S21 )
1629  // ( S31 S31 )
1630  // ( S11 0 )
1631  // Build C = ( S21 S21 )
1632  // ( S31 S31 )
1633  Teuchos::SerialDenseMatrix<int,ScalarType> C(nevLocal_,twoBlocks),
1634  MMC(nevLocal_,twoBlocks),
1635  CMMC(twoBlocks ,twoBlocks);
1636 
1637  // first block of rows: ( S11 0 )
1638  for (int j=0; j<oneBlock; j++) {
1639  for (int i=0; i<oneBlock; i++) {
1640  // CX
1641  C(i,j) = lclS(i,j);
1642  // CP
1643  C(i,j+oneBlock) = ZERO;
1644  }
1645  }
1646  // second block of rows: (S21 S21)
1647  for (int j=0; j<oneBlock; j++) {
1648  for (int i=oneBlock; i<twoBlocks; i++) {
1649  // CX
1650  C(i,j) = lclS(i,j);
1651  // CP
1652  C(i,j+oneBlock) = lclS(i,j);
1653  }
1654  }
1655  // third block of rows
1656  if (nevLocal_ == threeBlocks) {
1657  for (int j=0; j<oneBlock; j++) {
1658  for (int i=twoBlocks; i<threeBlocks; i++) {
1659  // CX
1660  C(i,j) = lclS(i,j);
1661  // CP
1662  C(i,j+oneBlock) = lclS(i,j);
1663  }
1664  }
1665  }
1666 
1667  // compute MMC = lclMM*C
1668  {
1669  int teuchosret;
1670  teuchosret = MMC.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,C,ZERO);
1671  TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
1672  "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
1673  // compute CMMC = C^H*MMC == C^H*lclMM*C
1674  teuchosret = CMMC.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,C,MMC,ZERO);
1675  TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
1676  "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
1677  }
1678 
1679  // compute R (cholesky) of CMMC
1680  {
1681  int info;
1682  lapack.POTRF('U',twoBlocks,CMMC.values(),CMMC.stride(),&info);
1683  // our views ARE NOT currently in place; we must reestablish them before throwing an exception
1684  if (info != 0) {
1685  // Check symmetry of H'*K*H, this might be the first place a non-Hermitian operator will cause a problem.
1686  Teuchos::SerialDenseMatrix<int,ScalarType> K22(Teuchos::View,KK,blockSize_,blockSize_,1*blockSize_,1*blockSize_);
1688  K22_trans -= K22;
1689  MagnitudeType symNorm = K22_trans.normOne();
1690  MagnitudeType tol = SCT::magnitude(SCT::squareroot(SCT::eps()));
1691 
1692  cXHP = Teuchos::null;
1693  cHP = Teuchos::null;
1694  cK_XHP = Teuchos::null;
1695  cK_HP = Teuchos::null;
1696  cM_XHP = Teuchos::null;
1697  cM_HP = Teuchos::null;
1698  setupViews();
1699 
1700  std::string errMsg = "Anasazi::LOBPCG::iterate(): Cholesky factorization failed during full orthogonalization.";
1701  if ( symNorm < tol )
1702  {
1703  TEUCHOS_TEST_FOR_EXCEPTION(info != 0, LOBPCGOrthoFailure, errMsg );
1704  }
1705  else
1706  {
1707  errMsg += " Check the operator A (or K), it may not be Hermitian!";
1708  TEUCHOS_TEST_FOR_EXCEPTION(info != 0, LOBPCGOrthoFailure, errMsg );
1709  }
1710  }
1711  }
1712  // compute C = C inv(R)
1714  nevLocal_,twoBlocks,ONE,CMMC.values(),CMMC.stride(),C.values(),C.stride());
1715  // put C(:,0:oneBlock-1) into CX
1716  CX = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::Copy,C,nevLocal_,oneBlock,0,0) );
1717  // put C(:,oneBlock:twoBlocks-1) into CP
1718  CP = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::Copy,C,nevLocal_,oneBlock,0,oneBlock) );
1719 
1720  // check the results
1721  if (om_->isVerbosity( Debug ) ) {
1722  Teuchos::SerialDenseMatrix<int,ScalarType> tmp1(nevLocal_,oneBlock),
1723  tmp2(oneBlock,oneBlock);
1724  MagnitudeType tmp;
1725  int teuchosret;
1726  std::stringstream os;
1727  os.precision(2);
1728  os.setf(std::ios::scientific, std::ios::floatfield);
1729 
1730  os << " Checking Full Ortho: iteration " << iter_ << std::endl;
1731 
1732  // check CX^T MM CX == I
1733  // compute tmp1 = MM*CX
1734  teuchosret = tmp1.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,*CX,ZERO);
1735  TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
1736  "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
1737  // compute tmp2 = CX^H*tmp1 == CX^H*MM*CX
1738  teuchosret = tmp2.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,*CX,tmp1,ZERO);
1739  TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
1740  "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
1741  // subtrace tmp2 - I == CX^H * MM * CX - I
1742  for (int i=0; i<oneBlock; i++) tmp2(i,i) -= ONE;
1743  tmp = tmp2.normFrobenius();
1744  os << " >> Error in CX^H MM CX == I : " << tmp << std::endl;
1745 
1746  // check CP^T MM CP == I
1747  // compute tmp1 = MM*CP
1748  teuchosret = tmp1.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,*CP,ZERO);
1749  TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
1750  "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
1751  // compute tmp2 = CP^H*tmp1 == CP^H*MM*CP
1752  teuchosret = tmp2.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,*CP,tmp1,ZERO);
1753  TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
1754  "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
1755  // subtrace tmp2 - I == CP^H * MM * CP - I
1756  for (int i=0; i<oneBlock; i++) tmp2(i,i) -= ONE;
1757  tmp = tmp2.normFrobenius();
1758  os << " >> Error in CP^H MM CP == I : " << tmp << std::endl;
1759 
1760  // check CX^T MM CP == 0
1761  // compute tmp1 = MM*CP
1762  teuchosret = tmp1.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,*CP,ZERO);
1763  TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,"Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
1764  // compute tmp2 = CX^H*tmp1 == CX^H*MM*CP
1765  teuchosret = tmp2.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,*CX,tmp1,ZERO);
1766  TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,"Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
1767  // subtrace tmp2 == CX^H * MM * CP
1768  tmp = tmp2.normFrobenius();
1769  os << " >> Error in CX^H MM CP == 0 : " << tmp << std::endl;
1770 
1771  os << std::endl;
1772  om_->print(Debug,os.str());
1773  }
1774  }
1775  else {
1776  // [S11 ... ...]
1777  // S = [S21 ... ...]
1778  // [S31 ... ...]
1779  //
1780  // CX = [S11]
1781  // [S21]
1782  // [S31] -> X = [X H P] CX
1783  //
1784  // CP = [S21] -> P = [H P] CP
1785  // [S31]
1786  //
1787  CX = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::Copy,lclS,nevLocal_ ,oneBlock,0 ,0) );
1788  CP = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::Copy,lclS,nevLocal_-oneBlock,oneBlock,oneBlock,0) );
1789  }
1790 
1791  //
1792  //----------------------------------------
1793  // Compute new X and new P
1794  //----------------------------------------
1795  // Note: Use R as a temporary work space and (if full ortho) tmpMV as well
1796  {
1797 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
1798  Teuchos::TimeMonitor lcltimer( *timerLocalUpdate_ );
1799 #endif
1800 
1801  // if full ortho, then CX and CP are dense
1802  // we multiply [X H P]*CX into tmpMV
1803  // [X H P]*CP into R
1804  // then put V(:,firstblock) <- tmpMV
1805  // V(:,thirdblock) <- R
1806  //
1807  // if no full ortho, then [H P]*CP doesn't reference first block (X)
1808  // of V, so that we can modify it before computing P
1809  // so we multiply [X H P]*CX into R
1810  // V(:,firstblock) <- R
1811  // multiply [H P]*CP into R
1812  // V(:,thirdblock) <- R
1813  //
1814  // mutatis mutandis for K[XP] and M[XP]
1815  //
1816  // use SetBlock to do the assignments into V_
1817  //
1818  // in either case, views are only allowed to be overlapping
1819  // if they are const, and it should be assume that SetBlock
1820  // creates a view of the associated part
1821  //
1822  // we have from above const-pointers to [KM]XHP, [KM]HP and (if hasP) [KM]P
1823  //
1824  if (fullOrtho_) {
1825  // X,P
1826  MVT::MvTimesMatAddMv(ONE,*cXHP,*CX,ZERO,*tmpmvec_);
1827  MVT::MvTimesMatAddMv(ONE,*cXHP,*CP,ZERO,*R_);
1828  cXHP = Teuchos::null;
1829  MVT::SetBlock(*tmpmvec_,indblock1,*V_);
1830  MVT::SetBlock(*R_ ,indblock3,*V_);
1831  // KX,KP
1832  MVT::MvTimesMatAddMv(ONE,*cK_XHP,*CX,ZERO,*tmpmvec_);
1833  MVT::MvTimesMatAddMv(ONE,*cK_XHP,*CP,ZERO,*R_);
1834  cK_XHP = Teuchos::null;
1835  MVT::SetBlock(*tmpmvec_,indblock1,*KV_);
1836  MVT::SetBlock(*R_ ,indblock3,*KV_);
1837  // MX,MP
1838  if (hasM_) {
1839  MVT::MvTimesMatAddMv(ONE,*cM_XHP,*CX,ZERO,*tmpmvec_);
1840  MVT::MvTimesMatAddMv(ONE,*cM_XHP,*CP,ZERO,*R_);
1841  cM_XHP = Teuchos::null;
1842  MVT::SetBlock(*tmpmvec_,indblock1,*MV_);
1843  MVT::SetBlock(*R_ ,indblock3,*MV_);
1844  }
1845  else {
1846  cM_XHP = Teuchos::null;
1847  }
1848  }
1849  else {
1850  // X,P
1851  MVT::MvTimesMatAddMv(ONE,*cXHP,*CX,ZERO,*R_);
1852  cXHP = Teuchos::null;
1853  MVT::SetBlock(*R_,indblock1,*V_);
1854  MVT::MvTimesMatAddMv(ONE,*cHP,*CP,ZERO,*R_);
1855  cHP = Teuchos::null;
1856  MVT::SetBlock(*R_,indblock3,*V_);
1857  // KX,KP
1858  MVT::MvTimesMatAddMv(ONE,*cK_XHP,*CX,ZERO,*R_);
1859  cK_XHP = Teuchos::null;
1860  MVT::SetBlock(*R_,indblock1,*KV_);
1861  MVT::MvTimesMatAddMv(ONE,*cK_HP,*CP,ZERO,*R_);
1862  cK_HP = Teuchos::null;
1863  MVT::SetBlock(*R_,indblock3,*KV_);
1864  // MX,MP
1865  if (hasM_) {
1866  MVT::MvTimesMatAddMv(ONE,*cM_XHP,*CX,ZERO,*R_);
1867  cM_XHP = Teuchos::null;
1868  MVT::SetBlock(*R_,indblock1,*MV_);
1869  MVT::MvTimesMatAddMv(ONE,*cM_HP,*CP,ZERO,*R_);
1870  cM_HP = Teuchos::null;
1871  MVT::SetBlock(*R_,indblock3,*MV_);
1872  }
1873  else {
1874  cM_XHP = Teuchos::null;
1875  cM_HP = Teuchos::null;
1876  }
1877  }
1878  } // end timing block
1879  // done with coefficient matrices
1880  CX = Teuchos::null;
1881  CP = Teuchos::null;
1882 
1883  //
1884  // we now have a P direction
1885  hasP_ = true;
1886 
1887  // debugging check: all of our const views should have been cleared by now
1888  // if not, we have a logic error above
1889  TEUCHOS_TEST_FOR_EXCEPTION( cXHP != Teuchos::null || cK_XHP != Teuchos::null || cM_XHP != Teuchos::null
1890  || cHP != Teuchos::null || cK_HP != Teuchos::null || cM_HP != Teuchos::null
1891  || cP != Teuchos::null || cK_P != Teuchos::null || cM_P != Teuchos::null,
1892  std::logic_error,
1893  "Anasazi::BlockKrylovSchur::iterate(): const views were not all cleared! Something went wrong!" );
1894 
1895  //
1896  // recreate our const MV views of X,H,P and friends
1897  setupViews();
1898 
1899  //
1900  // Compute the new residuals, explicitly
1901  {
1902 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
1903  Teuchos::TimeMonitor lcltimer( *timerCompRes_ );
1904 #endif
1905  MVT::MvAddMv( ONE, *KX_, ZERO, *KX_, *R_ );
1906  Teuchos::SerialDenseMatrix<int,ScalarType> T( blockSize_, blockSize_ );
1907  for (int i = 0; i < blockSize_; i++) {
1908  T(i,i) = theta_[i];
1909  }
1910  MVT::MvTimesMatAddMv( -ONE, *MX_, T, ONE, *R_ );
1911  }
1912 
1913  // R has been updated; mark the norms as out-of-date
1914  Rnorms_current_ = false;
1915  R2norms_current_ = false;
1916 
1917  // When required, monitor some orthogonalities
1918  if (om_->isVerbosity( Debug ) ) {
1919  // Check almost everything here
1920  CheckList chk;
1921  chk.checkX = true;
1922  chk.checkKX = true;
1923  chk.checkMX = true;
1924  chk.checkP = true;
1925  chk.checkKP = true;
1926  chk.checkMP = true;
1927  chk.checkR = true;
1928  om_->print( Debug, accuracyCheck(chk, ": after local update") );
1929  }
1930  else if (om_->isVerbosity( OrthoDetails )) {
1931  CheckList chk;
1932  chk.checkX = true;
1933  chk.checkP = true;
1934  chk.checkR = true;
1935  om_->print( OrthoDetails, accuracyCheck(chk, ": after local update") );
1936  }
1937  } // end while (statusTest == false)
1938  }
1939 
1940 
1942  // compute/return residual M-norms
1943  template <class ScalarType, class MV, class OP>
1944  std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType>
1946  if (Rnorms_current_ == false) {
1947  // Update the residual norms
1948  orthman_->norm(*R_,Rnorms_);
1949  Rnorms_current_ = true;
1950  }
1951  return Rnorms_;
1952  }
1953 
1954 
1956  // compute/return residual 2-norms
1957  template <class ScalarType, class MV, class OP>
1958  std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType>
1960  if (R2norms_current_ == false) {
1961  // Update the residual 2-norms
1962  MVT::MvNorm(*R_,R2norms_);
1963  R2norms_current_ = true;
1964  }
1965  return R2norms_;
1966  }
1967 
1968 
1969 
1970 
1972  // Check accuracy, orthogonality, and other debugging stuff
1973  //
1974  // bools specify which tests we want to run (instead of running more than we actually care about)
1975  //
1976  // we don't bother checking the following because they are computed explicitly:
1977  // H == Prec*R
1978  // KH == K*H
1979  //
1980  //
1981  // checkX : X orthonormal
1982  // orthogonal to auxvecs
1983  // checkMX: check MX == M*X
1984  // checkKX: check KX == K*X
1985  // checkP : if fullortho P orthonormal and orthogonal to X
1986  // orthogonal to auxvecs
1987  // checkMP: check MP == M*P
1988  // checkKP: check KP == K*P
1989  // checkH : if fullortho H orthonormal and orthogonal to X and P
1990  // orthogonal to auxvecs
1991  // checkMH: check MH == M*H
1992  // checkR : check R orthogonal to X
1993  // checkQ : check that auxiliary vectors are actually orthonormal
1994  //
1995  // TODO:
1996  // add checkTheta
1997  //
1998  template <class ScalarType, class MV, class OP>
1999  std::string LOBPCG<ScalarType,MV,OP>::accuracyCheck( const CheckList &chk, const std::string &where ) const
2000  {
2001  using std::endl;
2002 
2003  std::stringstream os;
2004  os.precision(2);
2005  os.setf(std::ios::scientific, std::ios::floatfield);
2006  MagnitudeType tmp;
2007 
2008  os << " Debugging checks: iteration " << iter_ << where << endl;
2009 
2010  // X and friends
2011  if (chk.checkX && initialized_) {
2012  tmp = orthman_->orthonormError(*X_);
2013  os << " >> Error in X^H M X == I : " << tmp << endl;
2014  for (Array_size_type i=0; i<auxVecs_.size(); i++) {
2015  tmp = orthman_->orthogError(*X_,*auxVecs_[i]);
2016  os << " >> Error in X^H M Q[" << i << "] == 0 : " << tmp << endl;
2017  }
2018  }
2019  if (chk.checkMX && hasM_ && initialized_) {
2020  tmp = Utils::errorEquality(*X_, *MX_, MOp_);
2021  os << " >> Error in MX == M*X : " << tmp << endl;
2022  }
2023  if (chk.checkKX && initialized_) {
2024  tmp = Utils::errorEquality(*X_, *KX_, Op_);
2025  os << " >> Error in KX == K*X : " << tmp << endl;
2026  }
2027 
2028  // P and friends
2029  if (chk.checkP && hasP_ && initialized_) {
2030  if (fullOrtho_) {
2031  tmp = orthman_->orthonormError(*P_);
2032  os << " >> Error in P^H M P == I : " << tmp << endl;
2033  tmp = orthman_->orthogError(*P_,*X_);
2034  os << " >> Error in P^H M X == 0 : " << tmp << endl;
2035  }
2036  for (Array_size_type i=0; i<auxVecs_.size(); i++) {
2037  tmp = orthman_->orthogError(*P_,*auxVecs_[i]);
2038  os << " >> Error in P^H M Q[" << i << "] == 0 : " << tmp << endl;
2039  }
2040  }
2041  if (chk.checkMP && hasM_ && hasP_ && initialized_) {
2042  tmp = Utils::errorEquality(*P_, *MP_, MOp_);
2043  os << " >> Error in MP == M*P : " << tmp << endl;
2044  }
2045  if (chk.checkKP && hasP_ && initialized_) {
2046  tmp = Utils::errorEquality(*P_, *KP_, Op_);
2047  os << " >> Error in KP == K*P : " << tmp << endl;
2048  }
2049 
2050  // H and friends
2051  if (chk.checkH && initialized_) {
2052  if (fullOrtho_) {
2053  tmp = orthman_->orthonormError(*H_);
2054  os << " >> Error in H^H M H == I : " << tmp << endl;
2055  tmp = orthman_->orthogError(*H_,*X_);
2056  os << " >> Error in H^H M X == 0 : " << tmp << endl;
2057  if (hasP_) {
2058  tmp = orthman_->orthogError(*H_,*P_);
2059  os << " >> Error in H^H M P == 0 : " << tmp << endl;
2060  }
2061  }
2062  for (Array_size_type i=0; i<auxVecs_.size(); i++) {
2063  tmp = orthman_->orthogError(*H_,*auxVecs_[i]);
2064  os << " >> Error in H^H M Q[" << i << "] == 0 : " << tmp << endl;
2065  }
2066  }
2067  if (chk.checkMH && hasM_ && initialized_) {
2068  tmp = Utils::errorEquality(*H_, *MH_, MOp_);
2069  os << " >> Error in MH == M*H : " << tmp << endl;
2070  }
2071 
2072  // R: this is not M-orthogonality, but standard euclidean orthogonality
2073  if (chk.checkR && initialized_) {
2074  Teuchos::SerialDenseMatrix<int,ScalarType> xTx(blockSize_,blockSize_);
2075  MVT::MvTransMv(ONE,*X_,*R_,xTx);
2076  tmp = xTx.normFrobenius();
2077  MVT::MvTransMv(ONE,*R_,*R_,xTx);
2078  double normR = xTx.normFrobenius();
2079  os << " >> RelError in X^H R == 0: " << tmp/normR << endl;
2080  }
2081 
2082  // Q
2083  if (chk.checkQ) {
2084  for (Array_size_type i=0; i<auxVecs_.size(); i++) {
2085  tmp = orthman_->orthonormError(*auxVecs_[i]);
2086  os << " >> Error in Q[" << i << "]^H M Q[" << i << "] == I : " << tmp << endl;
2087  for (Array_size_type j=i+1; j<auxVecs_.size(); j++) {
2088  tmp = orthman_->orthogError(*auxVecs_[i],*auxVecs_[j]);
2089  os << " >> Error in Q[" << i << "]^H M Q[" << j << "] == 0 : " << tmp << endl;
2090  }
2091  }
2092  }
2093 
2094  os << endl;
2095 
2096  return os.str();
2097  }
2098 
2099 
2101  // Print the current status of the solver
2102  template <class ScalarType, class MV, class OP>
2103  void
2105  {
2106  using std::endl;
2107 
2108  os.setf(std::ios::scientific, std::ios::floatfield);
2109  os.precision(6);
2110  os <<endl;
2111  os <<"================================================================================" << endl;
2112  os << endl;
2113  os <<" LOBPCG Solver Status" << endl;
2114  os << endl;
2115  os <<"The solver is "<<(initialized_ ? "initialized." : "not initialized.") << endl;
2116  os <<"The number of iterations performed is " << iter_ << endl;
2117  os <<"The current block size is " << blockSize_ << endl;
2118  os <<"The number of auxiliary vectors is " << numAuxVecs_ << endl;
2119  os <<"The number of operations Op*x is " << count_ApplyOp_ << endl;
2120  os <<"The number of operations M*x is " << count_ApplyM_ << endl;
2121  os <<"The number of operations Prec*x is " << count_ApplyPrec_ << endl;
2122 
2123  os.setf(std::ios_base::right, std::ios_base::adjustfield);
2124 
2125  if (initialized_) {
2126  os << endl;
2127  os <<"CURRENT EIGENVALUE ESTIMATES "<<endl;
2128  os << std::setw(20) << "Eigenvalue"
2129  << std::setw(20) << "Residual(M)"
2130  << std::setw(20) << "Residual(2)"
2131  << endl;
2132  os <<"--------------------------------------------------------------------------------"<<endl;
2133  for (int i=0; i<blockSize_; i++) {
2134  os << std::setw(20) << theta_[i];
2135  if (Rnorms_current_) os << std::setw(20) << Rnorms_[i];
2136  else os << std::setw(20) << "not current";
2137  if (R2norms_current_) os << std::setw(20) << R2norms_[i];
2138  else os << std::setw(20) << "not current";
2139  os << endl;
2140  }
2141  }
2142  os <<"================================================================================" << endl;
2143  os << endl;
2144  }
2145 
2147  // are we initialized or not?
2148  template <class ScalarType, class MV, class OP>
2150  return initialized_;
2151  }
2152 
2153 
2155  // is P valid or not?
2156  template <class ScalarType, class MV, class OP>
2158  return hasP_;
2159  }
2160 
2162  // is full orthogonalization enabled or not?
2163  template <class ScalarType, class MV, class OP>
2165  return(fullOrtho_);
2166  }
2167 
2168 
2170  // return the current auxilliary vectors
2171  template <class ScalarType, class MV, class OP>
2173  return auxVecs_;
2174  }
2175 
2177  // return the current block size
2178  template <class ScalarType, class MV, class OP>
2180  return(blockSize_);
2181  }
2182 
2184  // return the current eigenproblem
2185  template <class ScalarType, class MV, class OP>
2187  return(*problem_);
2188  }
2189 
2190 
2192  // return the max subspace dimension
2193  template <class ScalarType, class MV, class OP>
2195  return 3*blockSize_;
2196  }
2197 
2199  // return the current subspace dimension
2200  template <class ScalarType, class MV, class OP>
2202  if (!initialized_) return 0;
2203  return nevLocal_;
2204  }
2205 
2206 
2208  // return the current ritz residual norms
2209  template <class ScalarType, class MV, class OP>
2210  std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType>
2212  {
2213  return this->getRes2Norms();
2214  }
2215 
2216 
2218  // return the current compression indices
2219  template <class ScalarType, class MV, class OP>
2221  std::vector<int> ret(nevLocal_,0);
2222  return ret;
2223  }
2224 
2225 
2227  // return the current ritz values
2228  template <class ScalarType, class MV, class OP>
2229  std::vector<Value<ScalarType> > LOBPCG<ScalarType,MV,OP>::getRitzValues() {
2230  std::vector<Value<ScalarType> > ret(nevLocal_);
2231  for (int i=0; i<nevLocal_; i++) {
2232  ret[i].realpart = theta_[i];
2233  ret[i].imagpart = ZERO;
2234  }
2235  return ret;
2236  }
2237 
2239  // Set a new StatusTest for the solver.
2240  template <class ScalarType, class MV, class OP>
2242  TEUCHOS_TEST_FOR_EXCEPTION(test == Teuchos::null,std::invalid_argument,
2243  "Anasazi::LOBPCG::setStatusTest() was passed a null StatusTest.");
2244  tester_ = test;
2245  }
2246 
2248  // Get the current StatusTest used by the solver.
2249  template <class ScalarType, class MV, class OP>
2251  return tester_;
2252  }
2253 
2255  // return the current ritz vectors
2256  template <class ScalarType, class MV, class OP>
2258  return X_;
2259  }
2260 
2261 
2263  // reset the iteration counter
2264  template <class ScalarType, class MV, class OP>
2266  iter_=0;
2267  }
2268 
2269 
2271  // return the number of iterations
2272  template <class ScalarType, class MV, class OP>
2274  return(iter_);
2275  }
2276 
2277 
2279  // return the state
2280  template <class ScalarType, class MV, class OP>
2283  state.V = V_;
2284  state.KV = KV_;
2285  state.X = X_;
2286  state.KX = KX_;
2287  state.P = P_;
2288  state.KP = KP_;
2289  state.H = H_;
2290  state.KH = KH_;
2291  state.R = R_;
2292  state.T = Teuchos::rcp(new std::vector<MagnitudeType>(theta_));
2293  if (hasM_) {
2294  state.MV = MV_;
2295  state.MX = MX_;
2296  state.MP = MP_;
2297  state.MH = MH_;
2298  }
2299  else {
2300  state.MX = Teuchos::null;
2301  state.MP = Teuchos::null;
2302  state.MH = Teuchos::null;
2303  }
2304  return state;
2305  }
2306 
2307 } // end Anasazi namespace
2308 
2309 #endif // ANASAZI_LOBPCG_HPP
ScalarTraits< ScalarType >::magnitudeType normOne() const
ScalarType * values() const
void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, ScalarType *B, const OrdinalType &ldb) const
Templated virtual class for providing orthogonalization/orthonormalization methods with matrix-based ...
Teuchos::Array< Teuchos::RCP< const MV > > getAuxVecs() const
Get the current auxiliary vectors.
LOBPCG(const Teuchos::RCP< Eigenproblem< ScalarType, MV, OP > > &problem, const Teuchos::RCP< SortManager< typename Teuchos::ScalarTraits< ScalarType >::magnitudeType > > &sorter, const Teuchos::RCP< OutputManager< ScalarType > > &printer, const Teuchos::RCP< StatusTest< ScalarType, MV, OP > > &tester, const Teuchos::RCP< MatOrthoManager< ScalarType, MV, OP > > &ortho, Teuchos::ParameterList &params)
LOBPCG constructor with eigenproblem, solver utilities, and parameter list of solver options...
This class defines the interface required by an eigensolver and status test class to compute solution...
int getBlockSize() const
Get the blocksize to be used by the iterative solver in solving this eigenproblem.
void setStatusTest(Teuchos::RCP< StatusTest< ScalarType, MV, OP > > test)
Set a new StatusTest for the solver.
Declaration of basic traits for the multivector type.
T & get(const std::string &name, T def_value)
int multiply(ETransp transa, ETransp transb, ScalarType alpha, const SerialDenseMatrix< OrdinalType, ScalarType > &A, const SerialDenseMatrix< OrdinalType, ScalarType > &B, ScalarType beta)
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
This class provides the Locally Optimal Block Preconditioned Conjugate Gradient (LOBPCG) iteration...
Teuchos::RCP< const MultiVector > V
The current test basis.
virtual ~LOBPCG()
LOBPCG destructor
Virtual base class which defines basic traits for the operator type.
Teuchos::RCP< const MultiVector > H
The current preconditioned residual vectors.
Teuchos::RCP< const MultiVector > P
The current search direction.
An exception class parent to all Anasazi exceptions.
Teuchos::RCP< const MultiVector > MV
The image of the current test basis under M, or Teuchos::null if M was not specified.
ScalarTraits< ScalarType >::magnitudeType normFrobenius() const
Pure virtual base class which describes the basic interface to the iterative eigensolver.
Teuchos::RCP< const MultiVector > R
The current residual vectors.
void initialize()
Initialize the solver with the initial vectors from the eigenproblem or random data.
Teuchos::RCP< const std::vector< typename Teuchos::ScalarTraits< ScalarType >::magnitudeType > > T
The current Ritz values.
bool isInitialized() const
Indicates whether the solver has been initialized or not.
Anasazi&#39;s templated, static class providing utilities for the solvers.
const Eigenproblem< ScalarType, MV, OP > & getProblem() const
Get a constant reference to the eigenvalue problem.
Anasazi&#39;s templated virtual class for providing routines for orthogonalization and orthonormalization...
void setBlockSize(int blockSize)
Set the blocksize to be used by the iterative solver in solving this eigenproblem.
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Output managers remove the need for the eigensolver to know any information about the required output...
void POTRF(const char &UPLO, const OrdinalType &n, ScalarType *A, const OrdinalType &lda, OrdinalType *info) const
Traits class which defines basic operations on multivectors.
int getNumIters() const
Get the current iteration count.
Virtual base class which defines basic traits for the operator type.
std::vector< typename Teuchos::ScalarTraits< ScalarType >::magnitudeType > getRes2Norms()
Get the current residual 2-norms.
Teuchos::RCP< const MultiVector > MX
The image of the current eigenvectors under M, or Teuchos::null if M was not specified.
Teuchos::RCP< const MultiVector > KX
The image of the current eigenvectors under K.
bool getFullOrtho() const
Determine if the LOBPCG iteration is using full orthogonality.
Teuchos::RCP< const MultiVector > KH
The image of the current preconditioned residual vectors under K.
std::vector< typename Teuchos::ScalarTraits< ScalarType >::magnitudeType > getResNorms()
Get the current residual norms.
std::vector< typename Teuchos::ScalarTraits< ScalarType >::magnitudeType > getRitzRes2Norms()
Get the 2-norms of the residuals.
std::vector< int > getRitzIndex()
Get the index used for extracting Ritz vectors from getRitzVectors().
int getMaxSubspaceDim() const
Get the maximum dimension allocated for the search subspace. For LOBPCG, this always returns 3*getBlo...
Teuchos::RCP< const MultiVector > MH
The image of the current preconditioned residual vectors under M, or Teuchos::null if M was not speci...
Teuchos::RCP< const MultiVector > KP
The image of the current search direction under K.
Teuchos::RCP< StatusTest< ScalarType, MV, OP > > getStatusTest() const
Get the current StatusTest used by the solver.
void push_back(const value_type &x)
void currentStatus(std::ostream &os)
This method requests that the solver print out its current status to screen.
void iterate()
This method performs LOBPCG iterations until the status test indicates the need to stop or an error o...
LOBPCGRitzFailure is thrown when the LOBPCG solver is unable to continue a call to LOBPCG::iterate() ...
std::vector< Value< ScalarType > > getRitzValues()
Get the Ritz values from the previous iteration.
Teuchos::RCP< const MultiVector > KV
The image of the current test basis under K.
void setAuxVecs(const Teuchos::Array< Teuchos::RCP< const MV > > &auxvecs)
Set the auxiliary vectors for the solver.
Types and exceptions used within Anasazi solvers and interfaces.
void resetNumIters()
Reset the iteration count.
Teuchos::RCP< const MultiVector > MP
The image of the current search direction under M, or Teuchos::null if M was not specified.
Anasazi&#39;s templated pure virtual class for managing the sorting of approximate eigenvalues computed b...
Common interface of stopping criteria for Anasazi&#39;s solvers.
LOBPCGOrthoFailure is thrown when an orthogonalization attempt fails.
LOBPCGInitFailure is thrown when the LOBPCG solver is unable to generate an initial iterate in the LO...
LOBPCGState< ScalarType, MV > getState() const
Get the current state of the eigensolver.
The Eigensolver is a templated virtual base class that defines the basic interface that any eigensolv...
void setFullOrtho(bool fullOrtho)
Instruct the LOBPCG iteration to use full orthogonality.
Structure to contain pointers to Anasazi state variables.
Teuchos::RCP< const MultiVector > X
The current eigenvectors.
OrdinalType stride() const
Teuchos::RCP< const MV > getRitzVectors()
Get the Ritz vectors from the previous iteration.
bool hasP()
Indicates whether the search direction given by getState() is valid.
int getCurSubspaceDim() const
Get the dimension of the search subspace used to generate the current eigenvectors and eigenvalues...
Class which provides internal utilities for the Anasazi solvers.