Xpetra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
Xpetra_MatrixMatrix_decl.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Xpetra: A linear algebra interface package
4 //
5 // Copyright 2012 NTESS and the Xpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef PACKAGES_XPETRA_SUP_UTILS_XPETRA_MATRIXMATRIX_DECL_HPP_
11 #define PACKAGES_XPETRA_SUP_UTILS_XPETRA_MATRIXMATRIX_DECL_HPP_
12 
13 #include "Xpetra_ConfigDefs.hpp"
14 
15 #include "Xpetra_BlockedCrsMatrix.hpp"
16 #include "Xpetra_CrsMatrixWrap.hpp"
17 #include "Xpetra_MapExtractor.hpp"
18 #include "Xpetra_Map.hpp"
19 #include "Xpetra_MatrixFactory.hpp"
20 #include "Xpetra_Matrix.hpp"
21 #include "Xpetra_StridedMapFactory.hpp"
22 #include "Xpetra_StridedMap.hpp"
23 
24 #include "Xpetra_Helpers.hpp"
25 
26 #ifdef HAVE_XPETRA_EPETRA
28 #endif
29 
30 #ifdef HAVE_XPETRA_EPETRAEXT
31 #include <EpetraExt_MatrixMatrix.h>
32 #include <EpetraExt_RowMatrixOut.h>
33 #include <Epetra_RowMatrixTransposer.h>
34 #endif // HAVE_XPETRA_EPETRAEXT
35 
36 #ifdef HAVE_XPETRA_TPETRA
37 #include <TpetraExt_MatrixMatrix.hpp>
38 #include <Tpetra_RowMatrixTransposer.hpp>
39 #include <MatrixMarket_Tpetra.hpp>
40 #include <Xpetra_TpetraCrsMatrix.hpp>
41 #include <Xpetra_TpetraBlockCrsMatrix.hpp>
42 #include <Tpetra_BlockCrsMatrix_Helpers.hpp>
43 #include <Xpetra_TpetraMultiVector.hpp>
44 #include <Xpetra_TpetraVector.hpp>
45 #endif // HAVE_XPETRA_TPETRA
46 
47 namespace Xpetra {
48 
49 template <class Scalar,
50  class LocalOrdinal /*= int*/,
51  class GlobalOrdinal /*= LocalOrdinal*/,
52  class Node /*= Tpetra::KokkosClassic::DefaultNode::DefaultNodeType*/>
53 class MatrixMatrix {
54 #undef XPETRA_MATRIXMATRIX_SHORT
55 #include "Xpetra_UseShortNames.hpp"
56 
57  public:
82  static void Multiply(const Matrix& A, bool transposeA,
83  const Matrix& B, bool transposeB,
84  Matrix& C,
85  bool call_FillComplete_on_result = true,
86  bool doOptimizeStorage = true,
87  const std::string& label = std::string(),
88  const RCP<ParameterList>& params = null);
89 
112  static RCP<Matrix> Multiply(const Matrix& A, bool transposeA, const Matrix& B, bool transposeB, RCP<Matrix> C_in,
113  Teuchos::FancyOStream& fos,
114  bool doFillComplete = true,
115  bool doOptimizeStorage = true,
116  const std::string& label = std::string(),
117  const RCP<ParameterList>& params = null);
118 
129  static RCP<Matrix> Multiply(const Matrix& A, bool transposeA, const Matrix& B, bool transposeB, Teuchos::FancyOStream& fos,
130  bool callFillCompleteOnResult = true, bool doOptimizeStorage = true, const std::string& label = std::string(),
131  const RCP<ParameterList>& params = null);
132 
133 #ifdef HAVE_XPETRA_EPETRAEXT
134  // Michael Gee's MLMultiply
135  static RCP<Epetra_CrsMatrix> MLTwoMatrixMultiply(const Epetra_CrsMatrix& epA,
136  const Epetra_CrsMatrix& epB,
137  Teuchos::FancyOStream& fos);
138 #endif // ifdef HAVE_XPETRA_EPETRAEXT
139 
150  static RCP<BlockedCrsMatrix> TwoMatrixMultiplyBlock(const BlockedCrsMatrix& A, bool transposeA,
151  const BlockedCrsMatrix& B, bool transposeB,
152  Teuchos::FancyOStream& fos,
153  bool doFillComplete = true,
154  bool doOptimizeStorage = true);
155 
168  static void TwoMatrixAdd(const Matrix& A, bool transposeA, SC alpha, Matrix& B, SC beta);
169 
184  static void TwoMatrixAdd(const Matrix& A, bool transposeA, const SC& alpha,
185  const Matrix& B, bool transposeB, const SC& beta,
186  RCP<Matrix>& C, Teuchos::FancyOStream& fos, bool AHasFixedNnzPerRow = false);
187 
188 }; // class MatrixMatrix
189 
190 #ifdef HAVE_XPETRA_EPETRA
191 // specialization MatrixMatrix for SC=double, LO=GO=int
192 template <>
193 class MatrixMatrix<double, int, int, EpetraNode> {
194  typedef double Scalar;
195  typedef int LocalOrdinal;
196  typedef int GlobalOrdinal;
197  typedef EpetraNode Node;
198 #include "Xpetra_UseShortNames.hpp"
199 
200  public:
225  static void Multiply(const Matrix& A, bool transposeA,
226  const Matrix& B, bool transposeB,
227  Matrix& C,
228  bool call_FillComplete_on_result = true,
229  bool doOptimizeStorage = true,
230  const std::string& label = std::string(),
231  const RCP<ParameterList>& params = null) {
232  TEUCHOS_TEST_FOR_EXCEPTION(transposeA == false && C.getRowMap()->isSameAs(*A.getRowMap()) == false,
233  Xpetra::Exceptions::RuntimeError, "XpetraExt::MatrixMatrix::Multiply: row map of C is not same as row map of A");
234  TEUCHOS_TEST_FOR_EXCEPTION(transposeA == true && C.getRowMap()->isSameAs(*A.getDomainMap()) == false,
235  Xpetra::Exceptions::RuntimeError, "XpetraExt::MatrixMatrix::Multiply: row map of C is not same as domain map of A");
236 
237  TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete(), Xpetra::Exceptions::RuntimeError, "A is not fill-completed");
238  TEUCHOS_TEST_FOR_EXCEPTION(!B.isFillComplete(), Xpetra::Exceptions::RuntimeError, "B is not fill-completed");
239 
240  bool haveMultiplyDoFillComplete = call_FillComplete_on_result && doOptimizeStorage;
241 
242  using helpers = Xpetra::Helpers<SC, LO, GO, NO>;
243 
244  if (C.getRowMap()->lib() == Xpetra::UseEpetra) {
245 #if defined(HAVE_XPETRA_EPETRA) && defined(HAVE_XPETRA_EPETRAEXT)
246  helpers::epetraExtMult(A, transposeA, B, transposeB, C, haveMultiplyDoFillComplete);
247 #else
248  throw(Xpetra::Exceptions::RuntimeError("Xpetra::MatrixMatrix::Multiply requires EpetraExt to be compiled."));
249 #endif
250  } else if (C.getRowMap()->lib() == Xpetra::UseTpetra) {
251 #ifdef HAVE_XPETRA_TPETRA
252 #if ((defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_OPENMP) || !defined(HAVE_TPETRA_INST_INT_INT))) || \
253  (!defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_SERIAL) || !defined(HAVE_TPETRA_INST_INT_INT))))
254  throw(Xpetra::Exceptions::RuntimeError("Xpetra must be compiled with Tpetra <double,int,int> ETI enabled."));
255 #else
256  if (helpers::isTpetraCrs(A) && helpers::isTpetraCrs(B) && helpers::isTpetraCrs(C)) {
257  // All matrices are Crs
258  const Tpetra::CrsMatrix<SC, LO, GO, NO>& tpA = helpers::Op2TpetraCrs(A);
259  const Tpetra::CrsMatrix<SC, LO, GO, NO>& tpB = helpers::Op2TpetraCrs(B);
260  Tpetra::CrsMatrix<SC, LO, GO, NO>& tpC = helpers::Op2NonConstTpetraCrs(C);
261 
262  // 18Feb2013 JJH I'm reenabling the code that allows the matrix matrix multiply to do the fillComplete.
263  // Previously, Tpetra's matrix matrix multiply did not support fillComplete.
264  Tpetra::MatrixMatrix::Multiply(tpA, transposeA, tpB, transposeB, tpC, haveMultiplyDoFillComplete, label, params);
265  } else if (helpers::isTpetraBlockCrs(A) && helpers::isTpetraBlockCrs(B)) {
266  // All matrices are BlockCrs (except maybe Ac)
267  // FIXME: For the moment we're just going to clobber the innards of Ac, so no reuse. Once we have a reuse kernel,
268  // we'll need to think about refactoring BlockCrs so we can do something smarter here.
269 
270  if (!A.getRowMap()->getComm()->getRank())
271  std::cout << "WARNING: Using inefficient BlockCrs Multiply Placeholder" << std::endl;
272 
273  const Tpetra::BlockCrsMatrix<SC, LO, GO, NO>& tpA = Xpetra::Helpers<SC, LO, GO, NO>::Op2TpetraBlockCrs(A);
274  const Tpetra::BlockCrsMatrix<SC, LO, GO, NO>& tpB = Xpetra::Helpers<SC, LO, GO, NO>::Op2TpetraBlockCrs(B);
275  using CRS = Tpetra::CrsMatrix<SC, LO, GO, NO>;
276  RCP<const CRS> Acrs = Tpetra::convertToCrsMatrix(tpA);
277  RCP<const CRS> Bcrs = Tpetra::convertToCrsMatrix(tpB);
278 
279  // We need the global constants to do the copy back to BlockCrs
280  RCP<ParameterList> new_params;
281  if (!params.is_null()) {
282  new_params = rcp(new Teuchos::ParameterList(*params));
283  new_params->set("compute global constants", true);
284  }
285 
286  // FIXME: The lines below only works because we're assuming Ac is Point
287  RCP<CRS> tempAc = Teuchos::rcp(new CRS(Acrs->getRowMap(), 0));
288  Tpetra::MatrixMatrix::Multiply(*Acrs, transposeA, *Bcrs, transposeB, *tempAc, haveMultiplyDoFillComplete, label, new_params);
289 
290  // Temporary output matrix
291  RCP<Tpetra::BlockCrsMatrix<SC, LO, GO, NO> > Ac_t = Tpetra::convertToBlockCrsMatrix(*tempAc, A.GetStorageBlockSize());
292  RCP<Xpetra::TpetraBlockCrsMatrix<SC, LO, GO, NO> > Ac_x = Teuchos::rcp(new Xpetra::TpetraBlockCrsMatrix<SC, LO, GO, NO>(Ac_t));
293  RCP<Xpetra::CrsMatrix<SC, LO, GO, NO> > Ac_p = Ac_x;
294 
295  // We can now cheat and replace the innards of Ac
296  RCP<Xpetra::CrsMatrixWrap<SC, LO, GO, NO> > Ac_w = Teuchos::rcp_dynamic_cast<Xpetra::CrsMatrixWrap<SC, LO, GO, NO> >(Teuchos::rcpFromRef(C));
297  Ac_w->replaceCrsMatrix(Ac_p);
298  } else {
299  // Mix and match
300  TEUCHOS_TEST_FOR_EXCEPTION(1, Exceptions::RuntimeError, "Mix-and-match Crs/BlockCrs Multiply not currently supported");
301  }
302 #endif
303 #else
304  throw(Xpetra::Exceptions::RuntimeError("Xpetra must be compiled with Tpetra."));
305 #endif
306  }
307 
308  if (call_FillComplete_on_result && !haveMultiplyDoFillComplete) {
309  RCP<Teuchos::ParameterList> fillParams = rcp(new Teuchos::ParameterList());
310  fillParams->set("Optimize Storage", doOptimizeStorage);
311  C.fillComplete((transposeB) ? B.getRangeMap() : B.getDomainMap(),
312  (transposeA) ? A.getDomainMap() : A.getRangeMap(),
313  fillParams);
314  }
315 
316  // transfer striding information
317  RCP<Matrix> rcpA = Teuchos::rcp_const_cast<Matrix>(Teuchos::rcpFromRef(A));
318  RCP<Matrix> rcpB = Teuchos::rcp_const_cast<Matrix>(Teuchos::rcpFromRef(B));
319  C.CreateView("stridedMaps", rcpA, transposeA, rcpB, transposeB); // TODO use references instead of RCPs
320  } // end Multiply
321 
344  static RCP<Matrix> Multiply(const Matrix& A, bool transposeA,
345  const Matrix& B, bool transposeB,
346  RCP<Matrix> C_in,
347  Teuchos::FancyOStream& fos,
348  bool doFillComplete = true,
349  bool doOptimizeStorage = true,
350  const std::string& label = std::string(),
351  const RCP<ParameterList>& params = null) {
352  TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete(), Exceptions::RuntimeError, "A is not fill-completed");
353  TEUCHOS_TEST_FOR_EXCEPTION(!B.isFillComplete(), Exceptions::RuntimeError, "B is not fill-completed");
354 
355  // Optimization using ML Multiply when available and requested
356  // This feature is currently not supported. We would have to introduce the HAVE_XPETRA_ML_MMM flag
357 #if defined(HAVE_XPETRA_EPETRA) && defined(HAVE_XPETRA_EPETRAEXT) && defined(HAVE_XPETRA_ML_MMM)
358  if (B.getDomainMap()->lib() == Xpetra::UseEpetra && !transposeA && !transposeB) {
359  RCP<const Epetra_CrsMatrix> epA = Xpetra::Helpers<SC, LO, GO, NO>::Op2EpetraCrs(rcpFromRef(A));
360  RCP<const Epetra_CrsMatrix> epB = Xpetra::Helpers<SC, LO, GO, NO>::Op2EpetraCrs(rcpFromRef(B));
361  RCP<Epetra_CrsMatrix> epC = MLTwoMatrixMultiply(*epA, *epB, fos);
362 
363  RCP<Matrix> C = Convert_Epetra_CrsMatrix_ToXpetra_CrsMatrixWrap<SC, LO, GO, NO>(epC);
364  if (doFillComplete) {
365  RCP<Teuchos::ParameterList> fillParams = rcp(new Teuchos::ParameterList());
366  fillParams->set("Optimize Storage", doOptimizeStorage);
367  C->fillComplete(B.getDomainMap(), A.getRangeMap(), fillParams);
368  }
369 
370  // Fill strided maps information
371  // This is necessary since the ML matrix matrix multiplication routine has no handling for this
372  // TODO: move this call to MLMultiply...
373  C->CreateView("stridedMaps", rcpFromRef(A), transposeA, rcpFromRef(B), transposeB);
374 
375  return C;
376  }
377 #endif // EPETRA + EPETRAEXT + ML
378 
379  // Default case: Xpetra Multiply
380  RCP<Matrix> C = C_in;
381 
382  if (C == Teuchos::null) {
383  double nnzPerRow = Teuchos::as<double>(0);
384 
385 #if 0
386  if (A.getDomainMap()->lib() == Xpetra::UseTpetra) {
387  // For now, follow what ML and Epetra do.
388  GO numRowsA = A.getGlobalNumRows();
389  GO numRowsB = B.getGlobalNumRows();
390  nnzPerRow = sqrt(Teuchos::as<double>(A.getGlobalNumEntries())/numRowsA) +
391  sqrt(Teuchos::as<double>(B.getGlobalNumEntries())/numRowsB) - 1;
392  nnzPerRow *= nnzPerRow;
393  double totalNnz = nnzPerRow * A.getGlobalNumRows() * 0.75 + 100;
394  double minNnz = Teuchos::as<double>(1.2 * A.getGlobalNumEntries());
395  if (totalNnz < minNnz)
396  totalNnz = minNnz;
397  nnzPerRow = totalNnz / A.getGlobalNumRows();
398 
399  fos << "Matrix product nnz per row estimate = " << Teuchos::as<LO>(nnzPerRow) << std::endl;
400  }
401 #endif
402 
403  if (transposeA)
404  C = MatrixFactory::Build(A.getDomainMap(), Teuchos::as<LO>(nnzPerRow));
405  else
406  C = MatrixFactory::Build(A.getRowMap(), Teuchos::as<LO>(nnzPerRow));
407 
408  } else {
409  C->resumeFill(); // why this is not done inside of Tpetra MxM?
410  fos << "Reuse C pattern" << std::endl;
411  }
412 
413  Multiply(A, transposeA, B, transposeB, *C, doFillComplete, doOptimizeStorage, label, params); // call Multiply routine from above
414 
415  return C;
416  }
417 
428  static RCP<Matrix> Multiply(const Matrix& A, bool transposeA,
429  const Matrix& B, bool transposeB,
430  Teuchos::FancyOStream& fos,
431  bool callFillCompleteOnResult = true,
432  bool doOptimizeStorage = true,
433  const std::string& label = std::string(),
434  const RCP<ParameterList>& params = null) {
435  return Multiply(A, transposeA, B, transposeB, Teuchos::null, fos, callFillCompleteOnResult, doOptimizeStorage, label, params);
436  }
437 
438 #if defined(HAVE_XPETRA_EPETRA) && defined(HAVE_XPETRA_EPETRAEXT)
439  // Michael Gee's MLMultiply
440  static RCP<Epetra_CrsMatrix> MLTwoMatrixMultiply(const Epetra_CrsMatrix& epA,
441  const Epetra_CrsMatrix& epB,
442  Teuchos::FancyOStream& fos) {
443 #if defined(HAVE_XPETRA_ML_MMM) // Note: this is currently not supported
444  ML_Comm* comm;
445  ML_Comm_Create(&comm);
446  fos << "****** USING ML's MATRIX MATRIX MULTIPLY ******" << std::endl;
447 #ifdef HAVE_MPI
448  // ML_Comm uses MPI_COMM_WORLD, so try to use the same communicator as epA.
449  const Epetra_MpiComm* Mcomm = dynamic_cast<const Epetra_MpiComm*>(&(epA.Comm()));
450  if (Mcomm)
451  ML_Comm_Set_UsrComm(comm, Mcomm->GetMpiComm());
452 #endif
453  // in order to use ML, there must be no indices missing from the matrix column maps.
454  EpetraExt::CrsMatrix_SolverMap Atransform;
455  EpetraExt::CrsMatrix_SolverMap Btransform;
456  const Epetra_CrsMatrix& A = Atransform(const_cast<Epetra_CrsMatrix&>(epA));
457  const Epetra_CrsMatrix& B = Btransform(const_cast<Epetra_CrsMatrix&>(epB));
458 
459  if (!A.Filled()) throw Exceptions::RuntimeError("A has to be FillCompleted");
460  if (!B.Filled()) throw Exceptions::RuntimeError("B has to be FillCompleted");
461 
462  // create ML operators from EpetraCrsMatrix
463  ML_Operator* ml_As = ML_Operator_Create(comm);
464  ML_Operator* ml_Bs = ML_Operator_Create(comm);
465  ML_Operator_WrapEpetraCrsMatrix(const_cast<Epetra_CrsMatrix*>(&A), ml_As); // Should we test if the lightweight wrapper is actually used or if WrapEpetraCrsMatrix fall backs to the heavy one?
466  ML_Operator_WrapEpetraCrsMatrix(const_cast<Epetra_CrsMatrix*>(&B), ml_Bs);
467  ML_Operator* ml_AtimesB = ML_Operator_Create(comm);
468  {
469  Teuchos::TimeMonitor tm(*Teuchos::TimeMonitor::getNewTimer("ML_2matmult kernel"));
470  ML_2matmult(ml_As, ml_Bs, ml_AtimesB, ML_CSR_MATRIX); // do NOT use ML_EpetraCRS_MATRIX!!!
471  }
472  ML_Operator_Destroy(&ml_As);
473  ML_Operator_Destroy(&ml_Bs);
474 
475  // For ml_AtimesB we have to reconstruct the column map in global indexing,
476  // The following is going down to the salt-mines of ML ...
477  // note: we use integers, since ML only works for Epetra...
478  int N_local = ml_AtimesB->invec_leng;
479  ML_CommInfoOP* getrow_comm = ml_AtimesB->getrow->pre_comm;
480  if (!getrow_comm) throw(Exceptions::RuntimeError("ML_Operator does not have a CommInfo"));
481  ML_Comm* comm_AB = ml_AtimesB->comm; // get comm object
482  if (N_local != B.DomainMap().NumMyElements())
483  throw(Exceptions::RuntimeError("Mismatch in local row dimension between ML and Epetra"));
484  int N_rcvd = 0;
485  int N_send = 0;
486  int flag = 0;
487  for (int i = 0; i < getrow_comm->N_neighbors; i++) {
488  N_rcvd += (getrow_comm->neighbors)[i].N_rcv;
489  N_send += (getrow_comm->neighbors)[i].N_send;
490  if (((getrow_comm->neighbors)[i].N_rcv != 0) &&
491  ((getrow_comm->neighbors)[i].rcv_list != NULL)) flag = 1;
492  }
493  // For some unknown reason, ML likes to have stuff one larger than
494  // neccessary...
495  std::vector<double> dtemp(N_local + N_rcvd + 1); // "double" vector for comm function
496  std::vector<int> cmap(N_local + N_rcvd + 1); // vector for gids
497  for (int i = 0; i < N_local; ++i) {
498  cmap[i] = B.DomainMap().GID(i);
499  dtemp[i] = (double)cmap[i];
500  }
501  ML_cheap_exchange_bdry(&dtemp[0], getrow_comm, N_local, N_send, comm_AB); // do communication
502  if (flag) { // process received data
503  int count = N_local;
504  const int neighbors = getrow_comm->N_neighbors;
505  for (int i = 0; i < neighbors; i++) {
506  const int nrcv = getrow_comm->neighbors[i].N_rcv;
507  for (int j = 0; j < nrcv; j++)
508  cmap[getrow_comm->neighbors[i].rcv_list[j]] = (int)dtemp[count++];
509  }
510  } else {
511  for (int i = 0; i < N_local + N_rcvd; ++i)
512  cmap[i] = (int)dtemp[i];
513  }
514  dtemp.clear(); // free double array
515 
516  // we can now determine a matching column map for the result
517  Epetra_Map gcmap(-1, N_local + N_rcvd, &cmap[0], B.ColMap().IndexBase(), A.Comm());
518 
519  int allocated = 0;
520  int rowlength;
521  double* val = NULL;
522  int* bindx = NULL;
523 
524  const int myrowlength = A.RowMap().NumMyElements();
525  const Epetra_Map& rowmap = A.RowMap();
526 
527  // Determine the maximum bandwith for the result matrix.
528  // replaces the old, very(!) memory-consuming guess:
529  // int guessnpr = A.MaxNumEntries()*B.MaxNumEntries();
530  int educatedguess = 0;
531  for (int i = 0; i < myrowlength; ++i) {
532  // get local row
533  ML_get_matrix_row(ml_AtimesB, 1, &i, &allocated, &bindx, &val, &rowlength, 0);
534  if (rowlength > educatedguess)
535  educatedguess = rowlength;
536  }
537 
538  // allocate our result matrix and fill it
539  RCP<Epetra_CrsMatrix> result = rcp(new Epetra_CrsMatrix(::Copy, A.RangeMap(), gcmap, educatedguess, false));
540 
541  std::vector<int> gcid(educatedguess);
542  for (int i = 0; i < myrowlength; ++i) {
543  const int grid = rowmap.GID(i);
544  // get local row
545  ML_get_matrix_row(ml_AtimesB, 1, &i, &allocated, &bindx, &val, &rowlength, 0);
546  if (!rowlength) continue;
547  if ((int)gcid.size() < rowlength) gcid.resize(rowlength);
548  for (int j = 0; j < rowlength; ++j) {
549  gcid[j] = gcmap.GID(bindx[j]);
550  if (gcid[j] < 0)
551  throw Exceptions::RuntimeError("Error: cannot find gcid!");
552  }
553  int err = result->InsertGlobalValues(grid, rowlength, val, &gcid[0]);
554  if (err != 0 && err != 1) {
555  std::ostringstream errStr;
556  errStr << "Epetra_CrsMatrix::InsertGlobalValues returned err=" << err;
557  throw Exceptions::RuntimeError(errStr.str());
558  }
559  }
560  // free memory
561  if (bindx) ML_free(bindx);
562  if (val) ML_free(val);
563  ML_Operator_Destroy(&ml_AtimesB);
564  ML_Comm_Destroy(&comm);
565 
566  return result;
567 #else // no MUELU_ML
568  (void)epA;
569  (void)epB;
570  (void)fos;
571  TEUCHOS_TEST_FOR_EXCEPTION(true, Xpetra::Exceptions::RuntimeError,
572  "No ML multiplication available. This feature is currently not supported by Xpetra.");
573  TEUCHOS_UNREACHABLE_RETURN(Teuchos::null);
574 #endif
575  }
576 #endif // ifdef HAVE_XPETRA_EPETRAEXT
577 
588  static RCP<BlockedCrsMatrix> TwoMatrixMultiplyBlock(const BlockedCrsMatrix& A, bool transposeA,
589  const BlockedCrsMatrix& B, bool transposeB,
590  Teuchos::FancyOStream& fos,
591  bool doFillComplete = true,
592  bool doOptimizeStorage = true) {
593  TEUCHOS_TEST_FOR_EXCEPTION(transposeA || transposeB, Exceptions::RuntimeError,
594  "TwoMatrixMultiply for BlockedCrsMatrix not implemented for transposeA==true or transposeB==true");
595 
596  // Preconditions
597  TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete(), Exceptions::RuntimeError, "A is not fill-completed");
598  TEUCHOS_TEST_FOR_EXCEPTION(!B.isFillComplete(), Exceptions::RuntimeError, "B is not fill-completed");
599 
600  RCP<const MapExtractor> rgmapextractor = A.getRangeMapExtractor();
601  RCP<const MapExtractor> domapextractor = B.getDomainMapExtractor();
602 
603  RCP<BlockedCrsMatrix> C = rcp(new BlockedCrsMatrix(rgmapextractor, domapextractor, 33 /* TODO fix me */));
604 
605  for (size_t i = 0; i < A.Rows(); ++i) { // loop over all block rows of A
606  for (size_t j = 0; j < B.Cols(); ++j) { // loop over all block columns of B
607  RCP<Matrix> Cij;
608 
609  for (size_t l = 0; l < B.Rows(); ++l) { // loop for calculating entry C_{ij}
610  RCP<Matrix> crmat1 = A.getMatrix(i, l);
611  RCP<Matrix> crmat2 = B.getMatrix(l, j);
612 
613  if (crmat1.is_null() || crmat2.is_null())
614  continue;
615 
616  // try unwrapping 1x1 blocked matrix
617  {
618  auto unwrap1 = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(crmat1);
619  auto unwrap2 = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(crmat2);
620 
621  if (unwrap1.is_null() != unwrap2.is_null()) {
622  if (unwrap1 != Teuchos::null && unwrap1->Rows() == 1 && unwrap1->Cols() == 1)
623  crmat1 = unwrap1->getCrsMatrix();
624  if (unwrap2 != Teuchos::null && unwrap2->Rows() == 1 && unwrap2->Cols() == 1)
625  crmat2 = unwrap2->getCrsMatrix();
626  }
627  }
628 
629  RCP<CrsMatrixWrap> crop1 = Teuchos::rcp_dynamic_cast<CrsMatrixWrap>(crmat1);
630  RCP<CrsMatrixWrap> crop2 = Teuchos::rcp_dynamic_cast<CrsMatrixWrap>(crmat2);
631  TEUCHOS_TEST_FOR_EXCEPTION(crop1.is_null() != crop2.is_null(), Xpetra::Exceptions::RuntimeError,
632  "A and B must be either both (compatible) BlockedCrsMatrix objects or both CrsMatrixWrap objects.");
633 
634  // Forcibly compute the global constants if we don't have them (only works for real CrsMatrices, not nested blocks)
635  if (!crop1.is_null())
636  Teuchos::rcp_const_cast<CrsGraph>(crmat1->getCrsGraph())->computeGlobalConstants();
637  if (!crop2.is_null())
638  Teuchos::rcp_const_cast<CrsGraph>(crmat2->getCrsGraph())->computeGlobalConstants();
639 
640  TEUCHOS_TEST_FOR_EXCEPTION(!crmat1->haveGlobalConstants(), Exceptions::RuntimeError,
641  "crmat1 does not have global constants");
642  TEUCHOS_TEST_FOR_EXCEPTION(!crmat2->haveGlobalConstants(), Exceptions::RuntimeError,
643  "crmat2 does not have global constants");
644 
645  if (crmat1->getGlobalNumEntries() == 0 || crmat2->getGlobalNumEntries() == 0)
646  continue;
647 
648  // temporary matrix containing result of local block multiplication
649  RCP<Matrix> temp = Teuchos::null;
650 
651  if (crop1 != Teuchos::null && crop2 != Teuchos::null)
652  temp = Multiply(*crop1, false, *crop2, false, fos);
653  else {
654  RCP<BlockedCrsMatrix> bop1 = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(crmat1);
655  RCP<BlockedCrsMatrix> bop2 = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(crmat2);
656  TEUCHOS_TEST_FOR_EXCEPTION(bop1.is_null() == true, Xpetra::Exceptions::BadCast, "A is not a BlockedCrsMatrix. (TwoMatrixMultiplyBlock)");
657  TEUCHOS_TEST_FOR_EXCEPTION(bop2.is_null() == true, Xpetra::Exceptions::BadCast, "B is not a BlockedCrsMatrix. (TwoMatrixMultiplyBlock)");
658  TEUCHOS_TEST_FOR_EXCEPTION(bop1->Cols() != bop2->Rows(), Xpetra::Exceptions::RuntimeError, "A has " << bop1->Cols() << " columns and B has " << bop2->Rows() << " rows. Matrices are not compatible! (TwoMatrixMultiplyBlock)");
659  TEUCHOS_TEST_FOR_EXCEPTION(bop1->getDomainMap()->isSameAs(*(bop2->getRangeMap())) == false, Xpetra::Exceptions::RuntimeError, "Domain map of A is not the same as range map of B. Matrices are not compatible! (TwoMatrixMultiplyBlock)");
660 
661  // recursive multiplication call
662  temp = TwoMatrixMultiplyBlock(*bop1, transposeA, *bop2, transposeB, fos, doFillComplete, doOptimizeStorage);
663 
664  RCP<BlockedCrsMatrix> btemp = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(temp);
665  TEUCHOS_TEST_FOR_EXCEPTION(btemp->Rows() != bop1->Rows(), Xpetra::Exceptions::RuntimeError, "Number of block rows of local blocked operator is " << btemp->Rows() << " but should be " << bop1->Rows() << ". (TwoMatrixMultiplyBlock)");
666  TEUCHOS_TEST_FOR_EXCEPTION(btemp->Cols() != bop2->Cols(), Xpetra::Exceptions::RuntimeError, "Number of block cols of local blocked operator is " << btemp->Cols() << " but should be " << bop2->Cols() << ". (TwoMatrixMultiplyBlock)");
667  TEUCHOS_TEST_FOR_EXCEPTION(btemp->getRangeMapExtractor()->getFullMap()->isSameAs(*(bop1->getRangeMapExtractor()->getFullMap())) == false, Xpetra::Exceptions::RuntimeError, "Range map of local blocked operator should be same as first operator. (TwoMatrixMultiplyBlock)");
668  TEUCHOS_TEST_FOR_EXCEPTION(btemp->getDomainMapExtractor()->getFullMap()->isSameAs(*(bop2->getDomainMapExtractor()->getFullMap())) == false, Xpetra::Exceptions::RuntimeError, "Domain map of local blocked operator should be same as second operator. (TwoMatrixMultiplyBlock)");
669  TEUCHOS_TEST_FOR_EXCEPTION(btemp->getRangeMapExtractor()->getThyraMode() != bop1->getRangeMapExtractor()->getThyraMode(), Xpetra::Exceptions::RuntimeError, "Thyra mode of local range map extractor incompatible with range map extractor of A (TwoMatrixMultiplyBlock)");
670  TEUCHOS_TEST_FOR_EXCEPTION(btemp->getDomainMapExtractor()->getThyraMode() != bop2->getDomainMapExtractor()->getThyraMode(), Xpetra::Exceptions::RuntimeError, "Thyra mode of local domain map extractor incompatible with domain map extractor of B (TwoMatrixMultiplyBlock)");
671  }
672 
673  TEUCHOS_TEST_FOR_EXCEPTION(temp->isFillComplete() == false, Xpetra::Exceptions::RuntimeError, "Local block is not filled. (TwoMatrixMultiplyBlock)");
674 
675  RCP<Matrix> addRes = null;
676  if (Cij.is_null())
677  Cij = temp;
678  else {
679  MatrixMatrix::TwoMatrixAdd(*temp, false, 1.0, *Cij, false, 1.0, addRes, fos);
680  Cij = addRes;
681  }
682  }
683 
684  if (!Cij.is_null()) {
685  if (Cij->isFillComplete())
686  Cij->resumeFill();
687  Cij->fillComplete(B.getDomainMap(j), A.getRangeMap(i));
688  C->setMatrix(i, j, Cij);
689  } else {
690  C->setMatrix(i, j, Teuchos::null);
691  }
692  }
693  }
694 
695  if (doFillComplete)
696  C->fillComplete(); // call default fillComplete for BlockCrsMatrixWrap objects
697 
698  return C;
699  } // TwoMatrixMultiplyBlock
700 
713  static void TwoMatrixAdd(const Matrix& A, bool transposeA, SC alpha, Matrix& B, SC beta) {
714  TEUCHOS_TEST_FOR_EXCEPTION(!(A.getRowMap()->isSameAs(*B.getRowMap())), Exceptions::Incompatible,
715  "TwoMatrixAdd: matrix row maps are not the same.");
716 
717  if (A.getRowMap()->lib() == Xpetra::UseEpetra) {
718 #if defined(HAVE_XPETRA_EPETRA) && defined(HAVE_XPETRA_EPETRAEXT)
719  const Epetra_CrsMatrix& epA = Xpetra::Helpers<SC, LO, GO, NO>::Op2EpetraCrs(A);
721 
722  // FIXME is there a bug if beta=0?
723  int rv = EpetraExt::MatrixMatrix::Add(epA, transposeA, alpha, epB, beta);
724 
725  if (rv != 0)
726  throw Exceptions::RuntimeError("EpetraExt::MatrixMatrix::Add return value " + Teuchos::toString(rv));
727  std::ostringstream buf;
728 #else
729  throw Exceptions::RuntimeError("Xpetra must be compiled with EpetraExt.");
730 #endif
731  } else if (A.getRowMap()->lib() == Xpetra::UseTpetra) {
732 #ifdef HAVE_XPETRA_TPETRA
733 #if ((defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_OPENMP) || !defined(HAVE_TPETRA_INST_INT_INT))) || \
734  (!defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_SERIAL) || !defined(HAVE_TPETRA_INST_INT_INT))))
735  throw(Xpetra::Exceptions::RuntimeError("Xpetra must be compiled with Tpetra GO=int enabled."));
736 #else
737  const Tpetra::CrsMatrix<SC, LO, GO, NO>& tpA = Xpetra::Helpers<SC, LO, GO, NO>::Op2TpetraCrs(A);
738  Tpetra::CrsMatrix<SC, LO, GO, NO>& tpB = Xpetra::Helpers<SC, LO, GO, NO>::Op2NonConstTpetraCrs(B);
739 
740  Tpetra::MatrixMatrix::Add(tpA, transposeA, alpha, tpB, beta);
741 #endif
742 #else
743  throw Exceptions::RuntimeError("Xpetra must be compiled with Tpetra.");
744 #endif
745  }
746  } // MatrixMatrix::TwoMatrixAdd()
747 
762  static void TwoMatrixAdd(const Matrix& A, bool transposeA, const SC& alpha,
763  const Matrix& B, bool transposeB, const SC& beta,
764  RCP<Matrix>& C, Teuchos::FancyOStream& fos, bool AHasFixedNnzPerRow = false) {
765  using helpers = Xpetra::Helpers<SC, LO, GO, NO>;
766  RCP<const Matrix> rcpA = Teuchos::rcpFromRef(A);
767  RCP<const Matrix> rcpB = Teuchos::rcpFromRef(B);
768  RCP<const BlockedCrsMatrix> rcpBopA = Teuchos::rcp_dynamic_cast<const BlockedCrsMatrix>(rcpA);
769  RCP<const BlockedCrsMatrix> rcpBopB = Teuchos::rcp_dynamic_cast<const BlockedCrsMatrix>(rcpB);
770 
771  if (rcpBopA == Teuchos::null && rcpBopB == Teuchos::null) {
772  if (!(A.getRowMap()->isSameAs(*(B.getRowMap()))))
773  throw Exceptions::Incompatible("TwoMatrixAdd: matrix row maps are not the same.");
774 
775  auto lib = A.getRowMap()->lib();
776  if (lib == Xpetra::UseEpetra) {
777 #if defined(HAVE_XPETRA_EPETRA) && defined(HAVE_XPETRA_EPETRAEXT)
778  const Epetra_CrsMatrix& epA = helpers::Op2EpetraCrs(A);
779  const Epetra_CrsMatrix& epB = helpers::Op2EpetraCrs(B);
780  if (C.is_null()) {
781  size_t maxNzInA = 0;
782  size_t maxNzInB = 0;
783  size_t numLocalRows = 0;
784  if (A.isFillComplete() && B.isFillComplete()) {
785  maxNzInA = A.getLocalMaxNumRowEntries();
786  maxNzInB = B.getLocalMaxNumRowEntries();
787  numLocalRows = A.getLocalNumRows();
788  }
789 
790  if (maxNzInA == 1 || maxNzInB == 1 || AHasFixedNnzPerRow) {
791  // first check if either A or B has at most 1 nonzero per row
792  // the case of both having at most 1 nz per row is handled by the ``else''
793  Teuchos::ArrayRCP<size_t> exactNnzPerRow(numLocalRows);
794 
795  if ((maxNzInA == 1 && maxNzInB > 1) || AHasFixedNnzPerRow) {
796  for (size_t i = 0; i < numLocalRows; ++i)
797  exactNnzPerRow[i] = B.getNumEntriesInLocalRow(Teuchos::as<LO>(i)) + maxNzInA;
798 
799  } else {
800  for (size_t i = 0; i < numLocalRows; ++i)
801  exactNnzPerRow[i] = A.getNumEntriesInLocalRow(Teuchos::as<LO>(i)) + maxNzInB;
802  }
803 
804  fos << "MatrixMatrix::TwoMatrixAdd : special case detected (one matrix has a fixed nnz per row)"
805  << ", using static profiling" << std::endl;
806  C = rcp(new Xpetra::CrsMatrixWrap<SC, LO, GO, NO>(A.getRowMap(), exactNnzPerRow));
807 
808  } else {
809  // general case
810  LO maxPossibleNNZ = A.getLocalMaxNumRowEntries() + B.getLocalMaxNumRowEntries();
811  C = rcp(new Xpetra::CrsMatrixWrap<SC, LO, GO, NO>(A.getRowMap(), maxPossibleNNZ));
812  }
813  if (transposeB)
814  fos << "MatrixMatrix::TwoMatrixAdd : ** WARNING ** estimate could be badly wrong because second summand is transposed" << std::endl;
815  }
816  RCP<Epetra_CrsMatrix> epC = helpers::Op2NonConstEpetraCrs(C);
817  Epetra_CrsMatrix* ref2epC = &*epC; // to avoid a compiler error...
818 
819  // FIXME is there a bug if beta=0?
820  int rv = EpetraExt::MatrixMatrix::Add(epA, transposeA, alpha, epB, transposeB, beta, ref2epC);
821 
822  if (rv != 0)
823  throw Exceptions::RuntimeError("EpetraExt::MatrixMatrix::Add return value of " + Teuchos::toString(rv));
824 #else
825  throw Exceptions::RuntimeError("MueLu must be compile with EpetraExt.");
826 #endif
827  } else if (lib == Xpetra::UseTpetra) {
828 #ifdef HAVE_XPETRA_TPETRA
829 #if ((defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_OPENMP) || !defined(HAVE_TPETRA_INST_INT_INT))) || \
830  (!defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_SERIAL) || !defined(HAVE_TPETRA_INST_INT_INT))))
831  throw(Xpetra::Exceptions::RuntimeError("Xpetra must be compiled with Tpetra GO=int enabled."));
832 #else
833  using tcrs_matrix_type = Tpetra::CrsMatrix<SC, LO, GO, NO>;
834  const tcrs_matrix_type& tpA = helpers::Op2TpetraCrs(A);
835  const tcrs_matrix_type& tpB = helpers::Op2TpetraCrs(B);
836  C = helpers::tpetraAdd(tpA, transposeA, alpha, tpB, transposeB, beta);
837 #endif
838 #else
839  throw Exceptions::RuntimeError("Xpetra must be compile with Tpetra.");
840 #endif
841  }
842 
844  if (A.IsView("stridedMaps")) C->CreateView("stridedMaps", rcpFromRef(A));
845  if (B.IsView("stridedMaps")) C->CreateView("stridedMaps", rcpFromRef(B));
847  }
848  // the first matrix is of type CrsMatrixWrap, the second is a blocked operator
849  else if (rcpBopA == Teuchos::null && rcpBopB != Teuchos::null) {
850  RCP<const MapExtractor> rgmapextractor = rcpBopB->getRangeMapExtractor();
851  RCP<const MapExtractor> domapextractor = rcpBopB->getDomainMapExtractor();
852 
853  C = rcp(new BlockedCrsMatrix(rgmapextractor, domapextractor, 33 /* TODO fix me */));
854  RCP<BlockedCrsMatrix> bC = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(C);
855 
856  size_t i = 0;
857  for (size_t j = 0; j < rcpBopB->Cols(); ++j) { // loop over all block columns of B
858  RCP<Matrix> Cij = Teuchos::null;
859  if (rcpA != Teuchos::null &&
860  rcpBopB->getMatrix(i, j) != Teuchos::null) {
861  // recursive call
862  TwoMatrixAdd(*rcpA, transposeA, alpha,
863  *(rcpBopB->getMatrix(i, j)), transposeB, beta,
864  Cij, fos, AHasFixedNnzPerRow);
865  } else if (rcpA == Teuchos::null &&
866  rcpBopB->getMatrix(i, j) != Teuchos::null) {
867  Cij = rcpBopB->getMatrix(i, j);
868  } else if (rcpA != Teuchos::null &&
869  rcpBopB->getMatrix(i, j) == Teuchos::null) {
870  Cij = Teuchos::rcp_const_cast<Matrix>(rcpA);
871  } else {
872  Cij = Teuchos::null;
873  }
874 
875  if (!Cij.is_null()) {
876  if (Cij->isFillComplete())
877  Cij->resumeFill();
878  Cij->fillComplete();
879  bC->setMatrix(i, j, Cij);
880  } else {
881  bC->setMatrix(i, j, Teuchos::null);
882  }
883  } // loop over columns j
884  }
885  // the second matrix is of type CrsMatrixWrap, the first is a blocked operator
886  else if (rcpBopA != Teuchos::null && rcpBopB == Teuchos::null) {
887  RCP<const MapExtractor> rgmapextractor = rcpBopA->getRangeMapExtractor();
888  RCP<const MapExtractor> domapextractor = rcpBopA->getDomainMapExtractor();
889 
890  C = rcp(new BlockedCrsMatrix(rgmapextractor, domapextractor, 33 /* TODO fix me */));
891  RCP<BlockedCrsMatrix> bC = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(C);
892 
893  size_t j = 0;
894  for (size_t i = 0; i < rcpBopA->Rows(); ++i) { // loop over all block rows of A
895  RCP<Matrix> Cij = Teuchos::null;
896  if (rcpBopA->getMatrix(i, j) != Teuchos::null &&
897  rcpB != Teuchos::null) {
898  // recursive call
899  TwoMatrixAdd(*(rcpBopA->getMatrix(i, j)), transposeA, alpha,
900  *rcpB, transposeB, beta,
901  Cij, fos, AHasFixedNnzPerRow);
902  } else if (rcpBopA->getMatrix(i, j) == Teuchos::null &&
903  rcpB != Teuchos::null) {
904  Cij = Teuchos::rcp_const_cast<Matrix>(rcpB);
905  } else if (rcpBopA->getMatrix(i, j) != Teuchos::null &&
906  rcpB == Teuchos::null) {
907  Cij = rcpBopA->getMatrix(i, j);
908  } else {
909  Cij = Teuchos::null;
910  }
911 
912  if (!Cij.is_null()) {
913  if (Cij->isFillComplete())
914  Cij->resumeFill();
915  Cij->fillComplete();
916  bC->setMatrix(i, j, Cij);
917  } else {
918  bC->setMatrix(i, j, Teuchos::null);
919  }
920  } // loop over rows i
921  } else {
922  // This is the version for blocked matrices
923 
924  // check the compatibility of the blocked operators
925  TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA.is_null() == true, Xpetra::Exceptions::BadCast, "A is not a BlockedCrsMatrix. (TwoMatrixAdd)");
926  TEUCHOS_TEST_FOR_EXCEPTION(rcpBopB.is_null() == true, Xpetra::Exceptions::BadCast, "B is not a BlockedCrsMatrix. (TwoMatrixAdd)");
927  TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA->Rows() != rcpBopB->Rows(), Xpetra::Exceptions::RuntimeError, "A has " << rcpBopA->Rows() << " rows and B has " << rcpBopA->Rows() << " rows. Matrices are not compatible! (TwoMatrixAdd)");
928  TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA->Rows() != rcpBopB->Rows(), Xpetra::Exceptions::RuntimeError, "A has " << rcpBopA->Cols() << " cols and B has " << rcpBopA->Cols() << " cols. Matrices are not compatible! (TwoMatrixAdd)");
929  TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA->getRangeMap()->isSameAs(*(rcpBopB->getRangeMap())) == false, Xpetra::Exceptions::RuntimeError, "Range map of A is not the same as range map of B. Matrices are not compatible! (TwoMatrixAdd)");
930  TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA->getDomainMap()->isSameAs(*(rcpBopB->getDomainMap())) == false, Xpetra::Exceptions::RuntimeError, "Domain map of A is not the same as domain map of B. Matrices are not compatible! (TwoMatrixAdd)");
931  TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA->getRangeMapExtractor()->getThyraMode() != rcpBopB->getRangeMapExtractor()->getThyraMode(), Xpetra::Exceptions::RuntimeError, "Different Thyra/Xpetra style gids in RangeMapExtractor of A and B. Matrices are not compatible! (TwoMatrixAdd)");
932  TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA->getDomainMapExtractor()->getThyraMode() != rcpBopB->getDomainMapExtractor()->getThyraMode(), Xpetra::Exceptions::RuntimeError, "Different Thyra/Xpetra style gids in DomainMapExtractor of A and B. Matrices are not compatible! (TwoMatrixAdd)");
933 
934  RCP<const MapExtractor> rgmapextractor = rcpBopA->getRangeMapExtractor();
935  RCP<const MapExtractor> domapextractor = rcpBopB->getDomainMapExtractor();
936 
937  C = rcp(new BlockedCrsMatrix(rgmapextractor, domapextractor, 33 /* TODO fix me */));
938  RCP<BlockedCrsMatrix> bC = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(C);
939 
940  for (size_t i = 0; i < rcpBopA->Rows(); ++i) { // loop over all block rows of A
941  for (size_t j = 0; j < rcpBopB->Cols(); ++j) { // loop over all block columns of B
942 
943  RCP<Matrix> Cij = Teuchos::null;
944  if (rcpBopA->getMatrix(i, j) != Teuchos::null &&
945  rcpBopB->getMatrix(i, j) != Teuchos::null) {
946  // recursive call
947 
948  TwoMatrixAdd(*(rcpBopA->getMatrix(i, j)), transposeA, alpha,
949  *(rcpBopB->getMatrix(i, j)), transposeB, beta,
950  Cij, fos, AHasFixedNnzPerRow);
951  } else if (rcpBopA->getMatrix(i, j) == Teuchos::null &&
952  rcpBopB->getMatrix(i, j) != Teuchos::null) {
953  Cij = rcpBopB->getMatrix(i, j);
954  } else if (rcpBopA->getMatrix(i, j) != Teuchos::null &&
955  rcpBopB->getMatrix(i, j) == Teuchos::null) {
956  Cij = rcpBopA->getMatrix(i, j);
957  } else {
958  Cij = Teuchos::null;
959  }
960 
961  if (!Cij.is_null()) {
962  if (Cij->isFillComplete())
963  Cij->resumeFill();
964  // Cij->fillComplete(rcpBopA->getDomainMap(j), rcpBopA->getRangeMap(i));
965  Cij->fillComplete();
966  bC->setMatrix(i, j, Cij);
967  } else {
968  bC->setMatrix(i, j, Teuchos::null);
969  }
970  } // loop over columns j
971  } // loop over rows i
972 
973  } // end blocked recursive algorithm
974  } // MatrixMatrix::TwoMatrixAdd()
975 }; // end specialization on SC=double, GO=int and NO=EpetraNode
976 
977 // specialization MatrixMatrix for SC=double, GO=long long and NO=EptraNode
978 template <>
979 class MatrixMatrix<double, int, long long, EpetraNode> {
980  typedef double Scalar;
981  typedef int LocalOrdinal;
982  typedef long long GlobalOrdinal;
983  typedef EpetraNode Node;
984 #include "Xpetra_UseShortNames.hpp"
985 
986  public:
1011  static void Multiply(const Matrix& A, bool transposeA,
1012  const Matrix& B, bool transposeB,
1013  Matrix& C,
1014  bool call_FillComplete_on_result = true,
1015  bool doOptimizeStorage = true,
1016  const std::string& label = std::string(),
1017  const RCP<ParameterList>& params = null) {
1018  using helpers = Xpetra::Helpers<SC, LO, GO, NO>;
1019  TEUCHOS_TEST_FOR_EXCEPTION(transposeA == false && C.getRowMap()->isSameAs(*A.getRowMap()) == false,
1020  Xpetra::Exceptions::RuntimeError, "XpetraExt::MatrixMatrix::Multiply: row map of C is not same as row map of A");
1021  TEUCHOS_TEST_FOR_EXCEPTION(transposeA == true && C.getRowMap()->isSameAs(*A.getDomainMap()) == false,
1022  Xpetra::Exceptions::RuntimeError, "XpetraExt::MatrixMatrix::Multiply: row map of C is not same as domain map of A");
1023 
1024  TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete(), Xpetra::Exceptions::RuntimeError, "A is not fill-completed");
1025  TEUCHOS_TEST_FOR_EXCEPTION(!B.isFillComplete(), Xpetra::Exceptions::RuntimeError, "B is not fill-completed");
1026 
1027  bool haveMultiplyDoFillComplete = call_FillComplete_on_result && doOptimizeStorage;
1028 
1029  if (C.getRowMap()->lib() == Xpetra::UseEpetra) {
1030 #if defined(HAVE_XPETRA_EPETRA) && defined(HAVE_XPETRA_EPETRAEXT)
1031  helpers::epetraExtMult(A, transposeA, B, transposeB, C, haveMultiplyDoFillComplete);
1032 #else
1033  throw(Xpetra::Exceptions::RuntimeError("Xpetra::MatrixMatrix::Multiply requires EpetraExt to be compiled."));
1034 #endif
1035  } else if (C.getRowMap()->lib() == Xpetra::UseTpetra) {
1036 #ifdef HAVE_XPETRA_TPETRA
1037 #if ((defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_OPENMP) || !defined(HAVE_TPETRA_INST_INT_LONG_LONG))) || \
1038  (!defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_SERIAL) || !defined(HAVE_TPETRA_INST_INT_LONG_LONG))))
1039  throw(Xpetra::Exceptions::RuntimeError("Xpetra must be compiled with Tpetra <double,int,long long, EpetraNode> ETI enabled."));
1040 #else
1041  if (helpers::isTpetraCrs(A) && helpers::isTpetraCrs(B) && helpers::isTpetraCrs(C)) {
1042  // All matrices are Crs
1043  const Tpetra::CrsMatrix<SC, LO, GO, NO>& tpA = helpers::Op2TpetraCrs(A);
1044  const Tpetra::CrsMatrix<SC, LO, GO, NO>& tpB = helpers::Op2TpetraCrs(B);
1045  Tpetra::CrsMatrix<SC, LO, GO, NO>& tpC = helpers::Op2NonConstTpetraCrs(C);
1046 
1047  // 18Feb2013 JJH I'm reenabling the code that allows the matrix matrix multiply to do the fillComplete.
1048  // Previously, Tpetra's matrix matrix multiply did not support fillComplete.
1049  Tpetra::MatrixMatrix::Multiply(tpA, transposeA, tpB, transposeB, tpC, haveMultiplyDoFillComplete, label, params);
1050  } else if (helpers::isTpetraBlockCrs(A) && helpers::isTpetraBlockCrs(B)) {
1051  // All matrices are BlockCrs (except maybe Ac)
1052  // FIXME: For the moment we're just going to clobber the innards of Ac, so no reuse. Once we have a reuse kernel,
1053  // we'll need to think about refactoring BlockCrs so we can do something smarter here.
1054 
1055  if (!A.getRowMap()->getComm()->getRank())
1056  std::cout << "WARNING: Using inefficient BlockCrs Multiply Placeholder" << std::endl;
1057 
1058  const Tpetra::BlockCrsMatrix<SC, LO, GO, NO>& tpA = Xpetra::Helpers<SC, LO, GO, NO>::Op2TpetraBlockCrs(A);
1059  const Tpetra::BlockCrsMatrix<SC, LO, GO, NO>& tpB = Xpetra::Helpers<SC, LO, GO, NO>::Op2TpetraBlockCrs(B);
1060  using CRS = Tpetra::CrsMatrix<SC, LO, GO, NO>;
1061  RCP<const CRS> Acrs = Tpetra::convertToCrsMatrix(tpA);
1062  RCP<const CRS> Bcrs = Tpetra::convertToCrsMatrix(tpB);
1063 
1064  // We need the global constants to do the copy back to BlockCrs
1065  RCP<ParameterList> new_params;
1066  if (!params.is_null()) {
1067  new_params = rcp(new Teuchos::ParameterList(*params));
1068  new_params->set("compute global constants", true);
1069  }
1070 
1071  // FIXME: The lines below only works because we're assuming Ac is Point
1072  RCP<CRS> tempAc = Teuchos::rcp(new CRS(Acrs->getRowMap(), 0));
1073  Tpetra::MatrixMatrix::Multiply(*Acrs, transposeA, *Bcrs, transposeB, *tempAc, haveMultiplyDoFillComplete, label, new_params);
1074 
1075  // Temporary output matrix
1076  RCP<Tpetra::BlockCrsMatrix<SC, LO, GO, NO> > Ac_t = Tpetra::convertToBlockCrsMatrix(*tempAc, A.GetStorageBlockSize());
1077  RCP<Xpetra::TpetraBlockCrsMatrix<SC, LO, GO, NO> > Ac_x = Teuchos::rcp(new Xpetra::TpetraBlockCrsMatrix<SC, LO, GO, NO>(Ac_t));
1078  RCP<Xpetra::CrsMatrix<SC, LO, GO, NO> > Ac_p = Ac_x;
1079 
1080  // We can now cheat and replace the innards of Ac
1081  RCP<Xpetra::CrsMatrixWrap<SC, LO, GO, NO> > Ac_w = Teuchos::rcp_dynamic_cast<Xpetra::CrsMatrixWrap<SC, LO, GO, NO> >(Teuchos::rcpFromRef(C));
1082  Ac_w->replaceCrsMatrix(Ac_p);
1083  } else {
1084  // Mix and match
1085  TEUCHOS_TEST_FOR_EXCEPTION(1, Exceptions::RuntimeError, "Mix-and-match Crs/BlockCrs Multiply not currently supported");
1086  }
1087 
1088 #endif
1089 #else
1090  throw(Xpetra::Exceptions::RuntimeError("Xpetra must be compiled with Tpetra."));
1091 #endif
1092  }
1093 
1094  if (call_FillComplete_on_result && !haveMultiplyDoFillComplete) {
1095  RCP<Teuchos::ParameterList> fillParams = rcp(new Teuchos::ParameterList());
1096  fillParams->set("Optimize Storage", doOptimizeStorage);
1097  C.fillComplete((transposeB) ? B.getRangeMap() : B.getDomainMap(),
1098  (transposeA) ? A.getDomainMap() : A.getRangeMap(),
1099  fillParams);
1100  }
1101 
1102  // transfer striding information
1103  RCP<Matrix> rcpA = Teuchos::rcp_const_cast<Matrix>(Teuchos::rcpFromRef(A));
1104  RCP<Matrix> rcpB = Teuchos::rcp_const_cast<Matrix>(Teuchos::rcpFromRef(B));
1105  C.CreateView("stridedMaps", rcpA, transposeA, rcpB, transposeB); // TODO use references instead of RCPs
1106  } // end Multiply
1107 
1130  static RCP<Matrix> Multiply(const Matrix& A, bool transposeA,
1131  const Matrix& B, bool transposeB,
1132  RCP<Matrix> C_in,
1133  Teuchos::FancyOStream& fos,
1134  bool doFillComplete = true,
1135  bool doOptimizeStorage = true,
1136  const std::string& label = std::string(),
1137  const RCP<ParameterList>& params = null) {
1138  TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete(), Exceptions::RuntimeError, "A is not fill-completed");
1139  TEUCHOS_TEST_FOR_EXCEPTION(!B.isFillComplete(), Exceptions::RuntimeError, "B is not fill-completed");
1140 
1141  // Default case: Xpetra Multiply
1142  RCP<Matrix> C = C_in;
1143 
1144  if (C == Teuchos::null) {
1145  double nnzPerRow = Teuchos::as<double>(0);
1146 
1147 #if 0
1148  if (A.getDomainMap()->lib() == Xpetra::UseTpetra) {
1149  // For now, follow what ML and Epetra do.
1150  GO numRowsA = A.getGlobalNumRows();
1151  GO numRowsB = B.getGlobalNumRows();
1152  nnzPerRow = sqrt(Teuchos::as<double>(A.getGlobalNumEntries())/numRowsA) +
1153  sqrt(Teuchos::as<double>(B.getGlobalNumEntries())/numRowsB) - 1;
1154  nnzPerRow *= nnzPerRow;
1155  double totalNnz = nnzPerRow * A.getGlobalNumRows() * 0.75 + 100;
1156  double minNnz = Teuchos::as<double>(1.2 * A.getGlobalNumEntries());
1157  if (totalNnz < minNnz)
1158  totalNnz = minNnz;
1159  nnzPerRow = totalNnz / A.getGlobalNumRows();
1160 
1161  fos << "Matrix product nnz per row estimate = " << Teuchos::as<LO>(nnzPerRow) << std::endl;
1162  }
1163 #endif
1164  if (transposeA)
1165  C = MatrixFactory::Build(A.getDomainMap(), Teuchos::as<LO>(nnzPerRow));
1166  else
1167  C = MatrixFactory::Build(A.getRowMap(), Teuchos::as<LO>(nnzPerRow));
1168 
1169  } else {
1170  C->resumeFill(); // why this is not done inside of Tpetra MxM?
1171  fos << "Reuse C pattern" << std::endl;
1172  }
1173 
1174  Multiply(A, transposeA, B, transposeB, *C, doFillComplete, doOptimizeStorage, label, params); // call Multiply routine from above
1175 
1176  return C;
1177  }
1178 
1189  static RCP<Matrix> Multiply(const Matrix& A, bool transposeA,
1190  const Matrix& B, bool transposeB,
1191  Teuchos::FancyOStream& fos,
1192  bool callFillCompleteOnResult = true,
1193  bool doOptimizeStorage = true,
1194  const std::string& label = std::string(),
1195  const RCP<ParameterList>& params = null) {
1196  return Multiply(A, transposeA, B, transposeB, Teuchos::null, fos, callFillCompleteOnResult, doOptimizeStorage, label, params);
1197  }
1198 
1199 #if defined(HAVE_XPETRA_EPETRA) && defined(HAVE_XPETRA_EPETRAEXT)
1200  // Michael Gee's MLMultiply
1201  static RCP<Epetra_CrsMatrix> MLTwoMatrixMultiply(const Epetra_CrsMatrix& /* epA */,
1202  const Epetra_CrsMatrix& /* epB */,
1203  Teuchos::FancyOStream& /* fos */) {
1204  TEUCHOS_TEST_FOR_EXCEPTION(true, Xpetra::Exceptions::RuntimeError,
1205  "No ML multiplication available. This feature is currently not supported by Xpetra.");
1206  TEUCHOS_UNREACHABLE_RETURN(Teuchos::null);
1207  }
1208 #endif // ifdef HAVE_XPETRA_EPETRAEXT
1209 
1220  static RCP<BlockedCrsMatrix> TwoMatrixMultiplyBlock(const BlockedCrsMatrix& A, bool transposeA,
1221  const BlockedCrsMatrix& B, bool transposeB,
1222  Teuchos::FancyOStream& fos,
1223  bool doFillComplete = true,
1224  bool doOptimizeStorage = true) {
1225  TEUCHOS_TEST_FOR_EXCEPTION(transposeA || transposeB, Exceptions::RuntimeError,
1226  "TwoMatrixMultiply for BlockedCrsMatrix not implemented for transposeA==true or transposeB==true");
1227 
1228  // Preconditions
1229  TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete(), Exceptions::RuntimeError, "A is not fill-completed");
1230  TEUCHOS_TEST_FOR_EXCEPTION(!B.isFillComplete(), Exceptions::RuntimeError, "B is not fill-completed");
1231 
1232  RCP<const MapExtractor> rgmapextractor = A.getRangeMapExtractor();
1233  RCP<const MapExtractor> domapextractor = B.getDomainMapExtractor();
1234 
1235  RCP<BlockedCrsMatrix> C = rcp(new BlockedCrsMatrix(rgmapextractor, domapextractor, 33 /* TODO fix me */));
1236 
1237  for (size_t i = 0; i < A.Rows(); ++i) { // loop over all block rows of A
1238  for (size_t j = 0; j < B.Cols(); ++j) { // loop over all block columns of B
1239  RCP<Matrix> Cij;
1240 
1241  for (size_t l = 0; l < B.Rows(); ++l) { // loop for calculating entry C_{ij}
1242  RCP<Matrix> crmat1 = A.getMatrix(i, l);
1243  RCP<Matrix> crmat2 = B.getMatrix(l, j);
1244 
1245  if (crmat1.is_null() || crmat2.is_null())
1246  continue;
1247 
1248  // try unwrapping 1x1 blocked matrix
1249  {
1250  auto unwrap1 = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(crmat1);
1251  auto unwrap2 = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(crmat2);
1252 
1253  if (unwrap1.is_null() != unwrap2.is_null()) {
1254  if (unwrap1 != Teuchos::null && unwrap1->Rows() == 1 && unwrap1->Cols() == 1)
1255  crmat1 = unwrap1->getCrsMatrix();
1256  if (unwrap2 != Teuchos::null && unwrap2->Rows() == 1 && unwrap2->Cols() == 1)
1257  crmat2 = unwrap2->getCrsMatrix();
1258  }
1259  }
1260 
1261  RCP<CrsMatrixWrap> crop1 = Teuchos::rcp_dynamic_cast<CrsMatrixWrap>(crmat1);
1262  RCP<CrsMatrixWrap> crop2 = Teuchos::rcp_dynamic_cast<CrsMatrixWrap>(crmat2);
1263  TEUCHOS_TEST_FOR_EXCEPTION(crop1.is_null() != crop2.is_null(), Xpetra::Exceptions::RuntimeError,
1264  "A and B must be either both (compatible) BlockedCrsMatrix objects or both CrsMatrixWrap objects.");
1265 
1266  // Forcibly compute the global constants if we don't have them (only works for real CrsMatrices, not nested blocks)
1267  if (!crop1.is_null())
1268  Teuchos::rcp_const_cast<CrsGraph>(crmat1->getCrsGraph())->computeGlobalConstants();
1269  if (!crop2.is_null())
1270  Teuchos::rcp_const_cast<CrsGraph>(crmat2->getCrsGraph())->computeGlobalConstants();
1271 
1272  TEUCHOS_TEST_FOR_EXCEPTION(!crmat1->haveGlobalConstants(), Exceptions::RuntimeError,
1273  "crmat1 does not have global constants");
1274  TEUCHOS_TEST_FOR_EXCEPTION(!crmat2->haveGlobalConstants(), Exceptions::RuntimeError,
1275  "crmat2 does not have global constants");
1276 
1277  if (crmat1->getGlobalNumEntries() == 0 || crmat2->getGlobalNumEntries() == 0)
1278  continue;
1279 
1280  // temporary matrix containing result of local block multiplication
1281  RCP<Matrix> temp = Teuchos::null;
1282 
1283  if (crop1 != Teuchos::null && crop2 != Teuchos::null)
1284  temp = Multiply(*crop1, false, *crop2, false, fos);
1285  else {
1286  RCP<BlockedCrsMatrix> bop1 = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(crmat1);
1287  RCP<BlockedCrsMatrix> bop2 = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(crmat2);
1288  TEUCHOS_TEST_FOR_EXCEPTION(bop1.is_null() == true, Xpetra::Exceptions::BadCast, "A is not a BlockedCrsMatrix. (TwoMatrixMultiplyBlock)");
1289  TEUCHOS_TEST_FOR_EXCEPTION(bop2.is_null() == true, Xpetra::Exceptions::BadCast, "B is not a BlockedCrsMatrix. (TwoMatrixMultiplyBlock)");
1290  TEUCHOS_TEST_FOR_EXCEPTION(bop1->Cols() != bop2->Rows(), Xpetra::Exceptions::RuntimeError, "A has " << bop1->Cols() << " columns and B has " << bop2->Rows() << " rows. Matrices are not compatible! (TwoMatrixMultiplyBlock)");
1291  TEUCHOS_TEST_FOR_EXCEPTION(bop1->getDomainMap()->isSameAs(*(bop2->getRangeMap())) == false, Xpetra::Exceptions::RuntimeError, "Domain map of A is not the same as range map of B. Matrices are not compatible! (TwoMatrixMultiplyBlock)");
1292 
1293  // recursive multiplication call
1294  temp = TwoMatrixMultiplyBlock(*bop1, transposeA, *bop2, transposeB, fos, doFillComplete, doOptimizeStorage);
1295 
1296  RCP<BlockedCrsMatrix> btemp = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(temp);
1297  TEUCHOS_TEST_FOR_EXCEPTION(btemp->Rows() != bop1->Rows(), Xpetra::Exceptions::RuntimeError, "Number of block rows of local blocked operator is " << btemp->Rows() << " but should be " << bop1->Rows() << ". (TwoMatrixMultiplyBlock)");
1298  TEUCHOS_TEST_FOR_EXCEPTION(btemp->Cols() != bop2->Cols(), Xpetra::Exceptions::RuntimeError, "Number of block cols of local blocked operator is " << btemp->Cols() << " but should be " << bop2->Cols() << ". (TwoMatrixMultiplyBlock)");
1299  TEUCHOS_TEST_FOR_EXCEPTION(btemp->getRangeMapExtractor()->getFullMap()->isSameAs(*(bop1->getRangeMapExtractor()->getFullMap())) == false, Xpetra::Exceptions::RuntimeError, "Range map of local blocked operator should be same as first operator. (TwoMatrixMultiplyBlock)");
1300  TEUCHOS_TEST_FOR_EXCEPTION(btemp->getDomainMapExtractor()->getFullMap()->isSameAs(*(bop2->getDomainMapExtractor()->getFullMap())) == false, Xpetra::Exceptions::RuntimeError, "Domain map of local blocked operator should be same as second operator. (TwoMatrixMultiplyBlock)");
1301  TEUCHOS_TEST_FOR_EXCEPTION(btemp->getRangeMapExtractor()->getThyraMode() != bop1->getRangeMapExtractor()->getThyraMode(), Xpetra::Exceptions::RuntimeError, "Thyra mode of local range map extractor incompatible with range map extractor of A (TwoMatrixMultiplyBlock)");
1302  TEUCHOS_TEST_FOR_EXCEPTION(btemp->getDomainMapExtractor()->getThyraMode() != bop2->getDomainMapExtractor()->getThyraMode(), Xpetra::Exceptions::RuntimeError, "Thyra mode of local domain map extractor incompatible with domain map extractor of B (TwoMatrixMultiplyBlock)");
1303  }
1304 
1305  TEUCHOS_TEST_FOR_EXCEPTION(temp->isFillComplete() == false, Xpetra::Exceptions::RuntimeError, "Local block is not filled. (TwoMatrixMultiplyBlock)");
1306 
1307  RCP<Matrix> addRes = null;
1308  if (Cij.is_null())
1309  Cij = temp;
1310  else {
1311  MatrixMatrix::TwoMatrixAdd(*temp, false, 1.0, *Cij, false, 1.0, addRes, fos);
1312  Cij = addRes;
1313  }
1314  }
1315 
1316  if (!Cij.is_null()) {
1317  if (Cij->isFillComplete())
1318  Cij->resumeFill();
1319  Cij->fillComplete(B.getDomainMap(j), A.getRangeMap(i));
1320  C->setMatrix(i, j, Cij);
1321  } else {
1322  C->setMatrix(i, j, Teuchos::null);
1323  }
1324  }
1325  }
1326 
1327  if (doFillComplete)
1328  C->fillComplete(); // call default fillComplete for BlockCrsMatrixWrap objects
1329 
1330  return C;
1331  } // TwoMatrixMultiplyBlock
1332 
1345  static void TwoMatrixAdd(const Matrix& A, bool transposeA, SC alpha, Matrix& B, SC beta) {
1346  TEUCHOS_TEST_FOR_EXCEPTION(!(A.getRowMap()->isSameAs(*(B.getRowMap()))), Exceptions::Incompatible,
1347  "TwoMatrixAdd: matrix row maps are not the same.");
1348 
1349  if (A.getRowMap()->lib() == Xpetra::UseEpetra) {
1350 #if defined(HAVE_XPETRA_EPETRA) && defined(HAVE_XPETRA_EPETRAEXT)
1351  const Epetra_CrsMatrix& epA = Xpetra::Helpers<SC, LO, GO, NO>::Op2EpetraCrs(A);
1352  Epetra_CrsMatrix& epB = Xpetra::Helpers<SC, LO, GO, NO>::Op2NonConstEpetraCrs(B);
1353 
1354  // FIXME is there a bug if beta=0?
1355  int rv = EpetraExt::MatrixMatrix::Add(epA, transposeA, alpha, epB, beta);
1356 
1357  if (rv != 0)
1358  throw Exceptions::RuntimeError("EpetraExt::MatrixMatrix::Add return value " + Teuchos::toString(rv));
1359  std::ostringstream buf;
1360 #else
1361  throw Exceptions::RuntimeError("Xpetra must be compiled with EpetraExt.");
1362 #endif
1363  } else if (A.getRowMap()->lib() == Xpetra::UseTpetra) {
1364 #ifdef HAVE_XPETRA_TPETRA
1365 #if ((defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_OPENMP) || !defined(HAVE_TPETRA_INST_INT_LONG_LONG))) || \
1366  (!defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_SERIAL) || !defined(HAVE_TPETRA_INST_INT_LONG_LONG))))
1367  throw(Xpetra::Exceptions::RuntimeError("Xpetra must be compiled with Tpetra GO=long long enabled."));
1368 #else
1369  const Tpetra::CrsMatrix<SC, LO, GO, NO>& tpA = Xpetra::Helpers<SC, LO, GO, NO>::Op2TpetraCrs(A);
1370  Tpetra::CrsMatrix<SC, LO, GO, NO>& tpB = Xpetra::Helpers<SC, LO, GO, NO>::Op2NonConstTpetraCrs(B);
1371 
1372  Tpetra::MatrixMatrix::Add(tpA, transposeA, alpha, tpB, beta);
1373 #endif
1374 #else
1375  throw Exceptions::RuntimeError("Xpetra must be compiled with Tpetra.");
1376 #endif
1377  }
1378  } // MatrixMatrix::TwoMatrixAdd()
1379 
1394  static void TwoMatrixAdd(const Matrix& A, bool transposeA, const SC& alpha,
1395  const Matrix& B, bool transposeB, const SC& beta,
1396  RCP<Matrix>& C, Teuchos::FancyOStream& fos, bool AHasFixedNnzPerRow = false) {
1397  RCP<const Matrix> rcpA = Teuchos::rcpFromRef(A);
1398  RCP<const Matrix> rcpB = Teuchos::rcpFromRef(B);
1399  RCP<const BlockedCrsMatrix> rcpBopA = Teuchos::rcp_dynamic_cast<const BlockedCrsMatrix>(rcpA);
1400  RCP<const BlockedCrsMatrix> rcpBopB = Teuchos::rcp_dynamic_cast<const BlockedCrsMatrix>(rcpB);
1401 
1402  if (rcpBopA == Teuchos::null && rcpBopB == Teuchos::null) {
1403  TEUCHOS_TEST_FOR_EXCEPTION(!(A.getRowMap()->isSameAs(*(B.getRowMap()))), Exceptions::Incompatible,
1404  "TwoMatrixAdd: matrix row maps are not the same.");
1405  auto lib = A.getRowMap()->lib();
1406  if (lib == Xpetra::UseEpetra) {
1407 #if defined(HAVE_XPETRA_EPETRA) && defined(HAVE_XPETRA_EPETRAEXT)
1408  const Epetra_CrsMatrix& epA = Xpetra::Helpers<SC, LO, GO, NO>::Op2EpetraCrs(A);
1409  const Epetra_CrsMatrix& epB = Xpetra::Helpers<SC, LO, GO, NO>::Op2EpetraCrs(B);
1410  if (C.is_null()) {
1411  size_t maxNzInA = 0;
1412  size_t maxNzInB = 0;
1413  size_t numLocalRows = 0;
1414  if (A.isFillComplete() && B.isFillComplete()) {
1415  maxNzInA = A.getLocalMaxNumRowEntries();
1416  maxNzInB = B.getLocalMaxNumRowEntries();
1417  numLocalRows = A.getLocalNumRows();
1418  }
1419 
1420  if (maxNzInA == 1 || maxNzInB == 1 || AHasFixedNnzPerRow) {
1421  // first check if either A or B has at most 1 nonzero per row
1422  // the case of both having at most 1 nz per row is handled by the ``else''
1423  Teuchos::ArrayRCP<size_t> exactNnzPerRow(numLocalRows);
1424 
1425  if ((maxNzInA == 1 && maxNzInB > 1) || AHasFixedNnzPerRow) {
1426  for (size_t i = 0; i < numLocalRows; ++i)
1427  exactNnzPerRow[i] = B.getNumEntriesInLocalRow(Teuchos::as<LO>(i)) + maxNzInA;
1428 
1429  } else {
1430  for (size_t i = 0; i < numLocalRows; ++i)
1431  exactNnzPerRow[i] = A.getNumEntriesInLocalRow(Teuchos::as<LO>(i)) + maxNzInB;
1432  }
1433 
1434  fos << "MatrixMatrix::TwoMatrixAdd : special case detected (one matrix has a fixed nnz per row)"
1435  << ", using static profiling" << std::endl;
1436  C = rcp(new Xpetra::CrsMatrixWrap<SC, LO, GO, NO>(A.getRowMap(), exactNnzPerRow));
1437 
1438  } else {
1439  // general case
1440  LO maxPossibleNNZ = A.getLocalMaxNumRowEntries() + B.getLocalMaxNumRowEntries();
1441  C = rcp(new Xpetra::CrsMatrixWrap<SC, LO, GO, NO>(A.getRowMap(), maxPossibleNNZ));
1442  }
1443  if (transposeB)
1444  fos << "MatrixMatrix::TwoMatrixAdd : ** WARNING ** estimate could be badly wrong because second summand is transposed" << std::endl;
1445  }
1446  RCP<Epetra_CrsMatrix> epC = Xpetra::Helpers<SC, LO, GO, NO>::Op2NonConstEpetraCrs(C);
1447  Epetra_CrsMatrix* ref2epC = &*epC; // to avoid a compiler error...
1448 
1449  // FIXME is there a bug if beta=0?
1450  int rv = EpetraExt::MatrixMatrix::Add(epA, transposeA, alpha, epB, transposeB, beta, ref2epC);
1451 
1452  if (rv != 0)
1453  throw Exceptions::RuntimeError("EpetraExt::MatrixMatrix::Add return value of " + Teuchos::toString(rv));
1454 #else
1455  throw Exceptions::RuntimeError("MueLu must be compile with EpetraExt.");
1456 #endif
1457  } else if (lib == Xpetra::UseTpetra) {
1458 #ifdef HAVE_XPETRA_TPETRA
1459 #if ((defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_OPENMP) || !defined(HAVE_TPETRA_INST_INT_LONG_LONG))) || \
1460  (!defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_SERIAL) || !defined(HAVE_TPETRA_INST_INT_LONG_LONG))))
1461  throw(Xpetra::Exceptions::RuntimeError("Xpetra must be compiled with Tpetra GO=long long enabled."));
1462 #else
1463  using helpers = Xpetra::Helpers<SC, LO, GO, NO>;
1464  using tcrs_matrix_type = Tpetra::CrsMatrix<SC, LO, GO, NO>;
1465  const tcrs_matrix_type& tpA = Xpetra::Helpers<SC, LO, GO, NO>::Op2TpetraCrs(A);
1466  const tcrs_matrix_type& tpB = Xpetra::Helpers<SC, LO, GO, NO>::Op2TpetraCrs(B);
1467  C = helpers::tpetraAdd(tpA, transposeA, alpha, tpB, transposeB, beta);
1468 #endif
1469 #else
1470  throw Exceptions::RuntimeError("Xpetra must be compile with Tpetra.");
1471 #endif
1472  }
1473 
1475  if (A.IsView("stridedMaps")) C->CreateView("stridedMaps", rcpFromRef(A));
1476  if (B.IsView("stridedMaps")) C->CreateView("stridedMaps", rcpFromRef(B));
1478  }
1479  // the first matrix is of type CrsMatrixWrap, the second is a blocked operator
1480  else if (rcpBopA == Teuchos::null && rcpBopB != Teuchos::null) {
1481  RCP<const MapExtractor> rgmapextractor = rcpBopB->getRangeMapExtractor();
1482  RCP<const MapExtractor> domapextractor = rcpBopB->getDomainMapExtractor();
1483 
1484  C = rcp(new BlockedCrsMatrix(rgmapextractor, domapextractor, 33 /* TODO fix me */));
1485  RCP<BlockedCrsMatrix> bC = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(C);
1486 
1487  size_t i = 0;
1488  for (size_t j = 0; j < rcpBopB->Cols(); ++j) { // loop over all block columns of B
1489  RCP<Matrix> Cij = Teuchos::null;
1490  if (rcpA != Teuchos::null &&
1491  rcpBopB->getMatrix(i, j) != Teuchos::null) {
1492  // recursive call
1493  TwoMatrixAdd(*rcpA, transposeA, alpha,
1494  *(rcpBopB->getMatrix(i, j)), transposeB, beta,
1495  Cij, fos, AHasFixedNnzPerRow);
1496  } else if (rcpA == Teuchos::null &&
1497  rcpBopB->getMatrix(i, j) != Teuchos::null) {
1498  Cij = rcpBopB->getMatrix(i, j);
1499  } else if (rcpA != Teuchos::null &&
1500  rcpBopB->getMatrix(i, j) == Teuchos::null) {
1501  Cij = Teuchos::rcp_const_cast<Matrix>(rcpA);
1502  } else {
1503  Cij = Teuchos::null;
1504  }
1505 
1506  if (!Cij.is_null()) {
1507  if (Cij->isFillComplete())
1508  Cij->resumeFill();
1509  Cij->fillComplete();
1510  bC->setMatrix(i, j, Cij);
1511  } else {
1512  bC->setMatrix(i, j, Teuchos::null);
1513  }
1514  } // loop over columns j
1515  }
1516  // the second matrix is of type CrsMatrixWrap, the first is a blocked operator
1517  else if (rcpBopA != Teuchos::null && rcpBopB == Teuchos::null) {
1518  RCP<const MapExtractor> rgmapextractor = rcpBopA->getRangeMapExtractor();
1519  RCP<const MapExtractor> domapextractor = rcpBopA->getDomainMapExtractor();
1520 
1521  C = rcp(new BlockedCrsMatrix(rgmapextractor, domapextractor, 33 /* TODO fix me */));
1522  RCP<BlockedCrsMatrix> bC = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(C);
1523 
1524  size_t j = 0;
1525  for (size_t i = 0; i < rcpBopA->Rows(); ++i) { // loop over all block rows of A
1526  RCP<Matrix> Cij = Teuchos::null;
1527  if (rcpBopA->getMatrix(i, j) != Teuchos::null &&
1528  rcpB != Teuchos::null) {
1529  // recursive call
1530  TwoMatrixAdd(*(rcpBopA->getMatrix(i, j)), transposeA, alpha,
1531  *rcpB, transposeB, beta,
1532  Cij, fos, AHasFixedNnzPerRow);
1533  } else if (rcpBopA->getMatrix(i, j) == Teuchos::null &&
1534  rcpB != Teuchos::null) {
1535  Cij = Teuchos::rcp_const_cast<Matrix>(rcpB);
1536  } else if (rcpBopA->getMatrix(i, j) != Teuchos::null &&
1537  rcpB == Teuchos::null) {
1538  Cij = rcpBopA->getMatrix(i, j);
1539  } else {
1540  Cij = Teuchos::null;
1541  }
1542 
1543  if (!Cij.is_null()) {
1544  if (Cij->isFillComplete())
1545  Cij->resumeFill();
1546  Cij->fillComplete();
1547  bC->setMatrix(i, j, Cij);
1548  } else {
1549  bC->setMatrix(i, j, Teuchos::null);
1550  }
1551  } // loop over rows i
1552  } else {
1553  // This is the version for blocked matrices
1554 
1555  // check the compatibility of the blocked operators
1556  TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA.is_null() == true, Xpetra::Exceptions::BadCast, "A is not a BlockedCrsMatrix. (TwoMatrixAdd)");
1557  TEUCHOS_TEST_FOR_EXCEPTION(rcpBopB.is_null() == true, Xpetra::Exceptions::BadCast, "B is not a BlockedCrsMatrix. (TwoMatrixAdd)");
1558  TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA->Rows() != rcpBopB->Rows(), Xpetra::Exceptions::RuntimeError, "A has " << rcpBopA->Rows() << " rows and B has " << rcpBopA->Rows() << " rows. Matrices are not compatible! (TwoMatrixAdd)");
1559  TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA->Rows() != rcpBopB->Rows(), Xpetra::Exceptions::RuntimeError, "A has " << rcpBopA->Cols() << " cols and B has " << rcpBopA->Cols() << " cols. Matrices are not compatible! (TwoMatrixAdd)");
1560  TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA->getRangeMap()->isSameAs(*(rcpBopB->getRangeMap())) == false, Xpetra::Exceptions::RuntimeError, "Range map of A is not the same as range map of B. Matrices are not compatible! (TwoMatrixAdd)");
1561  TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA->getDomainMap()->isSameAs(*(rcpBopB->getDomainMap())) == false, Xpetra::Exceptions::RuntimeError, "Domain map of A is not the same as domain map of B. Matrices are not compatible! (TwoMatrixAdd)");
1562  TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA->getRangeMapExtractor()->getThyraMode() != rcpBopB->getRangeMapExtractor()->getThyraMode(), Xpetra::Exceptions::RuntimeError, "Different Thyra/Xpetra style gids in RangeMapExtractor of A and B. Matrices are not compatible! (TwoMatrixAdd)");
1563  TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA->getDomainMapExtractor()->getThyraMode() != rcpBopB->getDomainMapExtractor()->getThyraMode(), Xpetra::Exceptions::RuntimeError, "Different Thyra/Xpetra style gids in DomainMapExtractor of A and B. Matrices are not compatible! (TwoMatrixAdd)");
1564 
1565  RCP<const MapExtractor> rgmapextractor = rcpBopA->getRangeMapExtractor();
1566  RCP<const MapExtractor> domapextractor = rcpBopB->getDomainMapExtractor();
1567 
1568  C = rcp(new BlockedCrsMatrix(rgmapextractor, domapextractor, 33 /* TODO fix me */));
1569  RCP<BlockedCrsMatrix> bC = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(C);
1570 
1571  for (size_t i = 0; i < rcpBopA->Rows(); ++i) { // loop over all block rows of A
1572  for (size_t j = 0; j < rcpBopB->Cols(); ++j) { // loop over all block columns of B
1573 
1574  RCP<Matrix> Cij = Teuchos::null;
1575  if (rcpBopA->getMatrix(i, j) != Teuchos::null &&
1576  rcpBopB->getMatrix(i, j) != Teuchos::null) {
1577  // recursive call
1578 
1579  TwoMatrixAdd(*(rcpBopA->getMatrix(i, j)), transposeA, alpha,
1580  *(rcpBopB->getMatrix(i, j)), transposeB, beta,
1581  Cij, fos, AHasFixedNnzPerRow);
1582  } else if (rcpBopA->getMatrix(i, j) == Teuchos::null &&
1583  rcpBopB->getMatrix(i, j) != Teuchos::null) {
1584  Cij = rcpBopB->getMatrix(i, j);
1585  } else if (rcpBopA->getMatrix(i, j) != Teuchos::null &&
1586  rcpBopB->getMatrix(i, j) == Teuchos::null) {
1587  Cij = rcpBopA->getMatrix(i, j);
1588  } else {
1589  Cij = Teuchos::null;
1590  }
1591 
1592  if (!Cij.is_null()) {
1593  if (Cij->isFillComplete())
1594  Cij->resumeFill();
1595  // Cij->fillComplete(rcpBopA->getDomainMap(j), rcpBopA->getRangeMap(i));
1596  Cij->fillComplete();
1597  bC->setMatrix(i, j, Cij);
1598  } else {
1599  bC->setMatrix(i, j, Teuchos::null);
1600  }
1601  } // loop over columns j
1602  } // loop over rows i
1603 
1604  } // end blocked recursive algorithm
1605  } // MatrixMatrix::TwoMatrixAdd()
1606 }; // end specialization on GO=long long and NO=EpetraNode
1607 
1608 #endif // HAVE_XPETRA_EPETRA
1609 
1610 } // end namespace Xpetra
1611 
1612 #define XPETRA_MATRIXMATRIX_SHORT
1613 
1614 #endif /* PACKAGES_XPETRA_SUP_UTILS_XPETRA_MATRIXMATRIX_DECL_HPP_ */
std::string toString(Xpetra::UnderlyingLib lib)
Convert a Xpetra::UnderlyingLib to a std::string.
static void TwoMatrixAdd(const Matrix &A, bool transposeA, const SC &alpha, const Matrix &B, bool transposeB, const SC &beta, RCP< Matrix > &C, Teuchos::FancyOStream &fos, bool AHasFixedNnzPerRow=false)
Helper function to calculate C = alpha*A + beta*B.
static RCP< BlockedCrsMatrix > TwoMatrixMultiplyBlock(const BlockedCrsMatrix &A, bool transposeA, const BlockedCrsMatrix &B, bool transposeB, Teuchos::FancyOStream &fos, bool doFillComplete=true, bool doOptimizeStorage=true)
Helper function to do matrix-matrix multiply &quot;in-place&quot;.
virtual global_size_t getGlobalNumRows() const =0
Returns the number of global rows in this matrix.
RCP< const MapExtractor > getDomainMapExtractor() const
Returns map extractor for domain map.
static RCP< Matrix > Multiply(const Matrix &A, bool transposeA, const Matrix &B, bool transposeB, RCP< Matrix > C_in, Teuchos::FancyOStream &fos, bool doFillComplete=true, bool doOptimizeStorage=true, const std::string &label=std::string(), const RCP< ParameterList > &params=null)
Helper function to do matrix-matrix multiply.
bool isFillComplete() const
Returns true if fillComplete() has been called and the matrix is in compute mode. ...
Xpetra utility class containing transformation routines between Xpetra::Matrix and Epetra/Tpetra obje...
virtual const RCP< const Map > & getRowMap() const
Returns the Map that describes the row distribution in this matrix.
virtual void resumeFill(const RCP< ParameterList > &params=null)=0
virtual LocalOrdinal GetStorageBlockSize() const =0
Returns the block size of the storage mechanism, which is usually 1, except for Tpetra::BlockCrsMatri...
Exception throws to report errors in the internal logical of the program.
static RCP< BlockedCrsMatrix > TwoMatrixMultiplyBlock(const BlockedCrsMatrix &A, bool transposeA, const BlockedCrsMatrix &B, bool transposeB, Teuchos::FancyOStream &fos, bool doFillComplete=true, bool doOptimizeStorage=true)
Helper function to do matrix-matrix multiply &quot;in-place&quot;.
virtual size_t Cols() const
number of column blocks
static RCP< const Tpetra::CrsMatrix< SC, LO, GO, NO > > Op2TpetraCrs(RCP< Matrix > Op)
const RCP< const Map > getDomainMap() const
Returns the Map associated with the domain of this operator.
Exception indicating invalid cast attempted.
virtual void fillComplete(const RCP< const Map > &domainMap, const RCP< const Map > &rangeMap, const RCP< ParameterList > &params=null)=0
Signal that data entry is complete, specifying domain and range maps.
static void TwoMatrixAdd(const Matrix &A, bool transposeA, const SC &alpha, const Matrix &B, bool transposeB, const SC &beta, RCP< Matrix > &C, Teuchos::FancyOStream &fos, bool AHasFixedNnzPerRow=false)
Helper function to calculate C = alpha*A + beta*B.
static RCP< Epetra_CrsMatrix > MLTwoMatrixMultiply(const Epetra_CrsMatrix &, const Epetra_CrsMatrix &, Teuchos::FancyOStream &)
bool IsView(const viewLabel_t viewLabel) const
virtual bool isFillComplete() const =0
Returns true if fillComplete() has been called and the matrix is in compute mode. ...
void CreateView(viewLabel_t viewLabel, const RCP< const Map > &rowMap, const RCP< const Map > &colMap)
static void Multiply(const Matrix &A, bool transposeA, const Matrix &B, bool transposeB, Matrix &C, bool call_FillComplete_on_result=true, bool doOptimizeStorage=true, const std::string &label=std::string(), const RCP< ParameterList > &params=null)
static RCP< Epetra_CrsMatrix > MLTwoMatrixMultiply(const Epetra_CrsMatrix &epA, const Epetra_CrsMatrix &epB, Teuchos::FancyOStream &fos)
static RCP< Matrix > Multiply(const Matrix &A, bool transposeA, const Matrix &B, bool transposeB, RCP< Matrix > C_in, Teuchos::FancyOStream &fos, bool doFillComplete=true, bool doOptimizeStorage=true, const std::string &label=std::string(), const RCP< ParameterList > &params=null)
Helper function to do matrix-matrix multiply.
virtual size_t Rows() const
number of row blocks
const RCP< const Map > getRangeMap() const
Returns the Map associated with the range of this operator.
virtual const Teuchos::RCP< const map_type > getRangeMap() const =0
The Map associated with the range of this operator, which must be compatible with Y...
static RCP< Tpetra::CrsMatrix< SC, LO, GO, NO > > Op2NonConstTpetraCrs(RCP< Matrix > Op)
virtual size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const =0
Returns the current number of entries on this node in the specified local row.
static void Multiply(const Matrix &A, bool transposeA, const Matrix &B, bool transposeB, Matrix &C, bool call_FillComplete_on_result=true, bool doOptimizeStorage=true, const std::string &label=std::string(), const RCP< ParameterList > &params=null)
virtual size_t getLocalNumRows() const =0
Returns the number of matrix rows owned on the calling node.
RCP< const MapExtractor > getRangeMapExtractor() const
Returns map extractor class for range map.
Teuchos::RCP< Matrix > getMatrix(size_t r, size_t c) const
return block (r,c)
Tpetra::KokkosCompat::KokkosSerialWrapperNode EpetraNode
void replaceCrsMatrix(RCP< CrsMatrix > &M)
Expert only.
static RCP< Matrix > Build(const RCP< const Map > &rowMap)
static void TwoMatrixAdd(const Matrix &A, bool transposeA, SC alpha, Matrix &B, SC beta)
Helper function to calculate B = alpha*A + beta*B.
static RCP< const Epetra_CrsMatrix > Op2EpetraCrs(RCP< Matrix > Op)
Exception throws to report incompatible objects (like maps).
static void TwoMatrixAdd(const Matrix &A, bool transposeA, SC alpha, Matrix &B, SC beta)
Helper function to calculate B = alpha*A + beta*B.
virtual global_size_t getGlobalNumEntries() const =0
Returns the global number of entries in this matrix.
Concrete implementation of Xpetra::Matrix.
virtual const Teuchos::RCP< const map_type > getDomainMap() const =0
The Map associated with the domain of this operator, which must be compatible with X...
virtual size_t getLocalMaxNumRowEntries() const =0
Returns the maximum number of entries across all rows/columns on this node.
static RCP< const Tpetra::BlockCrsMatrix< SC, LO, GO, NO > > Op2TpetraBlockCrs(RCP< Matrix > Op)
static RCP< Epetra_CrsMatrix > MLTwoMatrixMultiply(const Epetra_CrsMatrix &epA, const Epetra_CrsMatrix &epB, Teuchos::FancyOStream &fos)
static void Multiply(const Matrix &A, bool transposeA, const Matrix &B, bool transposeB, Matrix &C, bool call_FillComplete_on_result=true, bool doOptimizeStorage=true, const std::string &label=std::string(), const RCP< ParameterList > &params=null)
static RCP< BlockedCrsMatrix > TwoMatrixMultiplyBlock(const BlockedCrsMatrix &A, bool transposeA, const BlockedCrsMatrix &B, bool transposeB, Teuchos::FancyOStream &fos, bool doFillComplete=true, bool doOptimizeStorage=true)
Helper function to do matrix-matrix multiply &quot;in-place&quot;.
static RCP< Epetra_CrsMatrix > Op2NonConstEpetraCrs(RCP< Matrix > Op)
Teuchos::RCP< Matrix > getCrsMatrix() const
return unwrap 1x1 blocked operators
static RCP< Matrix > Multiply(const Matrix &A, bool transposeA, const Matrix &B, bool transposeB, Teuchos::FancyOStream &fos, bool callFillCompleteOnResult=true, bool doOptimizeStorage=true, const std::string &label=std::string(), const RCP< ParameterList > &params=null)
Helper function to do matrix-matrix multiply.
static void TwoMatrixAdd(const Matrix &A, bool transposeA, SC alpha, Matrix &B, SC beta)
Helper function to calculate B = alpha*A + beta*B.
Xpetra-specific matrix class.
static RCP< Matrix > Multiply(const Matrix &A, bool transposeA, const Matrix &B, bool transposeB, Teuchos::FancyOStream &fos, bool callFillCompleteOnResult=true, bool doOptimizeStorage=true, const std::string &label=std::string(), const RCP< ParameterList > &params=null)
Helper function to do matrix-matrix multiply.