Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_LocalSparseTriangularSolver_def.hpp
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
43 #ifndef IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
44 #define IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
45 
46 #include "Tpetra_CrsMatrix.hpp"
47 #include "Tpetra_Core.hpp"
48 #include "Teuchos_StandardParameterEntryValidators.hpp"
49 #include "Tpetra_Details_determineLocalTriangularStructure.hpp"
50 #include "KokkosSparse_trsv.hpp"
51 
52 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
53 # include "shylu_hts.hpp"
54 #endif
55 
56 namespace Ifpack2 {
57 
58 namespace Details {
59 struct TrisolverType {
60  enum Enum {
61  Internal,
62  HTS
63  };
64 
65  static void loadPLTypeOption (Teuchos::Array<std::string>& type_strs, Teuchos::Array<Enum>& type_enums) {
66  type_strs.resize(2);
67  type_strs[0] = "Internal";
68  type_strs[1] = "HTS";
69  type_enums.resize(2);
70  type_enums[0] = Internal;
71  type_enums[1] = HTS;
72  }
73 };
74 }
75 
76 template<class MatrixType>
77 class LocalSparseTriangularSolver<MatrixType>::HtsImpl {
78 public:
79  typedef Tpetra::CrsMatrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type> crs_matrix_type;
80 
81  void reset () {
82 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
83  Timpl_ = Teuchos::null;
84  levelset_block_size_ = 1;
85 #endif
86  }
87 
88  void setParameters (const Teuchos::ParameterList& pl) {
89  (void)pl;
90 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
91  const char* block_size_s = "trisolver: block size";
92  if (pl.isParameter(block_size_s)) {
93  TEUCHOS_TEST_FOR_EXCEPT_MSG( ! pl.isType<int>(block_size_s),
94  "The parameter \"" << block_size_s << "\" must be of type int.");
95  levelset_block_size_ = pl.get<int>(block_size_s);
96  }
97  if (levelset_block_size_ < 1)
98  levelset_block_size_ = 1;
99 #endif
100  }
101 
102  // HTS has the phases symbolic+numeric, numeric, and apply. Hence the first
103  // call to compute() will trigger the symbolic+numeric phase, and subsequent
104  // calls (with the same Timpl_) will trigger the numeric phase. In the call to
105  // initialize(), essentially nothing happens.
106  void initialize (const crs_matrix_type& /* unused */) {
107 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
108  reset();
109  transpose_ = conjugate_ = false;
110 #endif
111  }
112 
113  void compute (const crs_matrix_type& T_in, const Teuchos::RCP<Teuchos::FancyOStream>& out) {
114  (void)T_in;
115  (void)out;
116 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
117  using Teuchos::ArrayRCP;
118 
122  T_in.getAllValues(rowptr, colidx, val);
123 
124  Teuchos::RCP<HtsCrsMatrix> T_hts = Teuchos::rcpWithDealloc(
125  HTST::make_CrsMatrix(rowptr.size() - 1,
126  rowptr.getRawPtr(), colidx.getRawPtr(), val.getRawPtr(),
127  transpose_, conjugate_),
128  HtsCrsMatrixDeleter());
129 
130  if (Teuchos::nonnull(Timpl_)) {
131  // Reuse the nonzero pattern.
132  HTST::reprocess_numeric(Timpl_.get(), T_hts.get());
133  } else {
134  // Build from scratch.
135  if (T_in.getCrsGraph().is_null()) {
136  if (Teuchos::nonnull(out))
137  *out << "HTS compute failed because T_in.getCrsGraph().is_null().\n";
138  return;
139  }
140  if ( ! T_in.getCrsGraph()->isSorted()) {
141  if (Teuchos::nonnull(out))
142  *out << "HTS compute failed because ! T_in.getCrsGraph().isSorted().\n";
143  return;
144  }
145  if ( ! T_in.isStorageOptimized()) {
146  if (Teuchos::nonnull(out))
147  *out << "HTS compute failed because ! T_in.isStorageOptimized().\n";
148  return;
149  }
150 
151  typename HTST::PreprocessArgs args;
152  args.T = T_hts.get();
153  args.max_nrhs = 1;
154 #ifdef _OPENMP
155  args.nthreads = omp_get_max_threads();
156 #else
157  args.nthreads = 1;
158 #endif
159  args.save_for_reprocess = true;
160  typename HTST::Options opts;
161  opts.levelset_block_size = levelset_block_size_;
162  args.options = &opts;
163 
164  try {
165  Timpl_ = Teuchos::rcpWithDealloc(HTST::preprocess(args), TImplDeleter());
166  } catch (const std::exception& e) {
167  if (Teuchos::nonnull(out))
168  *out << "HTS preprocess threw: " << e.what() << "\n";
169  }
170  }
171 #endif
172  }
173 
174  // HTS may not be able to handle a matrix, so query whether compute()
175  // succeeded.
176  bool isComputed () {
177 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
178  return Teuchos::nonnull(Timpl_);
179 #else
180  return false;
181 #endif
182  }
183 
184  // Y := beta * Y + alpha * (M * X)
185  void localApply (const MV& X, MV& Y,
186  const Teuchos::ETransp /* mode */,
187  const scalar_type& alpha, const scalar_type& beta) const {
188  (void)X;
189  (void)Y;
190  (void)alpha;
191  (void)beta;
192 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
193  const auto& X_view = X.getLocalViewHost ();
194  const auto& Y_view = Y.getLocalViewHost ();
195 
196  // Only does something if #rhs > current capacity.
197  HTST::reset_max_nrhs(Timpl_.get(), X_view.extent(1));
198  // Switch alpha and beta because of HTS's opposite convention.
199  HTST::solve_omp(Timpl_.get(),
200  // For std/Kokkos::complex.
201  reinterpret_cast<const scalar_type*>(X_view.data()),
202  X_view.extent(1),
203  // For std/Kokkos::complex.
204  reinterpret_cast<scalar_type*>(Y_view.data()),
205  beta, alpha);
206 #endif
207  }
208 
209 private:
210 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
211  typedef ::Experimental::HTS<local_ordinal_type, size_t, scalar_type> HTST;
212  typedef typename HTST::Impl TImpl;
213  typedef typename HTST::CrsMatrix HtsCrsMatrix;
214 
215  struct TImplDeleter {
216  void free (TImpl* impl) {
217  HTST::delete_Impl(impl);
218  }
219  };
220 
221  struct HtsCrsMatrixDeleter {
222  void free (HtsCrsMatrix* T) {
223  HTST::delete_CrsMatrix(T);
224  }
225  };
226 
227  Teuchos::RCP<TImpl> Timpl_;
228  bool transpose_, conjugate_;
229  int levelset_block_size_;
230 #endif
231 };
232 
233 template<class MatrixType>
236  A_ (A)
237 {
238  initializeState();
239  typedef typename Tpetra::CrsMatrix<scalar_type, local_ordinal_type,
241  if (! A.is_null ()) {
243  Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A);
245  (A_crs.is_null (), std::invalid_argument,
246  "Ifpack2::LocalSparseTriangularSolver constructor: "
247  "The input matrix A is not a Tpetra::CrsMatrix.");
248  A_crs_ = A_crs;
249  }
250 }
251 
252 template<class MatrixType>
256  A_ (A),
257  out_ (out)
258 {
259  initializeState();
260  if (! out_.is_null ()) {
261  *out_ << ">>> DEBUG Ifpack2::LocalSparseTriangularSolver constructor"
262  << std::endl;
263  }
264  typedef typename Tpetra::CrsMatrix<scalar_type, local_ordinal_type,
266  if (! A.is_null ()) {
268  Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A);
270  (A_crs.is_null (), std::invalid_argument,
271  "Ifpack2::LocalSparseTriangularSolver constructor: "
272  "The input matrix A is not a Tpetra::CrsMatrix.");
273  A_crs_ = A_crs;
274  }
275 }
276 
277 template<class MatrixType>
280 {
281  initializeState();
282 }
283 
284 template<class MatrixType>
287  out_ (out)
288 {
289  initializeState();
290  if (! out_.is_null ()) {
291  *out_ << ">>> DEBUG Ifpack2::LocalSparseTriangularSolver constructor"
292  << std::endl;
293  }
294 }
295 
296 template<class MatrixType>
298 {
299  isInitialized_ = false;
300  isComputed_ = false;
301  reverseStorage_ = false;
302  isInternallyChanged_ = false;
303  numInitialize_ = 0;
304  numCompute_ = 0;
305  numApply_ = 0;
306  initializeTime_ = 0.0;
307  computeTime_ = 0.0;
308  applyTime_ = 0.0;
309  uplo_ = "N";
310  diag_ = "N";
311 }
312 
313 template<class MatrixType>
316 {}
317 
318 template<class MatrixType>
319 void
322 {
323  using Teuchos::RCP;
325  using Teuchos::Array;
326 
327  Details::TrisolverType::Enum trisolverType = Details::TrisolverType::Internal;
328  do {
329  static const char typeName[] = "trisolver: type";
330 
331  if ( ! pl.isType<std::string>(typeName)) break;
332 
333  // Map std::string <-> TrisolverType::Enum.
334  Array<std::string> trisolverTypeStrs;
335  Array<Details::TrisolverType::Enum> trisolverTypeEnums;
336  Details::TrisolverType::loadPLTypeOption (trisolverTypeStrs, trisolverTypeEnums);
338  s2i(trisolverTypeStrs (), trisolverTypeEnums (), typeName, false);
339 
340  trisolverType = s2i.getIntegralValue(pl.get<std::string>(typeName));
341  } while (0);
342 
343  if (trisolverType == Details::TrisolverType::HTS) {
344  htsImpl_ = Teuchos::rcp (new HtsImpl ());
345  htsImpl_->setParameters (pl);
346  }
347 
348  if (pl.isParameter("trisolver: reverse U"))
349  reverseStorage_ = pl.get<bool>("trisolver: reverse U");
350 
352  (reverseStorage_ && trisolverType == Details::TrisolverType::HTS,
353  std::logic_error, "Ifpack2::LocalSparseTriangularSolver::setParameters: "
354  "You are not allowed to enable both HTS and the \"trisolver: reverse U\" "
355  "options. See GitHub issue #2647.");
356 }
357 
358 template<class MatrixType>
359 void
362 {
363  using Tpetra::Details::determineLocalTriangularStructure;
364  using crs_matrix_type = Tpetra::CrsMatrix<scalar_type, local_ordinal_type,
366  using local_matrix_type = typename crs_matrix_type::local_matrix_type;
367  using LO = local_ordinal_type;
368 
369  const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::initialize: ";
370  if (! out_.is_null ()) {
371  *out_ << ">>> DEBUG " << prefix << std::endl;
372  }
373 
375  (A_.is_null (), std::runtime_error, prefix << "You must call "
376  "setMatrix() with a nonnull input matrix before you may call "
377  "initialize() or compute().");
378  if (A_crs_.is_null ()) {
379  auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_);
381  (A_crs.get () == nullptr, std::invalid_argument,
382  prefix << "The input matrix A is not a Tpetra::CrsMatrix.");
383  A_crs_ = A_crs;
384  }
385  auto G = A_crs_->getGraph ();
387  (G.is_null (), std::logic_error, prefix << "A_ and A_crs_ are nonnull, "
388  "but A_crs_'s RowGraph G is null. "
389  "Please report this bug to the Ifpack2 developers.");
390  // At this point, the graph MUST be fillComplete. The "initialize"
391  // (symbolic) part of setup only depends on the graph structure, so
392  // the matrix itself need not be fill complete.
394  (! G->isFillComplete (), std::runtime_error, "If you call this method, "
395  "the matrix's graph must be fill complete. It is not.");
396 
397  // mfh 30 Apr 2018: See GitHub Issue #2658.
398  constexpr bool ignoreMapsForTriStructure = true;
399  auto lclTriStructure = [&] {
400  auto lclMatrix = A_crs_->getLocalMatrix ();
401  auto lclRowMap = A_crs_->getRowMap ()->getLocalMap ();
402  auto lclColMap = A_crs_->getColMap ()->getLocalMap ();
403  auto lclTriStruct =
404  determineLocalTriangularStructure (lclMatrix.graph,
405  lclRowMap,
406  lclColMap,
407  ignoreMapsForTriStructure);
408  const LO lclNumRows = lclRowMap.getNodeNumElements ();
409  this->diag_ = (lclTriStruct.diagCount < lclNumRows) ? "U" : "N";
410  this->uplo_ = lclTriStruct.couldBeLowerTriangular ? "L" :
411  (lclTriStruct.couldBeUpperTriangular ? "U" : "N");
412  return lclTriStruct;
413  } ();
414 
415  if (reverseStorage_ && lclTriStructure.couldBeUpperTriangular &&
416  htsImpl_.is_null ()) {
417  // Reverse the storage for an upper triangular matrix
418  auto Alocal = A_crs_->getLocalMatrix();
419  auto ptr = Alocal.graph.row_map;
420  auto ind = Alocal.graph.entries;
421  auto val = Alocal.values;
422 
423  auto numRows = Alocal.numRows();
424  auto numCols = Alocal.numCols();
425  auto numNnz = Alocal.nnz();
426 
427  typename decltype(ptr)::non_const_type newptr ("ptr", ptr.extent (0));
428  typename decltype(ind)::non_const_type newind ("ind", ind.extent (0));
429  decltype(val) newval ("val", val.extent (0));
430 
431  // FIXME: The code below assumes UVM
432  typename crs_matrix_type::execution_space().fence();
433  newptr(0) = 0;
434  for (local_ordinal_type row = 0, rowStart = 0; row < numRows; ++row) {
435  auto A_r = Alocal.row(numRows-1 - row);
436 
437  auto numEnt = A_r.length;
438  for (local_ordinal_type k = 0; k < numEnt; ++k) {
439  newind(rowStart + k) = numCols-1 - A_r.colidx(numEnt-1 - k);
440  newval(rowStart + k) = A_r.value (numEnt-1 - k);
441  }
442  rowStart += numEnt;
443  newptr(row+1) = rowStart;
444  }
445  typename crs_matrix_type::execution_space().fence();
446 
447  // Reverse maps
448  using map_type = typename crs_matrix_type::map_type;
449  Teuchos::RCP<map_type> newRowMap, newColMap;
450  {
451  // Reverse row map
452  auto rowMap = A_->getRowMap();
453  auto numElems = rowMap->getNodeNumElements();
454  auto rowElems = rowMap->getNodeElementList();
455 
456  Teuchos::Array<global_ordinal_type> newRowElems(rowElems.size());
457  for (size_t i = 0; i < numElems; i++)
458  newRowElems[i] = rowElems[numElems-1 - i];
459 
460  newRowMap = Teuchos::rcp(new map_type(rowMap->getGlobalNumElements(), newRowElems, rowMap->getIndexBase(), rowMap->getComm()));
461  }
462  {
463  // Reverse column map
464  auto colMap = A_->getColMap();
465  auto numElems = colMap->getNodeNumElements();
466  auto colElems = colMap->getNodeElementList();
467 
468  Teuchos::Array<global_ordinal_type> newColElems(colElems.size());
469  for (size_t i = 0; i < numElems; i++)
470  newColElems[i] = colElems[numElems-1 - i];
471 
472  newColMap = Teuchos::rcp(new map_type(colMap->getGlobalNumElements(), newColElems, colMap->getIndexBase(), colMap->getComm()));
473  }
474 
475  // Construct new matrix
476  local_matrix_type newLocalMatrix("Upermuted", numRows, numCols, numNnz, newval, newptr, newind);
477 
478  A_crs_ = Teuchos::rcp(new crs_matrix_type(newLocalMatrix, newRowMap, newColMap, A_crs_->getDomainMap(), A_crs_->getRangeMap()));
479 
480  isInternallyChanged_ = true;
481 
482  // FIXME (mfh 18 Apr 2019) Recomputing this is unnecessary, but I
483  // didn't want to break any invariants, especially considering
484  // that this branch is likely poorly tested.
485  auto newLclTriStructure =
486  determineLocalTriangularStructure (newLocalMatrix.graph,
487  newRowMap->getLocalMap (),
488  newColMap->getLocalMap (),
489  ignoreMapsForTriStructure);
490  const LO newLclNumRows = newRowMap->getNodeNumElements ();
491  this->diag_ = (newLclTriStructure.diagCount < newLclNumRows) ? "U" : "N";
492  this->uplo_ = newLclTriStructure.couldBeLowerTriangular ? "L" :
493  (newLclTriStructure.couldBeUpperTriangular ? "U" : "N");
494  }
495 
496  if (Teuchos::nonnull (htsImpl_))
497  {
498  htsImpl_->initialize (*A_crs_);
499  isInternallyChanged_ = true;
500  }
501 
502  isInitialized_ = true;
503  ++numInitialize_;
504 }
505 
506 template<class MatrixType>
507 void
510 {
511  const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::compute: ";
512  if (! out_.is_null ()) {
513  *out_ << ">>> DEBUG " << prefix << std::endl;
514  }
515 
517  (A_.is_null (), std::runtime_error, prefix << "You must call "
518  "setMatrix() with a nonnull input matrix before you may call "
519  "initialize() or compute().");
521  (A_crs_.is_null (), std::logic_error, prefix << "A_ is nonnull, but "
522  "A_crs_ is null. Please report this bug to the Ifpack2 developers.");
523  // At this point, the matrix MUST be fillComplete.
525  (! A_crs_->isFillComplete (), std::runtime_error, "If you call this "
526  "method, the matrix must be fill complete. It is not.");
527 
528  if (! isInitialized_) {
529  initialize ();
530  }
532  (! isInitialized_, std::logic_error, prefix << "initialize() should have "
533  "been called by this point, but isInitialized_ is false. "
534  "Please report this bug to the Ifpack2 developers.");
535 
536  if (Teuchos::nonnull (htsImpl_))
537  htsImpl_->compute (*A_crs_, out_);
538 
539  isComputed_ = true;
540  ++numCompute_;
541 }
542 
543 template<class MatrixType>
545 apply (const Tpetra::MultiVector<scalar_type, local_ordinal_type,
547  Tpetra::MultiVector<scalar_type, local_ordinal_type,
549  Teuchos::ETransp mode,
550  scalar_type alpha,
551  scalar_type beta) const
552 {
553  using Teuchos::RCP;
554  using Teuchos::rcp;
555  using Teuchos::rcpFromRef;
556  typedef scalar_type ST;
557  typedef Teuchos::ScalarTraits<ST> STS;
558  const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::apply: ";
559  if (! out_.is_null ()) {
560  *out_ << ">>> DEBUG " << prefix;
561  if (A_crs_.is_null ()) {
562  *out_ << "A_crs_ is null!" << std::endl;
563  }
564  else {
566  Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_);
567  const std::string uplo = this->uplo_;
568  const std::string trans = (mode == Teuchos::CONJ_TRANS) ? "C" :
569  (mode == Teuchos::TRANS ? "T" : "N");
570  const std::string diag = this->diag_;
571  *out_ << "uplo=\"" << uplo
572  << "\", trans=\"" << trans
573  << "\", diag=\"" << diag << "\"" << std::endl;
574  }
575  }
576 
578  (! isComputed (), std::runtime_error, prefix << "If compute() has not yet "
579  "been called, or if you have changed the matrix via setMatrix(), you must "
580  "call compute() before you may call this method.");
581  // If isComputed() is true, it's impossible for the matrix to be
582  // null, or for it not to be a Tpetra::CrsMatrix.
584  (A_.is_null (), std::logic_error, prefix << "A_ is null. "
585  "Please report this bug to the Ifpack2 developers.");
587  (A_crs_.is_null (), std::logic_error, prefix << "A_crs_ is null. "
588  "Please report this bug to the Ifpack2 developers.");
589  // However, it _is_ possible that the user called resumeFill() on
590  // the matrix, after calling compute(). This is NOT allowed.
592  (! A_crs_->isFillComplete (), std::runtime_error, "If you call this "
593  "method, the matrix must be fill complete. It is not. This means that "
594  " you must have called resumeFill() on the matrix before calling apply(). "
595  "This is NOT allowed. Note that this class may use the matrix's data in "
596  "place without copying it. Thus, you cannot change the matrix and expect "
597  "the solver to stay the same. If you have changed the matrix, first call "
598  "fillComplete() on it, then call compute() on this object, before you call"
599  " apply(). You do NOT need to call setMatrix, as long as the matrix "
600  "itself (that is, its address in memory) is the same.");
601 
602  auto G = A_crs_->getGraph ();
604  (G.is_null (), std::logic_error, prefix << "A_ and A_crs_ are nonnull, "
605  "but A_crs_'s RowGraph G is null. "
606  "Please report this bug to the Ifpack2 developers.");
607  auto importer = G->getImporter ();
608  auto exporter = G->getExporter ();
609 
610  if (! importer.is_null ()) {
611  if (X_colMap_.is_null () || X_colMap_->getNumVectors () != X.getNumVectors ()) {
612  X_colMap_ = rcp (new MV (importer->getTargetMap (), X.getNumVectors ()));
613  }
614  else {
615  X_colMap_->putScalar (STS::zero ());
616  }
617  // See discussion of Github Issue #672 for why the Import needs to
618  // use the ZERO CombineMode. The case where the Export is
619  // nontrivial is likely never exercised.
620  X_colMap_->doImport (X, *importer, Tpetra::ZERO);
621  }
622  RCP<const MV> X_cur = importer.is_null () ? rcpFromRef (X) :
623  Teuchos::rcp_const_cast<const MV> (X_colMap_);
624 
625  if (! exporter.is_null ()) {
626  if (Y_rowMap_.is_null () || Y_rowMap_->getNumVectors () != Y.getNumVectors ()) {
627  Y_rowMap_ = rcp (new MV (exporter->getSourceMap (), Y.getNumVectors ()));
628  }
629  else {
630  Y_rowMap_->putScalar (STS::zero ());
631  }
632  Y_rowMap_->doExport (Y, *importer, Tpetra::ADD);
633  }
634  RCP<MV> Y_cur = exporter.is_null () ? rcpFromRef (Y) : Y_rowMap_;
635 
636  localApply (*X_cur, *Y_cur, mode, alpha, beta);
637 
638  if (! exporter.is_null ()) {
639  Y.putScalar (STS::zero ());
640  Y.doExport (*Y_cur, *exporter, Tpetra::ADD);
641  }
642 
643  ++numApply_;
644 }
645 
646 template<class MatrixType>
647 void
649 localTriangularSolve (const MV& Y,
650  MV& X,
651  const Teuchos::ETransp mode) const
652 {
653  using Teuchos::CONJ_TRANS;
654  using Teuchos::NO_TRANS;
655  using Teuchos::TRANS;
656  const char tfecfFuncName[] = "localTriangularSolve: ";
657 
659  (! A_crs_->isFillComplete (), std::runtime_error,
660  "The matrix is not fill complete.");
662  (! X.isConstantStride () || ! Y.isConstantStride (), std::invalid_argument,
663  "X and Y must be constant stride.");
665  ( A_crs_->getNodeNumRows() > 0 && this->uplo_ == "N", std::runtime_error,
666  "The matrix is neither upper triangular or lower triangular. "
667  "You may only call this method if the matrix is triangular. "
668  "Remember that this is a local (per MPI process) property, and that "
669  "Tpetra only knows how to do a local (per process) triangular solve.");
672  (STS::isComplex && mode == TRANS, std::logic_error, "This method does "
673  "not currently support non-conjugated transposed solve (mode == "
674  "Teuchos::TRANS) for complex scalar types.");
675 
676  // FIXME (mfh 19 May 2016) This makes some Ifpack2 tests fail.
677  //
678  // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
679  // (Y.template need_sync<device_type> () && !
680  // Y.template need_sync<Kokkos::HostSpace> (), std::runtime_error,
681  // "Y must be sync'd to device memory before you may call this method.");
682 
683  const std::string uplo = this->uplo_;
684  const std::string trans = (mode == Teuchos::CONJ_TRANS) ? "C" :
685  (mode == Teuchos::TRANS ? "T" : "N");
686  const std::string diag = this->diag_;
687  auto A_lcl = this->A_crs_->getLocalMatrix ();
688 
689  // NOTE (mfh 20 Aug 2017): KokkosSparse::trsv currently is a
690  // sequential, host-only code. See
691  // https://github.com/kokkos/kokkos-kernels/issues/48. This
692  // means that we need to sync to host, then sync back to device
693  // when done.
694  X.sync_host ();
695  const_cast<MV&> (Y).sync_host ();
696  X.modify_host (); // we will write to X
697 
698  if (X.isConstantStride () && Y.isConstantStride ()) {
699  auto X_lcl = X.getLocalViewHost ();
700  auto Y_lcl = Y.getLocalViewHost ();
701  KokkosSparse::trsv (uplo.c_str (), trans.c_str (), diag.c_str (),
702  A_lcl, Y_lcl, X_lcl);
703  }
704  else {
705  const size_t numVecs =
706  std::min (X.getNumVectors (), Y.getNumVectors ());
707  for (size_t j = 0; j < numVecs; ++j) {
708  auto X_j = X.getVector (j);
709  auto Y_j = X.getVector (j);
710  auto X_lcl = X_j->getLocalViewHost ();
711  auto Y_lcl = Y_j->getLocalViewHost ();
712  KokkosSparse::trsv (uplo.c_str (), trans.c_str (),
713  diag.c_str (), A_lcl, Y_lcl, X_lcl);
714  }
715  }
716 
717  X.sync_device ();
718  const_cast<MV&> (Y).sync_device ();
719 }
720 
721 template<class MatrixType>
722 void
723 LocalSparseTriangularSolver<MatrixType>::
724 localApply (const MV& X,
725  MV& Y,
726  const Teuchos::ETransp mode,
727  const scalar_type& alpha,
728  const scalar_type& beta) const
729 {
730  if (mode == Teuchos::NO_TRANS && Teuchos::nonnull (htsImpl_) &&
731  htsImpl_->isComputed ()) {
732  htsImpl_->localApply (X, Y, mode, alpha, beta);
733  return;
734  }
735 
736  using Teuchos::RCP;
737  typedef scalar_type ST;
738  typedef Teuchos::ScalarTraits<ST> STS;
739 
740  if (beta == STS::zero ()) {
741  if (alpha == STS::zero ()) {
742  Y.putScalar (STS::zero ()); // Y := 0 * Y (ignore contents of Y)
743  }
744  else { // alpha != 0
745  this->localTriangularSolve (X, Y, mode);
746  if (alpha != STS::one ()) {
747  Y.scale (alpha);
748  }
749  }
750  }
751  else { // beta != 0
752  if (alpha == STS::zero ()) {
753  Y.scale (beta); // Y := beta * Y
754  }
755  else { // alpha != 0
756  MV Y_tmp (Y, Teuchos::Copy);
757  this->localTriangularSolve (X, Y_tmp, mode); // Y_tmp := M * X
758  Y.update (alpha, Y_tmp, beta); // Y := beta * Y + alpha * Y_tmp
759  }
760  }
761 }
762 
763 
764 template <class MatrixType>
765 int
768  return numInitialize_;
769 }
770 
771 template <class MatrixType>
772 int
774 getNumCompute () const {
775  return numCompute_;
776 }
777 
778 template <class MatrixType>
779 int
781 getNumApply () const {
782  return numApply_;
783 }
784 
785 template <class MatrixType>
786 double
789  return initializeTime_;
790 }
791 
792 template<class MatrixType>
793 double
795 getComputeTime () const {
796  return computeTime_;
797 }
798 
799 template<class MatrixType>
800 double
802 getApplyTime() const {
803  return applyTime_;
804 }
805 
806 template <class MatrixType>
807 std::string
809 description () const
810 {
811  std::ostringstream os;
812 
813  // Output is a valid YAML dictionary in flow style. If you don't
814  // like everything on a single line, you should call describe()
815  // instead.
816  os << "\"Ifpack2::LocalSparseTriangularSolver\": {";
817  if (this->getObjectLabel () != "") {
818  os << "Label: \"" << this->getObjectLabel () << "\", ";
819  }
820  os << "Initialized: " << (isInitialized () ? "true" : "false") << ", "
821  << "Computed: " << (isComputed () ? "true" : "false") << ", ";
822 
823  if (A_.is_null ()) {
824  os << "Matrix: null";
825  }
826  else {
827  os << "Matrix: not null"
828  << ", Global matrix dimensions: ["
829  << A_->getGlobalNumRows () << ", "
830  << A_->getGlobalNumCols () << "]";
831  }
832 
833  if (Teuchos::nonnull (htsImpl_))
834  os << ", HTS computed: " << (htsImpl_->isComputed () ? "true" : "false");
835 
836  os << "}";
837  return os.str ();
838 }
839 
840 template <class MatrixType>
843  const Teuchos::EVerbosityLevel verbLevel) const
844 {
845  using std::endl;
846  // Default verbosity level is VERB_LOW, which prints only on Process
847  // 0 of the matrix's communicator.
848  const Teuchos::EVerbosityLevel vl
849  = (verbLevel == Teuchos::VERB_DEFAULT) ? Teuchos::VERB_LOW : verbLevel;
850 
851  if (vl != Teuchos::VERB_NONE) {
852  // Print only on Process 0 in the matrix's communicator. If the
853  // matrix is null, though, we have to get the communicator from
854  // somewhere, so we ask Tpetra for its default communicator. If
855  // MPI is enabled, this wraps MPI_COMM_WORLD or a clone thereof.
856  auto comm = A_.is_null () ?
857  Tpetra::getDefaultComm () :
858  A_->getComm ();
859 
860  // Users aren't supposed to do anything with the matrix on
861  // processes where its communicator is null.
862  if (! comm.is_null () && comm->getRank () == 0) {
863  // By convention, describe() should always begin with a tab.
864  Teuchos::OSTab tab0 (out);
865  // Output is in YAML format. We have to escape the class name,
866  // because it has a colon.
867  out << "\"Ifpack2::LocalSparseTriangularSolver\":" << endl;
868  Teuchos::OSTab tab1 (out);
869  out << "Scalar: " << Teuchos::TypeNameTraits<scalar_type>::name () << endl
870  << "LocalOrdinal: " << Teuchos::TypeNameTraits<local_ordinal_type>::name () << endl
871  << "GlobalOrdinal: " << Teuchos::TypeNameTraits<global_ordinal_type>::name () << endl
872  << "Node: " << Teuchos::TypeNameTraits<node_type>::name () << endl;
873  }
874  }
875 }
876 
877 template <class MatrixType>
880 getDomainMap () const
881 {
883  (A_.is_null (), std::runtime_error,
884  "Ifpack2::LocalSparseTriangularSolver::getDomainMap: "
885  "The matrix is null. Please call setMatrix() with a nonnull input "
886  "before calling this method.");
887  return A_->getDomainMap ();
888 }
889 
890 template <class MatrixType>
893 getRangeMap () const
894 {
896  (A_.is_null (), std::runtime_error,
897  "Ifpack2::LocalSparseTriangularSolver::getRangeMap: "
898  "The matrix is null. Please call setMatrix() with a nonnull input "
899  "before calling this method.");
900  return A_->getRangeMap ();
901 }
902 
903 template<class MatrixType>
906 {
907  const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::setMatrix: ";
908 
909  // If the pointer didn't change, do nothing. This is reasonable
910  // because users are supposed to call this method with the same
911  // object over all participating processes, and pointer identity
912  // implies object identity.
913  if (A.getRawPtr () != A_.getRawPtr () || isInternallyChanged_) {
914  // Check in serial or one-process mode if the matrix is square.
916  (! A.is_null () && A->getComm ()->getSize () == 1 &&
917  A->getNodeNumRows () != A->getNodeNumCols (),
918  std::runtime_error, prefix << "If A's communicator only contains one "
919  "process, then A must be square. Instead, you provided a matrix A with "
920  << A->getNodeNumRows () << " rows and " << A->getNodeNumCols ()
921  << " columns.");
922 
923  // It's legal for A to be null; in that case, you may not call
924  // initialize() until calling setMatrix() with a nonnull input.
925  // Regardless, setting the matrix invalidates the preconditioner.
926  isInitialized_ = false;
927  isComputed_ = false;
928 
929  typedef typename Tpetra::CrsMatrix<scalar_type, local_ordinal_type,
931  if (A.is_null ()) {
932  A_crs_ = Teuchos::null;
933  A_ = Teuchos::null;
934  }
935  else { // A is not null
937  Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A);
939  (A_crs.is_null (), std::invalid_argument, prefix <<
940  "The input matrix A is not a Tpetra::CrsMatrix.");
941  A_crs_ = A_crs;
942  A_ = A;
943  }
944 
945  if (Teuchos::nonnull (htsImpl_))
946  htsImpl_->reset ();
947  } // pointers are not the same
948 }
949 
950 } // namespace Ifpack2
951 
952 #define IFPACK2_LOCALSPARSETRIANGULARSOLVER_INSTANT(S,LO,GO,N) \
953  template class Ifpack2::LocalSparseTriangularSolver< Tpetra::RowMatrix<S, LO, GO, N> >;
954 
955 #endif // IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the preconditioner to X, and put the result in Y.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:545
int getNumCompute() const
Return the number of calls to compute().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:774
std::string description() const
A one-line description of this object.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:809
T & get(const std::string &name, T def_value)
bool nonnull(const std::shared_ptr< T > &p)
#define TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(throw_exception_test, Exception, msg)
void compute()
&quot;Numeric&quot; phase of setup
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:509
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
MatrixType::global_ordinal_type global_ordinal_type
Type of the global indices of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:93
T * getRawPtr() const
bool isComputed() const
Return true if compute() has been called.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:209
T * get() const
double getApplyTime() const
Return the time spent in apply().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:802
size_type size() const
double getComputeTime() const
Return the time spent in compute().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:795
double getInitializeTime() const
Return the time spent in initialize().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:788
MatrixType::node_type node_type
Node type of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:95
bool isParameter(const std::string &name) const
T * getRawPtr() const
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
#define TEUCHOS_TEST_FOR_EXCEPT_MSG(throw_exception_test, msg)
Teuchos::RCP< const map_type > getDomainMap() const
The domain of this operator.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:880
&quot;Preconditioner&quot; that solves local sparse triangular systems.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:77
void initialize()
&quot;Symbolic&quot; phase of setup
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:361
int getNumInitialize() const
Return the number of calls to initialize().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:767
Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > map_type
Specialization of Tpetra::Map used by this class.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:100
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Set this preconditioner&#39;s matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:905
void resize(size_type new_size, const value_type &x=value_type())
IntegralType getIntegralValue(const std::string &str, const std::string &paramName="", const std::string &sublistName="") const
int getNumApply() const
Return the number of calls to apply().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:781
MatrixType::local_ordinal_type local_ordinal_type
Type of the local indices of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:91
Teuchos::RCP< const map_type > getRangeMap() const
The range of this operator.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:893
bool isType(const std::string &name) const
Tpetra::CrsMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > crs_matrix_type
Specialization of Tpetra::CrsMatrix used by this class.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:106
void setParameters(const Teuchos::ParameterList &params)
Set this object&#39;s parameters.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:321
MatrixType::scalar_type scalar_type
Type of the entries of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:89
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print this object with given verbosity to the given output stream.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:842
virtual ~LocalSparseTriangularSolver()
Destructor (virtual for memory safety).
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:315
LocalSparseTriangularSolver()
Constructor that takes no input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:279
static std::string name()
std::string typeName(const T &t)
bool is_null() const