Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_LocalSparseTriangularSolver_def.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4 //
5 // Copyright 2009 NTESS and the Ifpack2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
11 #define IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
12 
13 #include <sstream> // ostringstream
14 #include <stdexcept> // runtime_error
15 
16 #include "Ifpack2_LocalSparseTriangularSolver_decl.hpp"
17 #include "Tpetra_CrsMatrix.hpp"
18 #include "Tpetra_Core.hpp"
19 #include "Teuchos_StandardParameterEntryValidators.hpp"
20 #include "Tpetra_Details_determineLocalTriangularStructure.hpp"
21 #include "KokkosSparse_trsv.hpp"
22 
23 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
24 #include "shylu_hts.hpp"
25 #endif
26 
27 namespace Ifpack2 {
28 
29 namespace Details {
30 
31 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA)
32 
33 inline void cusparse_error_throw(cusparseStatus_t cusparseStatus, const char* name,
34  const char* file, const int line) {
35  std::ostringstream out;
36 #if defined(CUSPARSE_VERSION) && (10300 <= CUSPARSE_VERSION)
37  out << name << " error( " << cusparseGetErrorName(cusparseStatus) << "): " << cusparseGetErrorString(cusparseStatus);
38 #else
39  out << name << " error( ";
40  switch (cusparseStatus) {
41  case CUSPARSE_STATUS_NOT_INITIALIZED:
42  out << "CUSPARSE_STATUS_NOT_INITIALIZED): cusparse handle was not "
43  "created correctly.";
44  break;
45  case CUSPARSE_STATUS_ALLOC_FAILED:
46  out << "CUSPARSE_STATUS_ALLOC_FAILED): you might tried to allocate too "
47  "much memory";
48  break;
49  case CUSPARSE_STATUS_INVALID_VALUE: out << "CUSPARSE_STATUS_INVALID_VALUE)"; break;
50  case CUSPARSE_STATUS_ARCH_MISMATCH: out << "CUSPARSE_STATUS_ARCH_MISMATCH)"; break;
51  case CUSPARSE_STATUS_MAPPING_ERROR: out << "CUSPARSE_STATUS_MAPPING_ERROR)"; break;
52  case CUSPARSE_STATUS_EXECUTION_FAILED: out << "CUSPARSE_STATUS_EXECUTION_FAILED)"; break;
53  case CUSPARSE_STATUS_INTERNAL_ERROR: out << "CUSPARSE_STATUS_INTERNAL_ERROR)"; break;
54  case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED: out << "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED)"; break;
55  case CUSPARSE_STATUS_ZERO_PIVOT: out << "CUSPARSE_STATUS_ZERO_PIVOT)"; break;
56  default: out << "unrecognized error code): this is bad!"; break;
57  }
58 #endif // CUSPARSE_VERSION
59  if (file) {
60  out << " " << file << ":" << line;
61  }
62  throw std::runtime_error(out.str());
63 }
64 
65 inline void cusparse_safe_call(cusparseStatus_t cusparseStatus, const char* name, const char* file = nullptr,
66  const int line = 0) {
67  if (CUSPARSE_STATUS_SUCCESS != cusparseStatus) {
68  cusparse_error_throw(cusparseStatus, name, file, line);
69  }
70 }
71 
72 #define IFPACK2_DETAILS_CUSPARSE_SAFE_CALL(call) \
73  Ifpack2::Details::cusparse_safe_call(call, #call, __FILE__, __LINE__)
74 
75 #endif // defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA)
76 
77 struct TrisolverType {
78  enum Enum {
79  Internal,
80  HTS,
81  KSPTRSV
82  };
83 
84  static void loadPLTypeOption(Teuchos::Array<std::string>& type_strs, Teuchos::Array<Enum>& type_enums) {
85  type_strs.resize(3);
86  type_strs[0] = "Internal";
87  type_strs[1] = "HTS";
88  type_strs[2] = "KSPTRSV";
89  type_enums.resize(3);
90  type_enums[0] = Internal;
91  type_enums[1] = HTS;
92  type_enums[2] = KSPTRSV;
93  }
94 };
95 } // namespace Details
96 
97 template <class MatrixType>
98 class LocalSparseTriangularSolver<MatrixType>::HtsImpl {
99  public:
100  typedef Tpetra::CrsMatrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type> crs_matrix_type;
101 
102  void reset() {
103 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
104  Timpl_ = Teuchos::null;
105  levelset_block_size_ = 1;
106 #endif
107  }
108 
109  void setParameters(const Teuchos::ParameterList& pl) {
110  (void)pl;
111 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
112  const char* block_size_s = "trisolver: block size";
113  if (pl.isParameter(block_size_s)) {
114  TEUCHOS_TEST_FOR_EXCEPT_MSG(!pl.isType<int>(block_size_s),
115  "The parameter \"" << block_size_s << "\" must be of type int.");
116  levelset_block_size_ = pl.get<int>(block_size_s);
117  }
118  if (levelset_block_size_ < 1)
119  levelset_block_size_ = 1;
120 #endif
121  }
122 
123  // HTS has the phases symbolic+numeric, numeric, and apply. Hence the first
124  // call to compute() will trigger the symbolic+numeric phase, and subsequent
125  // calls (with the same Timpl_) will trigger the numeric phase. In the call to
126  // initialize(), essentially nothing happens.
127  void initialize(const crs_matrix_type& /* unused */) {
128 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
129  reset();
130  transpose_ = conjugate_ = false;
131 #endif
132  }
133 
134  void compute(const crs_matrix_type& T_in, const Teuchos::RCP<Teuchos::FancyOStream>& out) {
135  (void)T_in;
136  (void)out;
137 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
138  using Teuchos::ArrayRCP;
139 
140  auto rowptr = T_in.getLocalRowPtrsHost();
141  auto colidx = T_in.getLocalIndicesHost();
142  auto val = T_in.getLocalValuesHost(Tpetra::Access::ReadOnly);
143  Kokkos::fence();
144 
145  Teuchos::RCP<HtsCrsMatrix> T_hts = Teuchos::rcpWithDealloc(
146  HTST::make_CrsMatrix(rowptr.size() - 1,
147  rowptr.data(), colidx.data(),
148  // For std/Kokkos::complex.
149  reinterpret_cast<const scalar_type*>(val.data()),
150  transpose_, conjugate_),
151  HtsCrsMatrixDeleter());
152 
153  if (Teuchos::nonnull(Timpl_)) {
154  // Reuse the nonzero pattern.
155  HTST::reprocess_numeric(Timpl_.get(), T_hts.get());
156  } else {
157  // Build from scratch.
158  if (T_in.getCrsGraph().is_null()) {
159  if (Teuchos::nonnull(out))
160  *out << "HTS compute failed because T_in.getCrsGraph().is_null().\n";
161  return;
162  }
163  if (!T_in.getCrsGraph()->isSorted()) {
164  if (Teuchos::nonnull(out))
165  *out << "HTS compute failed because ! T_in.getCrsGraph().isSorted().\n";
166  return;
167  }
168  if (!T_in.isStorageOptimized()) {
169  if (Teuchos::nonnull(out))
170  *out << "HTS compute failed because ! T_in.isStorageOptimized().\n";
171  return;
172  }
173 
174  typename HTST::PreprocessArgs args;
175  args.T = T_hts.get();
176  args.max_nrhs = 1;
177 #ifdef _OPENMP
178  args.nthreads = omp_get_max_threads();
179 #else
180  args.nthreads = 1;
181 #endif
182  args.save_for_reprocess = true;
183  typename HTST::Options opts;
184  opts.levelset_block_size = levelset_block_size_;
185  args.options = &opts;
186 
187  try {
188  Timpl_ = Teuchos::rcpWithDealloc(HTST::preprocess(args), TImplDeleter());
189  } catch (const std::exception& e) {
190  if (Teuchos::nonnull(out))
191  *out << "HTS preprocess threw: " << e.what() << "\n";
192  }
193  }
194 #endif
195  }
196 
197  // HTS may not be able to handle a matrix, so query whether compute()
198  // succeeded.
199  bool isComputed() {
200 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
201  return Teuchos::nonnull(Timpl_);
202 #else
203  return false;
204 #endif
205  }
206 
207  // Y := beta * Y + alpha * (M * X)
208  void localApply(const MV& X, MV& Y,
209  const Teuchos::ETransp /* mode */,
210  const scalar_type& alpha, const scalar_type& beta) const {
211  (void)X;
212  (void)Y;
213  (void)alpha;
214  (void)beta;
215 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
216  const auto& X_view = X.getLocalViewHost(Tpetra::Access::ReadOnly);
217  const auto& Y_view = Y.getLocalViewHost(Tpetra::Access::ReadWrite);
218 
219  // Only does something if #rhs > current capacity.
220  HTST::reset_max_nrhs(Timpl_.get(), X_view.extent(1));
221  // Switch alpha and beta because of HTS's opposite convention.
222  HTST::solve_omp(Timpl_.get(),
223  // For std/Kokkos::complex.
224  reinterpret_cast<const scalar_type*>(X_view.data()),
225  X_view.extent(1),
226  // For std/Kokkos::complex.
227  reinterpret_cast<scalar_type*>(Y_view.data()),
228  beta, alpha);
229 #endif
230  }
231 
232  private:
233 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
234  typedef ::Experimental::HTS<local_ordinal_type, size_t, scalar_type> HTST;
235  typedef typename HTST::Impl TImpl;
236  typedef typename HTST::CrsMatrix HtsCrsMatrix;
237 
238  struct TImplDeleter {
239  void free(TImpl* impl) {
240  HTST::delete_Impl(impl);
241  }
242  };
243 
244  struct HtsCrsMatrixDeleter {
245  void free(HtsCrsMatrix* T) {
246  HTST::delete_CrsMatrix(T);
247  }
248  };
249 
250  Teuchos::RCP<TImpl> Timpl_;
251  bool transpose_, conjugate_;
252  int levelset_block_size_;
253 #endif
254 };
255 
256 template <class MatrixType>
259  : A_(A) {
260  initializeState();
261 
262  if (!A.is_null()) {
264  Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A);
265  TEUCHOS_TEST_FOR_EXCEPTION(A_crs.is_null(), std::invalid_argument,
266  "Ifpack2::LocalSparseTriangularSolver constructor: "
267  "The input matrix A is not a Tpetra::CrsMatrix.");
268  A_crs_ = A_crs;
269  }
270 }
271 
272 template <class MatrixType>
276  : A_(A)
277  , out_(out) {
278  initializeState();
279  if (!out_.is_null()) {
280  *out_ << ">>> DEBUG Ifpack2::LocalSparseTriangularSolver constructor"
281  << std::endl;
282  }
283 
284  if (!A.is_null()) {
286  Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A);
287  TEUCHOS_TEST_FOR_EXCEPTION(A_crs.is_null(), std::invalid_argument,
288  "Ifpack2::LocalSparseTriangularSolver constructor: "
289  "The input matrix A is not a Tpetra::CrsMatrix.");
290  A_crs_ = A_crs;
291  }
292 }
293 
294 template <class MatrixType>
297  initializeState();
298 }
299 
300 template <class MatrixType>
303  : out_(out) {
304  initializeState();
305  if (!out_.is_null()) {
306  *out_ << ">>> DEBUG Ifpack2::LocalSparseTriangularSolver constructor"
307  << std::endl;
308  }
309 }
310 
311 template <class MatrixType>
313  isInitialized_ = false;
314  isComputed_ = false;
315  reverseStorage_ = false;
316  isInternallyChanged_ = false;
317  numInitialize_ = 0;
318  numCompute_ = 0;
319  numApply_ = 0;
320  initializeTime_ = 0.0;
321  computeTime_ = 0.0;
322  applyTime_ = 0.0;
323  isKokkosKernelsSptrsv_ = false;
324  isKokkosKernelsStream_ = false;
325  num_streams_ = 0;
326  uplo_ = "N";
327  diag_ = "N";
328 }
329 
330 template <class MatrixType>
333  if (!isKokkosKernelsStream_) {
334  if (Teuchos::nonnull(kh_)) {
335  kh_->destroy_sptrsv_handle();
336  }
337  } else {
338  for (size_t i = 0; i < kh_v_.size(); i++) {
339  if (Teuchos::nonnull(kh_v_[i])) {
340  kh_v_[i]->destroy_sptrsv_handle();
341  }
342  }
343  }
344 }
345 
346 template <class MatrixType>
349  using Teuchos::Array;
351  using Teuchos::RCP;
352 
353  Details::TrisolverType::Enum trisolverType = Details::TrisolverType::Internal;
354  do {
355  static const char typeName[] = "trisolver: type";
356 
357  if (!pl.isType<std::string>(typeName)) break;
358 
359  // Map std::string <-> TrisolverType::Enum.
360  Array<std::string> trisolverTypeStrs;
361  Array<Details::TrisolverType::Enum> trisolverTypeEnums;
362  Details::TrisolverType::loadPLTypeOption(trisolverTypeStrs, trisolverTypeEnums);
364  s2i(trisolverTypeStrs(), trisolverTypeEnums(), typeName, false);
365 
366  trisolverType = s2i.getIntegralValue(pl.get<std::string>(typeName));
367  } while (0);
368 
369  if (trisolverType == Details::TrisolverType::HTS) {
370  htsImpl_ = Teuchos::rcp(new HtsImpl());
371  htsImpl_->setParameters(pl);
372  }
373 
374  if (trisolverType == Details::TrisolverType::KSPTRSV) {
375  this->isKokkosKernelsSptrsv_ = true;
376  } else {
377  this->isKokkosKernelsSptrsv_ = false;
378  }
379 
380  if (pl.isParameter("trisolver: reverse U"))
381  reverseStorage_ = pl.get<bool>("trisolver: reverse U");
382 
383  TEUCHOS_TEST_FOR_EXCEPTION(reverseStorage_ && (trisolverType == Details::TrisolverType::HTS || trisolverType == Details::TrisolverType::KSPTRSV),
384  std::logic_error,
385  "Ifpack2::LocalSparseTriangularSolver::setParameters: "
386  "You are not allowed to enable both HTS or KSPTRSV and the \"trisolver: reverse U\" "
387  "options. See GitHub issue #2647.");
388 }
389 
390 template <class MatrixType>
393  using Tpetra::Details::determineLocalTriangularStructure;
394 
395  using local_matrix_type = typename crs_matrix_type::local_matrix_device_type;
396  using LO = local_ordinal_type;
397 
398  const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::initialize: ";
399  if (!out_.is_null()) {
400  *out_ << ">>> DEBUG " << prefix << std::endl;
401  }
402 
403  if (!isKokkosKernelsStream_) {
404  TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::runtime_error, prefix << "You must call "
405  "setMatrix() with a nonnull input matrix before you may call "
406  "initialize() or compute().");
407  if (A_crs_.is_null()) {
408  auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_);
409  TEUCHOS_TEST_FOR_EXCEPTION(A_crs.get() == nullptr, std::invalid_argument,
410  prefix << "The input matrix A is not a Tpetra::CrsMatrix.");
411  A_crs_ = A_crs;
412  }
413  auto G = A_crs_->getGraph();
414  TEUCHOS_TEST_FOR_EXCEPTION(G.is_null(), std::logic_error, prefix << "A_ and A_crs_ are nonnull, "
415  "but A_crs_'s RowGraph G is null. "
416  "Please report this bug to the Ifpack2 developers.");
417  // At this point, the graph MUST be fillComplete. The "initialize"
418  // (symbolic) part of setup only depends on the graph structure, so
419  // the matrix itself need not be fill complete.
420  TEUCHOS_TEST_FOR_EXCEPTION(!G->isFillComplete(), std::runtime_error,
421  "If you call this method, "
422  "the matrix's graph must be fill complete. It is not.");
423 
424  // mfh 30 Apr 2018: See GitHub Issue #2658.
425  constexpr bool ignoreMapsForTriStructure = true;
426  auto lclTriStructure = [&] {
427  auto lclMatrix = A_crs_->getLocalMatrixDevice();
428  auto lclRowMap = A_crs_->getRowMap()->getLocalMap();
429  auto lclColMap = A_crs_->getColMap()->getLocalMap();
430  auto lclTriStruct =
431  determineLocalTriangularStructure(lclMatrix.graph,
432  lclRowMap,
433  lclColMap,
434  ignoreMapsForTriStructure);
435  const LO lclNumRows = lclRowMap.getLocalNumElements();
436  this->diag_ = (lclTriStruct.diagCount < lclNumRows) ? "U" : "N";
437  this->uplo_ = lclTriStruct.couldBeLowerTriangular ? "L" : (lclTriStruct.couldBeUpperTriangular ? "U" : "N");
438  return lclTriStruct;
439  }();
440 
441  if (reverseStorage_ && lclTriStructure.couldBeUpperTriangular &&
442  htsImpl_.is_null()) {
443  // Reverse the storage for an upper triangular matrix
444  auto Alocal = A_crs_->getLocalMatrixDevice();
445  auto ptr = Alocal.graph.row_map;
446  auto ind = Alocal.graph.entries;
447  auto val = Alocal.values;
448 
449  auto numRows = Alocal.numRows();
450  auto numCols = Alocal.numCols();
451  auto numNnz = Alocal.nnz();
452 
453  typename decltype(ptr)::non_const_type newptr("ptr", ptr.extent(0));
454  typename decltype(ind)::non_const_type newind("ind", ind.extent(0));
455  decltype(val) newval("val", val.extent(0));
456 
457  // FIXME: The code below assumes UVM
458  typename crs_matrix_type::execution_space().fence();
459  newptr(0) = 0;
460  for (local_ordinal_type row = 0, rowStart = 0; row < numRows; ++row) {
461  auto A_r = Alocal.row(numRows - 1 - row);
462 
463  auto numEnt = A_r.length;
464  for (local_ordinal_type k = 0; k < numEnt; ++k) {
465  newind(rowStart + k) = numCols - 1 - A_r.colidx(numEnt - 1 - k);
466  newval(rowStart + k) = A_r.value(numEnt - 1 - k);
467  }
468  rowStart += numEnt;
469  newptr(row + 1) = rowStart;
470  }
471  typename crs_matrix_type::execution_space().fence();
472 
473  // Reverse maps
474  Teuchos::RCP<map_type> newRowMap, newColMap;
475  {
476  // Reverse row map
477  auto rowMap = A_->getRowMap();
478  auto numElems = rowMap->getLocalNumElements();
479  auto rowElems = rowMap->getLocalElementList();
480 
481  Teuchos::Array<global_ordinal_type> newRowElems(rowElems.size());
482  for (size_t i = 0; i < numElems; i++)
483  newRowElems[i] = rowElems[numElems - 1 - i];
484 
485  newRowMap = Teuchos::rcp(new map_type(rowMap->getGlobalNumElements(), newRowElems, rowMap->getIndexBase(), rowMap->getComm()));
486  }
487  {
488  // Reverse column map
489  auto colMap = A_->getColMap();
490  auto numElems = colMap->getLocalNumElements();
491  auto colElems = colMap->getLocalElementList();
492 
493  Teuchos::Array<global_ordinal_type> newColElems(colElems.size());
494  for (size_t i = 0; i < numElems; i++)
495  newColElems[i] = colElems[numElems - 1 - i];
496 
497  newColMap = Teuchos::rcp(new map_type(colMap->getGlobalNumElements(), newColElems, colMap->getIndexBase(), colMap->getComm()));
498  }
499 
500  // Construct new matrix
501  local_matrix_type newLocalMatrix("Upermuted", numRows, numCols, numNnz, newval, newptr, newind);
502 
503  A_crs_ = Teuchos::rcp(new crs_matrix_type(newLocalMatrix, newRowMap, newColMap, A_crs_->getDomainMap(), A_crs_->getRangeMap()));
504 
505  isInternallyChanged_ = true;
506 
507  // FIXME (mfh 18 Apr 2019) Recomputing this is unnecessary, but I
508  // didn't want to break any invariants, especially considering
509  // that this branch is likely poorly tested.
510  auto newLclTriStructure =
511  determineLocalTriangularStructure(newLocalMatrix.graph,
512  newRowMap->getLocalMap(),
513  newColMap->getLocalMap(),
514  ignoreMapsForTriStructure);
515  const LO newLclNumRows = newRowMap->getLocalNumElements();
516  this->diag_ = (newLclTriStructure.diagCount < newLclNumRows) ? "U" : "N";
517  this->uplo_ = newLclTriStructure.couldBeLowerTriangular ? "L" : (newLclTriStructure.couldBeUpperTriangular ? "U" : "N");
518  }
519  } else {
520  bool prev_ambiguous = false;
521  bool all_ambiguous = true;
522  for (int i = 0; i < num_streams_; i++) {
523  TEUCHOS_TEST_FOR_EXCEPTION(A_crs_v_[i].is_null(), std::runtime_error, prefix << "You must call "
524  "setMatrix() with a nonnull input matrix before you may call "
525  "initialize() or compute().");
526  auto G = A_crs_v_[i]->getGraph();
527  TEUCHOS_TEST_FOR_EXCEPTION(G.is_null(), std::logic_error, prefix << "A_crs_ are nonnull, "
528  "but A_crs_'s RowGraph G is null. "
529  "Please report this bug to the Ifpack2 developers.");
530  // At this point, the graph MUST be fillComplete. The "initialize"
531  // (symbolic) part of setup only depends on the graph structure, so
532  // the matrix itself need not be fill complete.
533  TEUCHOS_TEST_FOR_EXCEPTION(!G->isFillComplete(), std::runtime_error,
534  "If you call this method, "
535  "the matrix's graph must be fill complete. It is not.");
536 
537  // mfh 30 Apr 2018: See GitHub Issue #2658.
538  constexpr bool ignoreMapsForTriStructure = true;
539  std::string prev_uplo = this->uplo_;
540  std::string prev_diag = this->diag_;
541  auto lclMatrix = A_crs_v_[i]->getLocalMatrixDevice();
542  auto lclRowMap = A_crs_v_[i]->getRowMap()->getLocalMap();
543  auto lclColMap = A_crs_v_[i]->getColMap()->getLocalMap();
544  auto lclTriStruct =
545  determineLocalTriangularStructure(lclMatrix.graph,
546  lclRowMap,
547  lclColMap,
548  ignoreMapsForTriStructure);
549  const LO lclNumRows = lclRowMap.getLocalNumElements();
550  this->diag_ = (lclTriStruct.diagCount < lclNumRows) ? "U" : "N";
551  const bool could_be_lower = lclTriStruct.couldBeLowerTriangular;
552  const bool could_be_upper = lclTriStruct.couldBeUpperTriangular;
553  if (could_be_lower && could_be_upper) {
554  // Ambiguous, but that's OK if at least one stream is unabiguous
555  this->uplo_ = prev_uplo;
556  prev_ambiguous = true;
557  } else {
558  this->uplo_ = could_be_lower ? "L" : (could_be_upper ? "U" : "N");
559  if (this->uplo_ != "N" && prev_uplo == "N" && prev_ambiguous) {
560  prev_uplo = this->uplo_;
561  }
562  prev_ambiguous = false;
563  }
564  all_ambiguous &= prev_ambiguous;
565  if (i > 0) {
566  TEUCHOS_TEST_FOR_EXCEPTION((this->diag_ != prev_diag) || (this->uplo_ != prev_uplo),
567  std::logic_error, prefix << "A_crs_'s structures in streams "
568  "are different. Please report this bug to the Ifpack2 developers.");
569  }
570  }
571  // If all streams were ambiguous, just call it "L"
572  if (all_ambiguous) {
573  this->uplo_ = "L";
574  }
575  }
576 
577  if (Teuchos::nonnull(htsImpl_)) {
578  htsImpl_->initialize(*A_crs_);
579  isInternallyChanged_ = true;
580  }
581 
582  const bool ksptrsv_valid_uplo = (this->uplo_ != "N");
583  kh_v_nonnull_ = false;
584  if (this->isKokkosKernelsSptrsv_ && ksptrsv_valid_uplo && this->diag_ != "U") {
585  if (!isKokkosKernelsStream_) {
586  kh_ = Teuchos::rcp(new k_handle());
587  const bool is_lower_tri = (this->uplo_ == "L") ? true : false;
588 
589  auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_, true);
590  auto Alocal = A_crs->getLocalMatrixDevice();
591  auto ptr = Alocal.graph.row_map;
592  auto ind = Alocal.graph.entries;
593  auto val = Alocal.values;
594 
595  auto numRows = Alocal.numRows();
596  kh_->create_sptrsv_handle(kokkosKernelsAlgorithm(), numRows, is_lower_tri);
597  KokkosSparse::sptrsv_symbolic(kh_.getRawPtr(), ptr, ind, val);
598  } else {
599  kh_v_ = std::vector<Teuchos::RCP<k_handle>>(num_streams_);
600  for (int i = 0; i < num_streams_; i++) {
601  kh_v_[i] = Teuchos::rcp(new k_handle());
602  auto A_crs_i = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_crs_v_[i], true);
603  auto Alocal_i = A_crs_i->getLocalMatrixDevice();
604  auto ptr_i = Alocal_i.graph.row_map;
605  auto ind_i = Alocal_i.graph.entries;
606  auto val_i = Alocal_i.values;
607 
608  auto numRows_i = Alocal_i.numRows();
609 
610  const bool is_lower_tri = (this->uplo_ == "L") ? true : false;
611  kh_v_[i]->create_sptrsv_handle(kokkosKernelsAlgorithm(), numRows_i, is_lower_tri);
612  KokkosSparse::sptrsv_symbolic(kh_v_[i].getRawPtr(), ptr_i, ind_i, val_i);
613  }
614  kh_v_nonnull_ = true;
615  }
616  }
617 
618  isInitialized_ = true;
619  ++numInitialize_;
620 }
621 
622 template <class MatrixType>
623 KokkosSparse::Experimental::SPTRSVAlgorithm
625 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA)
626  // CuSparse only supports int type ordinals
627  // and scalar types of float, double, float complex and double complex
628  if constexpr (std::is_same<Kokkos::Cuda, HandleExecSpace>::value &&
629  std::is_same<int, local_ordinal_type>::value &&
630  (std::is_same<scalar_type, float>::value ||
631  std::is_same<scalar_type, double>::value ||
632  std::is_same<impl_scalar_type, Kokkos::complex<float>>::value ||
633  std::is_same<impl_scalar_type, Kokkos::complex<double>>::value)) {
634  return KokkosSparse::Experimental::SPTRSVAlgorithm::SPTRSV_CUSPARSE;
635  }
636 #endif
637  return KokkosSparse::Experimental::SPTRSVAlgorithm::SEQLVLSCHD_TP1;
638 }
639 
640 template <class MatrixType>
643  const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::compute: ";
644  if (!out_.is_null()) {
645  *out_ << ">>> DEBUG " << prefix << std::endl;
646  }
647 
648  if (!isKokkosKernelsStream_) {
649  TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::runtime_error, prefix << "You must call "
650  "setMatrix() with a nonnull input matrix before you may call "
651  "initialize() or compute().");
652  TEUCHOS_TEST_FOR_EXCEPTION(A_crs_.is_null(), std::logic_error, prefix << "A_ is nonnull, but "
653  "A_crs_ is null. Please report this bug to the Ifpack2 developers.");
654  // At this point, the matrix MUST be fillComplete.
655  TEUCHOS_TEST_FOR_EXCEPTION(!A_crs_->isFillComplete(), std::runtime_error,
656  "If you call this "
657  "method, the matrix must be fill complete. It is not.");
658  } else {
659  for (int i = 0; i < num_streams_; i++) {
660  TEUCHOS_TEST_FOR_EXCEPTION(A_crs_v_[i].is_null(), std::runtime_error, prefix << "You must call "
661  "setMatrices() with a nonnull input matrix before you may call "
662  "initialize() or compute().");
663  // At this point, the matrix MUST be fillComplete.
664  TEUCHOS_TEST_FOR_EXCEPTION(!A_crs_v_[i]->isFillComplete(), std::runtime_error,
665  "If you call this "
666  "method, the matrix must be fill complete. It is not.");
667  }
668  }
669 
670  if (!isInitialized_) {
671  initialize();
672  }
673  TEUCHOS_TEST_FOR_EXCEPTION(!isInitialized_, std::logic_error, prefix << "initialize() should have "
674  "been called by this point, but isInitialized_ is false. "
675  "Please report this bug to the Ifpack2 developers.");
676 
677 // NOTE (Nov-09-2022):
678 // For Cuda >= 11.3 (using cusparseSpSV), always call symbolic during compute
679 // even when matrix values are changed with the same sparsity pattern.
680 // For Cuda >= 12.1 has a new cusparseSpSV_updateMatrix function just for updating the
681 // values that is substantially faster.
682 // This would all be much better handled via a KokkosSparse::sptrsv_numeric(...)
683 // that could hide the Cuda implementation details.
684 #if defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && (CUDA_VERSION >= 11030)
685  if constexpr (std::is_same_v<typename crs_matrix_type::execution_space, Kokkos::Cuda>) {
686  if (this->isKokkosKernelsSptrsv_) {
687  if (Teuchos::nonnull(kh_) && !isKokkosKernelsStream_) {
688  auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_crs_, true);
689  auto Alocal = A_crs->getLocalMatrixDevice();
690  auto val = Alocal.values;
691  auto ptr = Alocal.graph.row_map;
692  auto ind = Alocal.graph.entries;
693  KokkosSparse::sptrsv_symbolic(kh_.getRawPtr(), ptr, ind, val);
694  } else if (kh_v_nonnull_) {
695  for (int i = 0; i < num_streams_; i++) {
696  auto A_crs_i = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_crs_v_[i], true);
697  auto Alocal_i = A_crs_i->getLocalMatrixDevice();
698  auto val_i = Alocal_i.values;
699  auto ptr_i = Alocal_i.graph.row_map;
700  auto ind_i = Alocal_i.graph.entries;
701  KokkosSparse::sptrsv_symbolic(exec_space_instances_[i], kh_v_[i].getRawPtr(), ptr_i, ind_i, val_i);
702  }
703  }
704  }
705  }
706 #endif
707 
708  if (!isComputed_) { // Only compute if not computed before
709  if (Teuchos::nonnull(htsImpl_))
710  htsImpl_->compute(*A_crs_, out_);
711 
712  isComputed_ = true;
713  ++numCompute_;
714  }
715 }
716 
717 template <class MatrixType>
719  apply(const Tpetra::MultiVector<scalar_type, local_ordinal_type,
721  Tpetra::MultiVector<scalar_type, local_ordinal_type,
723  Teuchos::ETransp mode,
724  scalar_type alpha,
725  scalar_type beta) const {
726  using Teuchos::RCP;
727  using Teuchos::rcp;
728  using Teuchos::rcpFromRef;
729  typedef scalar_type ST;
730  typedef Teuchos::ScalarTraits<ST> STS;
731  const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::apply: ";
732 
733  if (!out_.is_null()) {
734  *out_ << ">>> DEBUG " << prefix;
735  if (!isKokkosKernelsStream_) {
736  if (A_crs_.is_null()) {
737  *out_ << "A_crs_ is null!" << std::endl;
738  } else {
740  Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_);
741  const std::string uplo = this->uplo_;
742  const std::string trans = (mode == Teuchos::CONJ_TRANS) ? "C" : (mode == Teuchos::TRANS ? "T" : "N");
743  const std::string diag = this->diag_;
744  *out_ << "uplo=\"" << uplo
745  << "\", trans=\"" << trans
746  << "\", diag=\"" << diag << "\"" << std::endl;
747  }
748  } else {
749  for (int i = 0; i < num_streams_; i++) {
750  if (A_crs_v_[i].is_null()) {
751  *out_ << "A_crs_v_[" << i << "]"
752  << " is null!" << std::endl;
753  } else {
754  const std::string uplo = this->uplo_;
755  const std::string trans = (mode == Teuchos::CONJ_TRANS) ? "C" : (mode == Teuchos::TRANS ? "T" : "N");
756  const std::string diag = this->diag_;
757  *out_ << "A_crs_v_[" << i << "]: "
758  << "uplo=\"" << uplo
759  << "\", trans=\"" << trans
760  << "\", diag=\"" << diag << "\"" << std::endl;
761  }
762  }
763  }
764  }
765 
766  TEUCHOS_TEST_FOR_EXCEPTION(!isComputed(), std::runtime_error, prefix << "If compute() has not yet "
767  "been called, or if you have changed the matrix via setMatrix(), you must "
768  "call compute() before you may call this method.");
769  // If isComputed() is true, it's impossible for the matrix to be
770  // null, or for it not to be a Tpetra::CrsMatrix.
771  if (!isKokkosKernelsStream_) {
772  TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::logic_error, prefix << "A_ is null. "
773  "Please report this bug to the Ifpack2 developers.");
774  TEUCHOS_TEST_FOR_EXCEPTION(A_crs_.is_null(), std::logic_error, prefix << "A_crs_ is null. "
775  "Please report this bug to the Ifpack2 developers.");
776  // However, it _is_ possible that the user called resumeFill() on
777  // the matrix, after calling compute(). This is NOT allowed.
778  TEUCHOS_TEST_FOR_EXCEPTION(!A_crs_->isFillComplete(), std::runtime_error,
779  "If you call this "
780  "method, the matrix must be fill complete. It is not. This means that "
781  " you must have called resumeFill() on the matrix before calling apply(). "
782  "This is NOT allowed. Note that this class may use the matrix's data in "
783  "place without copying it. Thus, you cannot change the matrix and expect "
784  "the solver to stay the same. If you have changed the matrix, first call "
785  "fillComplete() on it, then call compute() on this object, before you call"
786  " apply(). You do NOT need to call setMatrix, as long as the matrix "
787  "itself (that is, its address in memory) is the same.");
788  } else {
789  for (int i = 0; i < num_streams_; i++) {
790  TEUCHOS_TEST_FOR_EXCEPTION(A_crs_v_[i].is_null(), std::logic_error, prefix << "A_crs_ is null. "
791  "Please report this bug to the Ifpack2 developers.");
792  // However, it _is_ possible that the user called resumeFill() on
793  // the matrix, after calling compute(). This is NOT allowed.
794  TEUCHOS_TEST_FOR_EXCEPTION(!A_crs_v_[i]->isFillComplete(), std::runtime_error,
795  "If you call this "
796  "method, the matrix must be fill complete. It is not. This means that "
797  " you must have called resumeFill() on the matrix before calling apply(). "
798  "This is NOT allowed. Note that this class may use the matrix's data in "
799  "place without copying it. Thus, you cannot change the matrix and expect "
800  "the solver to stay the same. If you have changed the matrix, first call "
801  "fillComplete() on it, then call compute() on this object, before you call"
802  " apply(). You do NOT need to call setMatrix, as long as the matrix "
803  "itself (that is, its address in memory) is the same.");
804  }
805  }
806 
807  RCP<const MV> X_cur;
808  RCP<MV> Y_cur;
809 
810  if (!isKokkosKernelsStream_) {
811  auto G = A_crs_->getGraph();
812  TEUCHOS_TEST_FOR_EXCEPTION(G.is_null(), std::logic_error, prefix << "A_ and A_crs_ are nonnull, "
813  "but A_crs_'s RowGraph G is null. "
814  "Please report this bug to the Ifpack2 developers.");
815  auto importer = G->getImporter();
816  auto exporter = G->getExporter();
817 
818  if (!importer.is_null()) {
819  if (X_colMap_.is_null() || X_colMap_->getNumVectors() != X.getNumVectors()) {
820  X_colMap_ = rcp(new MV(importer->getTargetMap(), X.getNumVectors()));
821  } else {
822  X_colMap_->putScalar(STS::zero());
823  }
824  // See discussion of Github Issue #672 for why the Import needs to
825  // use the ZERO CombineMode. The case where the Export is
826  // nontrivial is likely never exercised.
827  X_colMap_->doImport(X, *importer, Tpetra::ZERO);
828  }
829  X_cur = importer.is_null() ? rcpFromRef(X) : Teuchos::rcp_const_cast<const MV>(X_colMap_);
830 
831  if (!exporter.is_null()) {
832  if (Y_rowMap_.is_null() || Y_rowMap_->getNumVectors() != Y.getNumVectors()) {
833  Y_rowMap_ = rcp(new MV(exporter->getSourceMap(), Y.getNumVectors()));
834  } else {
835  Y_rowMap_->putScalar(STS::zero());
836  }
837  Y_rowMap_->doExport(Y, *importer, Tpetra::ADD);
838  }
839  Y_cur = exporter.is_null() ? rcpFromRef(Y) : Y_rowMap_;
840  } else {
841  // Currently assume X and Y are local vectors (same sizes as A_crs_).
842  // Should do a better job here!!!
843  X_cur = rcpFromRef(X);
844  Y_cur = rcpFromRef(Y);
845  }
846 
847  localApply(*X_cur, *Y_cur, mode, alpha, beta);
848 
849  if (!isKokkosKernelsStream_) {
850  auto G = A_crs_->getGraph();
851  auto exporter = G->getExporter();
852  if (!exporter.is_null()) {
853  Y.putScalar(STS::zero());
854  Y.doExport(*Y_cur, *exporter, Tpetra::ADD);
855  }
856  }
857  ++numApply_;
858 }
859 
860 template <class MatrixType>
862  localTriangularSolve(const MV& Y,
863  MV& X,
864  const Teuchos::ETransp mode) const {
865  using Teuchos::CONJ_TRANS;
866  using Teuchos::NO_TRANS;
867  using Teuchos::TRANS;
868  const char tfecfFuncName[] = "localTriangularSolve: ";
869 
870  if (!isKokkosKernelsStream_) {
871  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!A_crs_->isFillComplete(), std::runtime_error,
872  "The matrix is not fill complete.");
873  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(A_crs_->getLocalNumRows() > 0 && this->uplo_ == "N", std::runtime_error,
874  "The matrix is neither upper triangular or lower triangular. "
875  "You may only call this method if the matrix is triangular. "
876  "Remember that this is a local (per MPI process) property, and that "
877  "Tpetra only knows how to do a local (per process) triangular solve.");
878  } else {
879  for (int i = 0; i < num_streams_; i++) {
880  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!A_crs_v_[i]->isFillComplete(), std::runtime_error,
881  "The matrix is not fill complete.");
882  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(A_crs_v_[i]->getLocalNumRows() > 0 && this->uplo_ == "N", std::runtime_error,
883  "The matrix is neither upper triangular or lower triangular. "
884  "You may only call this method if the matrix is triangular. "
885  "Remember that this is a local (per MPI process) property, and that "
886  "Tpetra only knows how to do a local (per process) triangular solve.");
887  }
888  }
889  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!X.isConstantStride() || !Y.isConstantStride(), std::invalid_argument,
890  "X and Y must be constant stride.");
892  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(STS::isComplex && mode == TRANS, std::logic_error,
893  "This method does "
894  "not currently support non-conjugated transposed solve (mode == "
895  "Teuchos::TRANS) for complex scalar types.");
896 
897  const std::string uplo = this->uplo_;
898  const std::string trans = (mode == Teuchos::CONJ_TRANS) ? "C" : (mode == Teuchos::TRANS ? "T" : "N");
899  const size_t numVecs = std::min(X.getNumVectors(), Y.getNumVectors());
900 
901  if (Teuchos::nonnull(kh_) && this->isKokkosKernelsSptrsv_ && trans == "N") {
902  auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(this->A_);
903  auto A_lclk = A_crs->getLocalMatrixDevice();
904  auto ptr = A_lclk.graph.row_map;
905  auto ind = A_lclk.graph.entries;
906  auto val = A_lclk.values;
907 
908  for (size_t j = 0; j < numVecs; ++j) {
909  auto X_j = X.getVectorNonConst(j);
910  auto Y_j = Y.getVector(j);
911  auto X_lcl = X_j->getLocalViewDevice(Tpetra::Access::ReadWrite);
912  auto Y_lcl = Y_j->getLocalViewDevice(Tpetra::Access::ReadOnly);
913  auto X_lcl_1d = Kokkos::subview(X_lcl, Kokkos::ALL(), 0);
914  auto Y_lcl_1d = Kokkos::subview(Y_lcl, Kokkos::ALL(), 0);
915  KokkosSparse::sptrsv_solve(kh_.getRawPtr(), ptr, ind, val, Y_lcl_1d, X_lcl_1d);
916  // TODO is this fence needed...
917  typename k_handle::HandleExecSpace().fence();
918  }
919  } // End using regular interface of Kokkos Kernels Sptrsv
920  else if (kh_v_nonnull_ && this->isKokkosKernelsSptrsv_ && trans == "N") {
921  std::vector<lno_row_view_t> ptr_v(num_streams_);
922  std::vector<lno_nonzero_view_t> ind_v(num_streams_);
923  std::vector<scalar_nonzero_view_t> val_v(num_streams_);
924  std::vector<k_handle*> KernelHandle_rawptr_v_(num_streams_);
925  for (size_t j = 0; j < numVecs; ++j) {
926  auto X_j = X.getVectorNonConst(j);
927  auto Y_j = Y.getVector(j);
928  auto X_lcl = X_j->getLocalViewDevice(Tpetra::Access::ReadWrite);
929  auto Y_lcl = Y_j->getLocalViewDevice(Tpetra::Access::ReadOnly);
930  auto X_lcl_1d = Kokkos::subview(X_lcl, Kokkos::ALL(), 0);
931  auto Y_lcl_1d = Kokkos::subview(Y_lcl, Kokkos::ALL(), 0);
932  std::vector<decltype(X_lcl_1d)> x_v(num_streams_);
933  std::vector<decltype(Y_lcl_1d)> y_v(num_streams_);
934  local_ordinal_type stream_begin = 0;
935  local_ordinal_type stream_end;
936  for (int i = 0; i < num_streams_; i++) {
937  auto A_crs_i = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_crs_v_[i]);
938  auto Alocal_i = A_crs_i->getLocalMatrixDevice();
939  ptr_v[i] = Alocal_i.graph.row_map;
940  ind_v[i] = Alocal_i.graph.entries;
941  val_v[i] = Alocal_i.values;
942  stream_end = stream_begin + Alocal_i.numRows();
943  x_v[i] = Kokkos::subview(X_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
944  y_v[i] = Kokkos::subview(Y_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
945  KernelHandle_rawptr_v_[i] = kh_v_[i].getRawPtr();
946  stream_begin = stream_end;
947  }
948  Kokkos::fence();
949  KokkosSparse::Experimental::sptrsv_solve_streams(exec_space_instances_, KernelHandle_rawptr_v_,
950  ptr_v, ind_v, val_v, y_v, x_v);
951  for (int i = 0; i < num_streams_; i++) exec_space_instances_[i].fence();
952  // Kokkos::fence();
953  }
954  } // End using stream interface of Kokkos Kernels Sptrsv
955  else {
956  const std::string diag = this->diag_;
957  // NOTE (mfh 20 Aug 2017): KokkosSparse::trsv currently is a
958  // sequential, host-only code. See
959  // https://github.com/kokkos/kokkos-kernels/issues/48.
960 
961  auto A_lcl = this->A_crs_->getLocalMatrixHost();
962 
963  if (X.isConstantStride() && Y.isConstantStride()) {
964  auto X_lcl = X.getLocalViewHost(Tpetra::Access::ReadWrite);
965  auto Y_lcl = Y.getLocalViewHost(Tpetra::Access::ReadOnly);
966  KokkosSparse::trsv(uplo.c_str(), trans.c_str(), diag.c_str(),
967  A_lcl, Y_lcl, X_lcl);
968  } else {
969  for (size_t j = 0; j < numVecs; ++j) {
970  auto X_j = X.getVectorNonConst(j);
971  auto Y_j = Y.getVector(j);
972  auto X_lcl = X_j->getLocalViewHost(Tpetra::Access::ReadWrite);
973  auto Y_lcl = Y_j->getLocalViewHost(Tpetra::Access::ReadOnly);
974  KokkosSparse::trsv(uplo.c_str(), trans.c_str(),
975  diag.c_str(), A_lcl, Y_lcl, X_lcl);
976  }
977  }
978  }
979 }
980 
981 template <class MatrixType>
982 void LocalSparseTriangularSolver<MatrixType>::
983  localApply(const MV& X,
984  MV& Y,
985  const Teuchos::ETransp mode,
986  const scalar_type& alpha,
987  const scalar_type& beta) const {
988  if (mode == Teuchos::NO_TRANS && Teuchos::nonnull(htsImpl_) &&
989  htsImpl_->isComputed()) {
990  htsImpl_->localApply(X, Y, mode, alpha, beta);
991  return;
992  }
993 
994  using Teuchos::RCP;
995  typedef scalar_type ST;
996  typedef Teuchos::ScalarTraits<ST> STS;
997 
998  if (beta == STS::zero()) {
999  if (alpha == STS::zero()) {
1000  Y.putScalar(STS::zero()); // Y := 0 * Y (ignore contents of Y)
1001  } else { // alpha != 0
1002  this->localTriangularSolve(X, Y, mode);
1003  if (alpha != STS::one()) {
1004  Y.scale(alpha);
1005  }
1006  }
1007  } else { // beta != 0
1008  if (alpha == STS::zero()) {
1009  Y.scale(beta); // Y := beta * Y
1010  } else { // alpha != 0
1011  MV Y_tmp(Y, Teuchos::Copy);
1012  this->localTriangularSolve(X, Y_tmp, mode); // Y_tmp := M * X
1013  Y.update(alpha, Y_tmp, beta); // Y := beta * Y + alpha * Y_tmp
1014  }
1015  }
1016 }
1017 
1018 template <class MatrixType>
1021  return numInitialize_;
1022 }
1023 
1024 template <class MatrixType>
1027  return numCompute_;
1028 }
1029 
1030 template <class MatrixType>
1032  getNumApply() const {
1033  return numApply_;
1034 }
1035 
1036 template <class MatrixType>
1037 double
1040  return initializeTime_;
1041 }
1042 
1043 template <class MatrixType>
1044 double
1047  return computeTime_;
1048 }
1049 
1050 template <class MatrixType>
1051 double
1053  getApplyTime() const {
1054  return applyTime_;
1055 }
1056 
1057 template <class MatrixType>
1058 std::string
1060  description() const {
1061  std::ostringstream os;
1062 
1063  // Output is a valid YAML dictionary in flow style. If you don't
1064  // like everything on a single line, you should call describe()
1065  // instead.
1066  os << "\"Ifpack2::LocalSparseTriangularSolver\": {";
1067  if (this->getObjectLabel() != "") {
1068  os << "Label: \"" << this->getObjectLabel() << "\", ";
1069  }
1070  os << "Initialized: " << (isInitialized() ? "true" : "false") << ", "
1071  << "Computed: " << (isComputed() ? "true" : "false") << ", ";
1072 
1073  if (isKokkosKernelsSptrsv_) os << "KK-SPTRSV, ";
1074  if (isKokkosKernelsStream_) os << "KK-SolveStream, ";
1075 
1076  if (A_.is_null()) {
1077  os << "Matrix: null";
1078  } else {
1079  os << "Matrix dimensions: ["
1080  << A_->getGlobalNumRows() << ", "
1081  << A_->getGlobalNumCols() << "]"
1082  << ", Number of nonzeros: " << A_->getGlobalNumEntries();
1083  }
1084 
1085  if (Teuchos::nonnull(htsImpl_))
1086  os << ", HTS computed: " << (htsImpl_->isComputed() ? "true" : "false");
1087  os << "}";
1088  return os.str();
1089 }
1090 
1091 template <class MatrixType>
1094  const Teuchos::EVerbosityLevel verbLevel) const {
1095  using std::endl;
1096  // Default verbosity level is VERB_LOW, which prints only on Process
1097  // 0 of the matrix's communicator.
1098  const Teuchos::EVerbosityLevel vl = (verbLevel == Teuchos::VERB_DEFAULT) ? Teuchos::VERB_LOW : verbLevel;
1099 
1100  if (vl != Teuchos::VERB_NONE) {
1101  // Print only on Process 0 in the matrix's communicator. If the
1102  // matrix is null, though, we have to get the communicator from
1103  // somewhere, so we ask Tpetra for its default communicator. If
1104  // MPI is enabled, this wraps MPI_COMM_WORLD or a clone thereof.
1105  auto comm = A_.is_null() ? Tpetra::getDefaultComm() : A_->getComm();
1106 
1107  // Users aren't supposed to do anything with the matrix on
1108  // processes where its communicator is null.
1109  if (!comm.is_null() && comm->getRank() == 0) {
1110  // By convention, describe() should always begin with a tab.
1111  Teuchos::OSTab tab0(out);
1112  // Output is in YAML format. We have to escape the class name,
1113  // because it has a colon.
1114  out << "\"Ifpack2::LocalSparseTriangularSolver\":" << endl;
1115  Teuchos::OSTab tab1(out);
1116  out << "Scalar: " << Teuchos::TypeNameTraits<scalar_type>::name() << endl
1117  << "LocalOrdinal: " << Teuchos::TypeNameTraits<local_ordinal_type>::name() << endl
1118  << "GlobalOrdinal: " << Teuchos::TypeNameTraits<global_ordinal_type>::name() << endl
1119  << "Node: " << Teuchos::TypeNameTraits<node_type>::name() << endl;
1120  }
1121  }
1122 }
1123 
1124 template <class MatrixType>
1127  getDomainMap() const {
1128  TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::runtime_error,
1129  "Ifpack2::LocalSparseTriangularSolver::getDomainMap: "
1130  "The matrix is null. Please call setMatrix() with a nonnull input "
1131  "before calling this method.");
1132  return A_->getDomainMap();
1133 }
1134 
1135 template <class MatrixType>
1138  getRangeMap() const {
1139  TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::runtime_error,
1140  "Ifpack2::LocalSparseTriangularSolver::getRangeMap: "
1141  "The matrix is null. Please call setMatrix() with a nonnull input "
1142  "before calling this method.");
1143  return A_->getRangeMap();
1144 }
1145 
1146 template <class MatrixType>
1149  const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::setMatrix: ";
1150 
1151  // If the pointer didn't change, do nothing. This is reasonable
1152  // because users are supposed to call this method with the same
1153  // object over all participating processes, and pointer identity
1154  // implies object identity.
1155  if (A.getRawPtr() != A_.getRawPtr() || isInternallyChanged_) {
1156  // Check in serial or one-process mode if the matrix is square.
1157  TEUCHOS_TEST_FOR_EXCEPTION(!A.is_null() && A->getComm()->getSize() == 1 &&
1158  A->getLocalNumRows() != A->getLocalNumCols(),
1159  std::runtime_error, prefix << "If A's communicator only contains one "
1160  "process, then A must be square. Instead, you provided a matrix A with "
1161  << A->getLocalNumRows() << " rows and " << A->getLocalNumCols() << " columns.");
1162 
1163  // It's legal for A to be null; in that case, you may not call
1164  // initialize() until calling setMatrix() with a nonnull input.
1165  // Regardless, setting the matrix invalidates the preconditioner.
1166  isInitialized_ = false;
1167  isComputed_ = false;
1168 
1169  if (A.is_null()) {
1170  A_crs_ = Teuchos::null;
1171  A_ = Teuchos::null;
1172  } else { // A is not null
1174  Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A);
1175  TEUCHOS_TEST_FOR_EXCEPTION(A_crs.is_null(), std::invalid_argument, prefix << "The input matrix A is not a Tpetra::CrsMatrix.");
1176  A_crs_ = A_crs;
1177  A_ = A;
1178  }
1179 
1180  if (Teuchos::nonnull(htsImpl_))
1181  htsImpl_->reset();
1182  } // pointers are not the same
1183 }
1184 
1185 template <class MatrixType>
1187  setStreamInfo(const bool& isKokkosKernelsStream, const int& num_streams,
1188  const std::vector<HandleExecSpace>& exec_space_instances) {
1189  isKokkosKernelsStream_ = isKokkosKernelsStream;
1190  num_streams_ = num_streams;
1191  exec_space_instances_ = exec_space_instances;
1192  A_crs_v_ = std::vector<Teuchos::RCP<crs_matrix_type>>(num_streams_);
1193 }
1194 
1195 template <class MatrixType>
1197  setMatrices(const std::vector<Teuchos::RCP<crs_matrix_type>>& A_crs_v) {
1198  const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::setMatrixWithStreams: ";
1199 
1200  for (int i = 0; i < num_streams_; i++) {
1201  // If the pointer didn't change, do nothing. This is reasonable
1202  // because users are supposed to call this method with the same
1203  // object over all participating processes, and pointer identity
1204  // implies object identity.
1205  if (A_crs_v[i].getRawPtr() != A_crs_v_[i].getRawPtr() || isInternallyChanged_) {
1206  // Check in serial or one-process mode if the matrix is square.
1207  TEUCHOS_TEST_FOR_EXCEPTION(!A_crs_v[i].is_null() && A_crs_v[i]->getComm()->getSize() == 1 &&
1208  A_crs_v[i]->getLocalNumRows() != A_crs_v[i]->getLocalNumCols(),
1209  std::runtime_error, prefix << "If A's communicator only contains one "
1210  "process, then A must be square. Instead, you provided a matrix A with "
1211  << A_crs_v[i]->getLocalNumRows() << " rows and " << A_crs_v[i]->getLocalNumCols() << " columns.");
1212 
1213  // It's legal for A to be null; in that case, you may not call
1214  // initialize() until calling setMatrix() with a nonnull input.
1215  // Regardless, setting the matrix invalidates the preconditioner.
1216  isInitialized_ = false;
1217  isComputed_ = false;
1218 
1219  if (A_crs_v[i].is_null()) {
1220  A_crs_v_[i] = Teuchos::null;
1221  } else { // A is not null
1223  Teuchos::rcp_dynamic_cast<crs_matrix_type>(A_crs_v[i]);
1224  TEUCHOS_TEST_FOR_EXCEPTION(A_crs.is_null(), std::invalid_argument, prefix << "The input matrix A is not a Tpetra::CrsMatrix.");
1225  A_crs_v_[i] = A_crs;
1226  }
1227  } // pointers are not the same
1228  }
1229 }
1230 
1231 } // namespace Ifpack2
1232 
1233 #define IFPACK2_LOCALSPARSETRIANGULARSOLVER_INSTANT(S, LO, GO, N) \
1234  template class Ifpack2::LocalSparseTriangularSolver<Tpetra::RowMatrix<S, LO, GO, N>>;
1235 
1236 #endif // IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
bool is_null(const boost::shared_ptr< T > &p)
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the preconditioner to X, and put the result in Y.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:719
int getNumCompute() const
Return the number of calls to compute().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1026
std::string description() const
A one-line description of this object.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1060
T & get(const std::string &name, T def_value)
bool nonnull(const std::shared_ptr< T > &p)
#define TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(throw_exception_test, Exception, msg)
void compute()
&quot;Numeric&quot; phase of setup
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:642
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
MatrixType::global_ordinal_type global_ordinal_type
Type of the global indices of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:62
bool isComputed() const
Return true if compute() has been called.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:191
T * get() const
double getApplyTime() const
Return the time spent in apply().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1053
double getComputeTime() const
Return the time spent in compute().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1046
double getInitializeTime() const
Return the time spent in initialize().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1039
MatrixType::node_type node_type
Node type of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:64
bool isParameter(const std::string &name) const
T * getRawPtr() const
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
void setMatrices(const std::vector< Teuchos::RCP< crs_matrix_type > > &A_crs_v)
Set this preconditioner&#39;s matrices (used by stream interface of triangular solve).
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1197
#define TEUCHOS_TEST_FOR_EXCEPT_MSG(throw_exception_test, msg)
Teuchos::RCP< const map_type > getDomainMap() const
The domain of this operator.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1127
&quot;Preconditioner&quot; that solves local sparse triangular systems.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:46
void initialize()
&quot;Symbolic&quot; phase of setup
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:392
int getNumInitialize() const
Return the number of calls to initialize().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1020
void setStreamInfo(const bool &isKokkosKernelsStream, const int &num_streams, const std::vector< HandleExecSpace > &exec_space_instances)
Set this triangular solver&#39;s stream information.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1187
Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > map_type
Specialization of Tpetra::Map used by this class.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:69
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Set this preconditioner&#39;s matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1148
void resize(size_type new_size, const value_type &x=value_type())
IntegralType getIntegralValue(const std::string &str, const std::string &paramName="", const std::string &sublistName="") const
int getNumApply() const
Return the number of calls to apply().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1032
MatrixType::local_ordinal_type local_ordinal_type
Type of the local indices of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:60
Teuchos::RCP< const map_type > getRangeMap() const
The range of this operator.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1138
bool isType(const std::string &name) const
Tpetra::CrsMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > crs_matrix_type
Specialization of Tpetra::CrsMatrix used by this class.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:77
void setParameters(const Teuchos::ParameterList &params)
Set this object&#39;s parameters.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:348
MatrixType::scalar_type scalar_type
Scalar type of the entries of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:56
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print this object with given verbosity to the given output stream.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1093
virtual ~LocalSparseTriangularSolver()
Destructor (virtual for memory safety).
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:332
LocalSparseTriangularSolver()
Constructor that takes no input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:296
static std::string name()
std::string typeName(const T &t)
bool is_null() const