Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_LocalSparseTriangularSolver_def.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4 //
5 // Copyright 2009 NTESS and the Ifpack2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
11 #define IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
12 
13 #include <sstream> // ostringstream
14 #include <stdexcept> // runtime_error
15 
16 #include "Ifpack2_LocalSparseTriangularSolver_decl.hpp"
17 #include "Tpetra_CrsMatrix.hpp"
18 #include "Tpetra_Core.hpp"
19 #include "Teuchos_StandardParameterEntryValidators.hpp"
20 #include "Tpetra_Details_determineLocalTriangularStructure.hpp"
21 #include "KokkosSparse_trsv.hpp"
22 
23 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
24 #include "shylu_hts.hpp"
25 #endif
26 
27 namespace Ifpack2 {
28 
29 namespace Details {
30 
31 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA)
32 
33 inline void cusparse_error_throw(cusparseStatus_t cusparseStatus, const char* name,
34  const char* file, const int line) {
35  std::ostringstream out;
36 #if defined(CUSPARSE_VERSION) && (10300 <= CUSPARSE_VERSION)
37  out << name << " error( " << cusparseGetErrorName(cusparseStatus) << "): " << cusparseGetErrorString(cusparseStatus);
38 #else
39  out << name << " error( ";
40  switch (cusparseStatus) {
41  case CUSPARSE_STATUS_NOT_INITIALIZED:
42  out << "CUSPARSE_STATUS_NOT_INITIALIZED): cusparse handle was not "
43  "created correctly.";
44  break;
45  case CUSPARSE_STATUS_ALLOC_FAILED:
46  out << "CUSPARSE_STATUS_ALLOC_FAILED): you might tried to allocate too "
47  "much memory";
48  break;
49  case CUSPARSE_STATUS_INVALID_VALUE: out << "CUSPARSE_STATUS_INVALID_VALUE)"; break;
50  case CUSPARSE_STATUS_ARCH_MISMATCH: out << "CUSPARSE_STATUS_ARCH_MISMATCH)"; break;
51  case CUSPARSE_STATUS_MAPPING_ERROR: out << "CUSPARSE_STATUS_MAPPING_ERROR)"; break;
52  case CUSPARSE_STATUS_EXECUTION_FAILED: out << "CUSPARSE_STATUS_EXECUTION_FAILED)"; break;
53  case CUSPARSE_STATUS_INTERNAL_ERROR: out << "CUSPARSE_STATUS_INTERNAL_ERROR)"; break;
54  case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED: out << "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED)"; break;
55  case CUSPARSE_STATUS_ZERO_PIVOT: out << "CUSPARSE_STATUS_ZERO_PIVOT)"; break;
56  default: out << "unrecognized error code): this is bad!"; break;
57  }
58 #endif // CUSPARSE_VERSION
59  if (file) {
60  out << " " << file << ":" << line;
61  }
62  throw std::runtime_error(out.str());
63 }
64 
65 inline void cusparse_safe_call(cusparseStatus_t cusparseStatus, const char* name, const char* file = nullptr,
66  const int line = 0) {
67  if (CUSPARSE_STATUS_SUCCESS != cusparseStatus) {
68  cusparse_error_throw(cusparseStatus, name, file, line);
69  }
70 }
71 
72 #define IFPACK2_DETAILS_CUSPARSE_SAFE_CALL(call) \
73  Ifpack2::Details::cusparse_safe_call(call, #call, __FILE__, __LINE__)
74 
75 #endif // defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA)
76 
77 struct TrisolverType {
78  enum Enum {
79  Internal,
80  HTS,
81  KSPTRSV
82  };
83 
84  static void loadPLTypeOption(Teuchos::Array<std::string>& type_strs, Teuchos::Array<Enum>& type_enums) {
85  type_strs.resize(3);
86  type_strs[0] = "Internal";
87  type_strs[1] = "HTS";
88  type_strs[2] = "KSPTRSV";
89  type_enums.resize(3);
90  type_enums[0] = Internal;
91  type_enums[1] = HTS;
92  type_enums[2] = KSPTRSV;
93  }
94 };
95 } // namespace Details
96 
97 template <class MatrixType>
98 class LocalSparseTriangularSolver<MatrixType>::HtsImpl {
99  public:
100  typedef Tpetra::CrsMatrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type> crs_matrix_type;
101 
102  void reset() {
103 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
104  Timpl_ = Teuchos::null;
105  levelset_block_size_ = 1;
106 #endif
107  }
108 
109  void setParameters(const Teuchos::ParameterList& pl) {
110  (void)pl;
111 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
112  const char* block_size_s = "trisolver: block size";
113  if (pl.isParameter(block_size_s)) {
114  TEUCHOS_TEST_FOR_EXCEPT_MSG(!pl.isType<int>(block_size_s),
115  "The parameter \"" << block_size_s << "\" must be of type int.");
116  levelset_block_size_ = pl.get<int>(block_size_s);
117  }
118  if (levelset_block_size_ < 1)
119  levelset_block_size_ = 1;
120 #endif
121  }
122 
123  // HTS has the phases symbolic+numeric, numeric, and apply. Hence the first
124  // call to compute() will trigger the symbolic+numeric phase, and subsequent
125  // calls (with the same Timpl_) will trigger the numeric phase. In the call to
126  // initialize(), essentially nothing happens.
127  void initialize(const crs_matrix_type& /* unused */) {
128 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
129  reset();
130  transpose_ = conjugate_ = false;
131 #endif
132  }
133 
134  void compute(const crs_matrix_type& T_in, const Teuchos::RCP<Teuchos::FancyOStream>& out) {
135  (void)T_in;
136  (void)out;
137 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
138  using Teuchos::ArrayRCP;
139 
140  auto rowptr = T_in.getLocalRowPtrsHost();
141  auto colidx = T_in.getLocalIndicesHost();
142  auto val = T_in.getLocalValuesHost(Tpetra::Access::ReadOnly);
143  Kokkos::fence();
144 
145  Teuchos::RCP<HtsCrsMatrix> T_hts = Teuchos::rcpWithDealloc(
146  HTST::make_CrsMatrix(rowptr.size() - 1,
147  rowptr.data(), colidx.data(),
148  // For std/Kokkos::complex.
149  reinterpret_cast<const scalar_type*>(val.data()),
150  transpose_, conjugate_),
151  HtsCrsMatrixDeleter());
152 
153  if (Teuchos::nonnull(Timpl_)) {
154  // Reuse the nonzero pattern.
155  HTST::reprocess_numeric(Timpl_.get(), T_hts.get());
156  } else {
157  // Build from scratch.
158  if (T_in.getCrsGraph().is_null()) {
159  if (Teuchos::nonnull(out))
160  *out << "HTS compute failed because T_in.getCrsGraph().is_null().\n";
161  return;
162  }
163  if (!T_in.getCrsGraph()->isSorted()) {
164  if (Teuchos::nonnull(out))
165  *out << "HTS compute failed because ! T_in.getCrsGraph().isSorted().\n";
166  return;
167  }
168  if (!T_in.isStorageOptimized()) {
169  if (Teuchos::nonnull(out))
170  *out << "HTS compute failed because ! T_in.isStorageOptimized().\n";
171  return;
172  }
173 
174  typename HTST::PreprocessArgs args;
175  args.T = T_hts.get();
176  args.max_nrhs = 1;
177 #ifdef _OPENMP
178  args.nthreads = omp_get_max_threads();
179 #else
180  args.nthreads = 1;
181 #endif
182  args.save_for_reprocess = true;
183  typename HTST::Options opts;
184  opts.levelset_block_size = levelset_block_size_;
185  args.options = &opts;
186 
187  try {
188  Timpl_ = Teuchos::rcpWithDealloc(HTST::preprocess(args), TImplDeleter());
189  } catch (const std::exception& e) {
190  if (Teuchos::nonnull(out))
191  *out << "HTS preprocess threw: " << e.what() << "\n";
192  }
193  }
194 #endif
195  }
196 
197  // HTS may not be able to handle a matrix, so query whether compute()
198  // succeeded.
199  bool isComputed() {
200 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
201  return Teuchos::nonnull(Timpl_);
202 #else
203  return false;
204 #endif
205  }
206 
207  // Y := beta * Y + alpha * (M * X)
208  void localApply(const MV& X, MV& Y,
209  const Teuchos::ETransp /* mode */,
210  const scalar_type& alpha, const scalar_type& beta) const {
211  (void)X;
212  (void)Y;
213  (void)alpha;
214  (void)beta;
215 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
216  const auto& X_view = X.getLocalViewHost(Tpetra::Access::ReadOnly);
217  const auto& Y_view = Y.getLocalViewHost(Tpetra::Access::ReadWrite);
218 
219  // Only does something if #rhs > current capacity.
220  HTST::reset_max_nrhs(Timpl_.get(), X_view.extent(1));
221  // Switch alpha and beta because of HTS's opposite convention.
222  HTST::solve_omp(Timpl_.get(),
223  // For std/Kokkos::complex.
224  reinterpret_cast<const scalar_type*>(X_view.data()),
225  X_view.extent(1),
226  // For std/Kokkos::complex.
227  reinterpret_cast<scalar_type*>(Y_view.data()),
228  beta, alpha);
229 #endif
230  }
231 
232  private:
233 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
234  typedef ::Experimental::HTS<local_ordinal_type, size_t, scalar_type> HTST;
235  typedef typename HTST::Impl TImpl;
236  typedef typename HTST::CrsMatrix HtsCrsMatrix;
237 
238  struct TImplDeleter {
239  void free(TImpl* impl) {
240  HTST::delete_Impl(impl);
241  }
242  };
243 
244  struct HtsCrsMatrixDeleter {
245  void free(HtsCrsMatrix* T) {
246  HTST::delete_CrsMatrix(T);
247  }
248  };
249 
250  Teuchos::RCP<TImpl> Timpl_;
251  bool transpose_, conjugate_;
252  int levelset_block_size_;
253 #endif
254 };
255 
256 template <class MatrixType>
259  : A_(A) {
260  initializeState();
261 
262  if (!A.is_null()) {
264  Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A);
265  TEUCHOS_TEST_FOR_EXCEPTION(A_crs.is_null(), std::invalid_argument,
266  "Ifpack2::LocalSparseTriangularSolver constructor: "
267  "The input matrix A is not a Tpetra::CrsMatrix.");
268  A_crs_ = A_crs;
269  }
270 }
271 
272 template <class MatrixType>
276  : A_(A)
277  , out_(out) {
278  initializeState();
279  if (!out_.is_null()) {
280  *out_ << ">>> DEBUG Ifpack2::LocalSparseTriangularSolver constructor"
281  << std::endl;
282  }
283 
284  if (!A.is_null()) {
286  Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A);
287  TEUCHOS_TEST_FOR_EXCEPTION(A_crs.is_null(), std::invalid_argument,
288  "Ifpack2::LocalSparseTriangularSolver constructor: "
289  "The input matrix A is not a Tpetra::CrsMatrix.");
290  A_crs_ = A_crs;
291  }
292 }
293 
294 template <class MatrixType>
297  initializeState();
298 }
299 
300 template <class MatrixType>
303  : out_(out) {
304  initializeState();
305  if (!out_.is_null()) {
306  *out_ << ">>> DEBUG Ifpack2::LocalSparseTriangularSolver constructor"
307  << std::endl;
308  }
309 }
310 
311 template <class MatrixType>
313  isInitialized_ = false;
314  isComputed_ = false;
315  reverseStorage_ = false;
316  isInternallyChanged_ = false;
317  numInitialize_ = 0;
318  numCompute_ = 0;
319  numApply_ = 0;
320  initializeTime_ = 0.0;
321  computeTime_ = 0.0;
322  applyTime_ = 0.0;
323  isKokkosKernelsSptrsv_ = false;
324  isKokkosKernelsStream_ = false;
325  num_streams_ = 0;
326  uplo_ = "N";
327  diag_ = "N";
328 }
329 
330 template <class MatrixType>
333  if (!isKokkosKernelsStream_) {
334  if (Teuchos::nonnull(kh_)) {
335  kh_->destroy_sptrsv_handle();
336  }
337  } else {
338  for (size_t i = 0; i < kh_v_.size(); i++) {
339  if (Teuchos::nonnull(kh_v_[i])) {
340  kh_v_[i]->destroy_sptrsv_handle();
341  }
342  }
343  }
344 }
345 
346 template <class MatrixType>
349  using Teuchos::Array;
351  using Teuchos::RCP;
352 
353  Details::TrisolverType::Enum trisolverType = Details::TrisolverType::Internal;
354  do {
355  static const char typeName[] = "trisolver: type";
356 
357  if (!pl.isType<std::string>(typeName)) break;
358 
359  // Map std::string <-> TrisolverType::Enum.
360  Array<std::string> trisolverTypeStrs;
361  Array<Details::TrisolverType::Enum> trisolverTypeEnums;
362  Details::TrisolverType::loadPLTypeOption(trisolverTypeStrs, trisolverTypeEnums);
364  s2i(trisolverTypeStrs(), trisolverTypeEnums(), typeName, false);
365 
366  trisolverType = s2i.getIntegralValue(pl.get<std::string>(typeName));
367  } while (0);
368 
369  if (trisolverType == Details::TrisolverType::HTS) {
370  htsImpl_ = Teuchos::rcp(new HtsImpl());
371  htsImpl_->setParameters(pl);
372  }
373 
374  if (trisolverType == Details::TrisolverType::KSPTRSV) {
375  this->isKokkosKernelsSptrsv_ = true;
376  } else {
377  this->isKokkosKernelsSptrsv_ = false;
378  }
379 
380  if (pl.isParameter("trisolver: reverse U"))
381  reverseStorage_ = pl.get<bool>("trisolver: reverse U");
382 
383  TEUCHOS_TEST_FOR_EXCEPTION(reverseStorage_ && (trisolverType == Details::TrisolverType::HTS || trisolverType == Details::TrisolverType::KSPTRSV),
384  std::logic_error,
385  "Ifpack2::LocalSparseTriangularSolver::setParameters: "
386  "You are not allowed to enable both HTS or KSPTRSV and the \"trisolver: reverse U\" "
387  "options. See GitHub issue #2647.");
388 }
389 
390 template <class MatrixType>
393  using Tpetra::Details::determineLocalTriangularStructure;
394 
395  using local_matrix_type = typename crs_matrix_type::local_matrix_device_type;
396  using LO = local_ordinal_type;
397 
398  const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::initialize: ";
399  if (!out_.is_null()) {
400  *out_ << ">>> DEBUG " << prefix << std::endl;
401  }
402 
403  if (!isKokkosKernelsStream_) {
404  TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::runtime_error, prefix << "You must call "
405  "setMatrix() with a nonnull input matrix before you may call "
406  "initialize() or compute().");
407  if (A_crs_.is_null()) {
408  auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_);
409  TEUCHOS_TEST_FOR_EXCEPTION(A_crs.get() == nullptr, std::invalid_argument,
410  prefix << "The input matrix A is not a Tpetra::CrsMatrix.");
411  A_crs_ = A_crs;
412  }
413  auto G = A_crs_->getGraph();
414  TEUCHOS_TEST_FOR_EXCEPTION(G.is_null(), std::logic_error, prefix << "A_ and A_crs_ are nonnull, "
415  "but A_crs_'s RowGraph G is null. "
416  "Please report this bug to the Ifpack2 developers.");
417  // At this point, the graph MUST be fillComplete. The "initialize"
418  // (symbolic) part of setup only depends on the graph structure, so
419  // the matrix itself need not be fill complete.
420  TEUCHOS_TEST_FOR_EXCEPTION(!G->isFillComplete(), std::runtime_error,
421  "If you call this method, "
422  "the matrix's graph must be fill complete. It is not.");
423 
424  // mfh 30 Apr 2018: See GitHub Issue #2658.
425  constexpr bool ignoreMapsForTriStructure = true;
426  auto lclTriStructure = [&] {
427  auto lclMatrix = A_crs_->getLocalMatrixDevice();
428  auto lclRowMap = A_crs_->getRowMap()->getLocalMap();
429  auto lclColMap = A_crs_->getColMap()->getLocalMap();
430  auto lclTriStruct =
431  determineLocalTriangularStructure(lclMatrix.graph,
432  lclRowMap,
433  lclColMap,
434  ignoreMapsForTriStructure);
435  const LO lclNumRows = lclRowMap.getLocalNumElements();
436  this->diag_ = (lclTriStruct.diagCount < lclNumRows) ? "U" : "N";
437  this->uplo_ = lclTriStruct.couldBeLowerTriangular ? "L" : (lclTriStruct.couldBeUpperTriangular ? "U" : "N");
438  return lclTriStruct;
439  }();
440 
441  if (reverseStorage_ && lclTriStructure.couldBeUpperTriangular &&
442  htsImpl_.is_null()) {
443  // Reverse the storage for an upper triangular matrix
444  auto Alocal = A_crs_->getLocalMatrixDevice();
445  auto ptr = Alocal.graph.row_map;
446  auto ind = Alocal.graph.entries;
447  auto val = Alocal.values;
448 
449  auto numRows = Alocal.numRows();
450  auto numCols = Alocal.numCols();
451  auto numNnz = Alocal.nnz();
452 
453  typename decltype(ptr)::non_const_type newptr("ptr", ptr.extent(0));
454  typename decltype(ind)::non_const_type newind("ind", ind.extent(0));
455  decltype(val) newval("val", val.extent(0));
456 
457  // FIXME: The code below assumes UVM
458  typename crs_matrix_type::execution_space().fence();
459  newptr(0) = 0;
460  for (local_ordinal_type row = 0, rowStart = 0; row < numRows; ++row) {
461  auto A_r = Alocal.row(numRows - 1 - row);
462 
463  auto numEnt = A_r.length;
464  for (local_ordinal_type k = 0; k < numEnt; ++k) {
465  newind(rowStart + k) = numCols - 1 - A_r.colidx(numEnt - 1 - k);
466  newval(rowStart + k) = A_r.value(numEnt - 1 - k);
467  }
468  rowStart += numEnt;
469  newptr(row + 1) = rowStart;
470  }
471  typename crs_matrix_type::execution_space().fence();
472 
473  // Reverse maps
474  Teuchos::RCP<map_type> newRowMap, newColMap;
475  {
476  // Reverse row map
477  auto rowMap = A_->getRowMap();
478  auto numElems = rowMap->getLocalNumElements();
479  auto rowElems = rowMap->getLocalElementList();
480 
481  Teuchos::Array<global_ordinal_type> newRowElems(rowElems.size());
482  for (size_t i = 0; i < numElems; i++)
483  newRowElems[i] = rowElems[numElems - 1 - i];
484 
485  newRowMap = Teuchos::rcp(new map_type(rowMap->getGlobalNumElements(), newRowElems, rowMap->getIndexBase(), rowMap->getComm()));
486  }
487  {
488  // Reverse column map
489  auto colMap = A_->getColMap();
490  auto numElems = colMap->getLocalNumElements();
491  auto colElems = colMap->getLocalElementList();
492 
493  Teuchos::Array<global_ordinal_type> newColElems(colElems.size());
494  for (size_t i = 0; i < numElems; i++)
495  newColElems[i] = colElems[numElems - 1 - i];
496 
497  newColMap = Teuchos::rcp(new map_type(colMap->getGlobalNumElements(), newColElems, colMap->getIndexBase(), colMap->getComm()));
498  }
499 
500  // Construct new matrix
501  local_matrix_type newLocalMatrix("Upermuted", numRows, numCols, numNnz, newval, newptr, newind);
502 
503  A_crs_ = Teuchos::rcp(new crs_matrix_type(newLocalMatrix, newRowMap, newColMap, A_crs_->getDomainMap(), A_crs_->getRangeMap()));
504 
505  isInternallyChanged_ = true;
506 
507  // FIXME (mfh 18 Apr 2019) Recomputing this is unnecessary, but I
508  // didn't want to break any invariants, especially considering
509  // that this branch is likely poorly tested.
510  auto newLclTriStructure =
511  determineLocalTriangularStructure(newLocalMatrix.graph,
512  newRowMap->getLocalMap(),
513  newColMap->getLocalMap(),
514  ignoreMapsForTriStructure);
515  const LO newLclNumRows = newRowMap->getLocalNumElements();
516  this->diag_ = (newLclTriStructure.diagCount < newLclNumRows) ? "U" : "N";
517  this->uplo_ = newLclTriStructure.couldBeLowerTriangular ? "L" : (newLclTriStructure.couldBeUpperTriangular ? "U" : "N");
518  }
519  } else {
520  bool prev_ambiguous = false;
521  bool all_ambiguous = true;
522  for (int i = 0; i < num_streams_; i++) {
523  TEUCHOS_TEST_FOR_EXCEPTION(A_crs_v_[i].is_null(), std::runtime_error, prefix << "You must call "
524  "setMatrix() with a nonnull input matrix before you may call "
525  "initialize() or compute().");
526  auto G = A_crs_v_[i]->getGraph();
527  TEUCHOS_TEST_FOR_EXCEPTION(G.is_null(), std::logic_error, prefix << "A_crs_ are nonnull, "
528  "but A_crs_'s RowGraph G is null. "
529  "Please report this bug to the Ifpack2 developers.");
530  // At this point, the graph MUST be fillComplete. The "initialize"
531  // (symbolic) part of setup only depends on the graph structure, so
532  // the matrix itself need not be fill complete.
533  TEUCHOS_TEST_FOR_EXCEPTION(!G->isFillComplete(), std::runtime_error,
534  "If you call this method, "
535  "the matrix's graph must be fill complete. It is not.");
536 
537  // mfh 30 Apr 2018: See GitHub Issue #2658.
538  constexpr bool ignoreMapsForTriStructure = true;
539  std::string prev_uplo = this->uplo_;
540  std::string prev_diag = this->diag_;
541  auto lclMatrix = A_crs_v_[i]->getLocalMatrixDevice();
542  auto lclRowMap = A_crs_v_[i]->getRowMap()->getLocalMap();
543  auto lclColMap = A_crs_v_[i]->getColMap()->getLocalMap();
544  auto lclTriStruct =
545  determineLocalTriangularStructure(lclMatrix.graph,
546  lclRowMap,
547  lclColMap,
548  ignoreMapsForTriStructure);
549  const LO lclNumRows = lclRowMap.getLocalNumElements();
550  this->diag_ = (lclTriStruct.diagCount < lclNumRows) ? "U" : "N";
551  const bool could_be_lower = lclTriStruct.couldBeLowerTriangular;
552  const bool could_be_upper = lclTriStruct.couldBeUpperTriangular;
553  if (could_be_lower && could_be_upper) {
554  // Ambiguous, but that's OK if at least one stream is unabiguous
555  this->uplo_ = prev_uplo;
556  prev_ambiguous = true;
557  } else {
558  this->uplo_ = could_be_lower ? "L" : (could_be_upper ? "U" : "N");
559  if (this->uplo_ != "N" && prev_uplo == "N" && prev_ambiguous) {
560  prev_uplo = this->uplo_;
561  }
562  prev_ambiguous = false;
563  }
564  all_ambiguous &= prev_ambiguous;
565  if (i > 0) {
566  TEUCHOS_TEST_FOR_EXCEPTION((this->diag_ != prev_diag) || (this->uplo_ != prev_uplo),
567  std::logic_error, prefix << "A_crs_'s structures in streams "
568  "are different. Please report this bug to the Ifpack2 developers.");
569  }
570  }
571  // If all streams were ambiguous, just call it "L"
572  if (all_ambiguous) {
573  this->uplo_ = "L";
574  }
575  }
576 
577  if (Teuchos::nonnull(htsImpl_)) {
578  htsImpl_->initialize(*A_crs_);
579  isInternallyChanged_ = true;
580  }
581 
582  const bool ksptrsv_valid_uplo = (this->uplo_ != "N");
583  kh_v_nonnull_ = false;
584  if (this->isKokkosKernelsSptrsv_ && ksptrsv_valid_uplo && this->diag_ != "U") {
585  if (!isKokkosKernelsStream_) {
586  kh_ = Teuchos::rcp(new k_handle());
587  const bool is_lower_tri = (this->uplo_ == "L") ? true : false;
588 
589  auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_, true);
590  auto Alocal = A_crs->getLocalMatrixDevice();
591  auto ptr = Alocal.graph.row_map;
592  auto ind = Alocal.graph.entries;
593  auto val = Alocal.values;
594 
595  auto numRows = Alocal.numRows();
596  kh_->create_sptrsv_handle(kokkosKernelsAlgorithm(), numRows, is_lower_tri);
597  KokkosSparse::sptrsv_symbolic(kh_.getRawPtr(), ptr, ind, val);
598  } else {
599  kh_v_ = std::vector<Teuchos::RCP<k_handle>>(num_streams_);
600  for (int i = 0; i < num_streams_; i++) {
601  kh_v_[i] = Teuchos::rcp(new k_handle());
602  auto A_crs_i = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_crs_v_[i], true);
603  auto Alocal_i = A_crs_i->getLocalMatrixDevice();
604  auto ptr_i = Alocal_i.graph.row_map;
605  auto ind_i = Alocal_i.graph.entries;
606  auto val_i = Alocal_i.values;
607 
608  auto numRows_i = Alocal_i.numRows();
609 
610  const bool is_lower_tri = (this->uplo_ == "L") ? true : false;
611  kh_v_[i]->create_sptrsv_handle(kokkosKernelsAlgorithm(), numRows_i, is_lower_tri);
612  KokkosSparse::sptrsv_symbolic(kh_v_[i].getRawPtr(), ptr_i, ind_i, val_i);
613  }
614  kh_v_nonnull_ = true;
615  }
616  }
617 
618  isInitialized_ = true;
619  ++numInitialize_;
620 }
621 
622 template <class MatrixType>
623 KokkosSparse::Experimental::SPTRSVAlgorithm
625 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA)
626  // CuSparse only supports int type ordinals
627  // and scalar types of float, double, float complex and double complex
628  if constexpr (std::is_same<Kokkos::Cuda, HandleExecSpace>::value &&
629  std::is_same<int, local_ordinal_type>::value &&
630  (std::is_same<scalar_type, float>::value ||
631  std::is_same<scalar_type, double>::value ||
632  std::is_same<impl_scalar_type, Kokkos::complex<float>>::value ||
633  std::is_same<impl_scalar_type, Kokkos::complex<double>>::value)) {
634  return KokkosSparse::Experimental::SPTRSVAlgorithm::SPTRSV_CUSPARSE;
635  }
636 #endif
637  return KokkosSparse::Experimental::SPTRSVAlgorithm::SEQLVLSCHD_TP1;
638 }
639 
640 template <class MatrixType>
643  const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::compute: ";
644  if (!out_.is_null()) {
645  *out_ << ">>> DEBUG " << prefix << std::endl;
646  }
647 
648  if (!isKokkosKernelsStream_) {
649  TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::runtime_error, prefix << "You must call "
650  "setMatrix() with a nonnull input matrix before you may call "
651  "initialize() or compute().");
652  TEUCHOS_TEST_FOR_EXCEPTION(A_crs_.is_null(), std::logic_error, prefix << "A_ is nonnull, but "
653  "A_crs_ is null. Please report this bug to the Ifpack2 developers.");
654  // At this point, the matrix MUST be fillComplete.
655  TEUCHOS_TEST_FOR_EXCEPTION(!A_crs_->isFillComplete(), std::runtime_error,
656  "If you call this "
657  "method, the matrix must be fill complete. It is not.");
658  } else {
659  for (int i = 0; i < num_streams_; i++) {
660  TEUCHOS_TEST_FOR_EXCEPTION(A_crs_v_[i].is_null(), std::runtime_error, prefix << "You must call "
661  "setMatrices() with a nonnull input matrix before you may call "
662  "initialize() or compute().");
663  // At this point, the matrix MUST be fillComplete.
664  TEUCHOS_TEST_FOR_EXCEPTION(!A_crs_v_[i]->isFillComplete(), std::runtime_error,
665  "If you call this "
666  "method, the matrix must be fill complete. It is not.");
667  }
668  }
669 
670  if (!isInitialized_) {
671  initialize();
672  }
673  TEUCHOS_TEST_FOR_EXCEPTION(!isInitialized_, std::logic_error, prefix << "initialize() should have "
674  "been called by this point, but isInitialized_ is false. "
675  "Please report this bug to the Ifpack2 developers.");
676 
677 // NOTE (Nov-09-2022):
678 // For Cuda >= 11.3 (using cusparseSpSV), always call symbolic during compute
679 // even when matrix values are changed with the same sparsity pattern.
680 // For Cuda >= 12.1 has a new cusparseSpSV_updateMatrix function just for updating the
681 // values that is substantially faster.
682 // This would all be much better handled via a KokkosSparse::sptrsv_numeric(...)
683 // that could hide the Cuda implementation details.
684 #if defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && (CUDA_VERSION >= 11030)
685  if constexpr (std::is_same_v<typename crs_matrix_type::execution_space, Kokkos::Cuda>) {
686  if (this->isKokkosKernelsSptrsv_) {
687  if (Teuchos::nonnull(kh_) && !isKokkosKernelsStream_) {
688  auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_crs_, true);
689  auto Alocal = A_crs->getLocalMatrixDevice();
690  auto val = Alocal.values;
691 #if (CUSPARSE_VERSION >= 12100)
692  auto* sptrsv_handle = kh_->get_sptrsv_handle();
693  auto cusparse_handle = sptrsv_handle->get_cuSparseHandle();
694  cusparseSpSV_updateMatrix(cusparse_handle->handle,
695  cusparse_handle->spsvDescr,
696  val.data(),
697  CUSPARSE_SPSV_UPDATE_GENERAL);
698 #else
699  auto ptr = Alocal.graph.row_map;
700  auto ind = Alocal.graph.entries;
701  KokkosSparse::sptrsv_symbolic(kh_.getRawPtr(), ptr, ind, val);
702 #endif
703  } else if (kh_v_nonnull_) {
704  for (int i = 0; i < num_streams_; i++) {
705  auto A_crs_i = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_crs_v_[i], true);
706  auto Alocal_i = A_crs_i->getLocalMatrixDevice();
707  auto val_i = Alocal_i.values;
708 #if (CUSPARSE_VERSION >= 12100)
709  auto* sptrsv_handle = kh_v_[i]->get_sptrsv_handle();
710  auto cusparse_handle = sptrsv_handle->get_cuSparseHandle();
711  IFPACK2_DETAILS_CUSPARSE_SAFE_CALL(
712  cusparseSetStream(cusparse_handle->handle, exec_space_instances_[i].cuda_stream()));
713  cusparseSpSV_updateMatrix(cusparse_handle->handle,
714  cusparse_handle->spsvDescr,
715  val_i.data(),
716  CUSPARSE_SPSV_UPDATE_GENERAL);
717 #else
718  auto ptr_i = Alocal_i.graph.row_map;
719  auto ind_i = Alocal_i.graph.entries;
720  KokkosSparse::sptrsv_symbolic(exec_space_instances_[i], kh_v_[i].getRawPtr(), ptr_i, ind_i, val_i);
721 #endif
722  }
723  }
724  }
725  }
726 #endif
727 
728  if (!isComputed_) { // Only compute if not computed before
729  if (Teuchos::nonnull(htsImpl_))
730  htsImpl_->compute(*A_crs_, out_);
731 
732  isComputed_ = true;
733  ++numCompute_;
734  }
735 }
736 
737 template <class MatrixType>
739  apply(const Tpetra::MultiVector<scalar_type, local_ordinal_type,
741  Tpetra::MultiVector<scalar_type, local_ordinal_type,
743  Teuchos::ETransp mode,
744  scalar_type alpha,
745  scalar_type beta) const {
746  using Teuchos::RCP;
747  using Teuchos::rcp;
748  using Teuchos::rcpFromRef;
749  typedef scalar_type ST;
750  typedef Teuchos::ScalarTraits<ST> STS;
751  const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::apply: ";
752 
753  if (!out_.is_null()) {
754  *out_ << ">>> DEBUG " << prefix;
755  if (!isKokkosKernelsStream_) {
756  if (A_crs_.is_null()) {
757  *out_ << "A_crs_ is null!" << std::endl;
758  } else {
760  Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_);
761  const std::string uplo = this->uplo_;
762  const std::string trans = (mode == Teuchos::CONJ_TRANS) ? "C" : (mode == Teuchos::TRANS ? "T" : "N");
763  const std::string diag = this->diag_;
764  *out_ << "uplo=\"" << uplo
765  << "\", trans=\"" << trans
766  << "\", diag=\"" << diag << "\"" << std::endl;
767  }
768  } else {
769  for (int i = 0; i < num_streams_; i++) {
770  if (A_crs_v_[i].is_null()) {
771  *out_ << "A_crs_v_[" << i << "]"
772  << " is null!" << std::endl;
773  } else {
774  const std::string uplo = this->uplo_;
775  const std::string trans = (mode == Teuchos::CONJ_TRANS) ? "C" : (mode == Teuchos::TRANS ? "T" : "N");
776  const std::string diag = this->diag_;
777  *out_ << "A_crs_v_[" << i << "]: "
778  << "uplo=\"" << uplo
779  << "\", trans=\"" << trans
780  << "\", diag=\"" << diag << "\"" << std::endl;
781  }
782  }
783  }
784  }
785 
786  TEUCHOS_TEST_FOR_EXCEPTION(!isComputed(), std::runtime_error, prefix << "If compute() has not yet "
787  "been called, or if you have changed the matrix via setMatrix(), you must "
788  "call compute() before you may call this method.");
789  // If isComputed() is true, it's impossible for the matrix to be
790  // null, or for it not to be a Tpetra::CrsMatrix.
791  if (!isKokkosKernelsStream_) {
792  TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::logic_error, prefix << "A_ is null. "
793  "Please report this bug to the Ifpack2 developers.");
794  TEUCHOS_TEST_FOR_EXCEPTION(A_crs_.is_null(), std::logic_error, prefix << "A_crs_ is null. "
795  "Please report this bug to the Ifpack2 developers.");
796  // However, it _is_ possible that the user called resumeFill() on
797  // the matrix, after calling compute(). This is NOT allowed.
798  TEUCHOS_TEST_FOR_EXCEPTION(!A_crs_->isFillComplete(), std::runtime_error,
799  "If you call this "
800  "method, the matrix must be fill complete. It is not. This means that "
801  " you must have called resumeFill() on the matrix before calling apply(). "
802  "This is NOT allowed. Note that this class may use the matrix's data in "
803  "place without copying it. Thus, you cannot change the matrix and expect "
804  "the solver to stay the same. If you have changed the matrix, first call "
805  "fillComplete() on it, then call compute() on this object, before you call"
806  " apply(). You do NOT need to call setMatrix, as long as the matrix "
807  "itself (that is, its address in memory) is the same.");
808  } else {
809  for (int i = 0; i < num_streams_; i++) {
810  TEUCHOS_TEST_FOR_EXCEPTION(A_crs_v_[i].is_null(), std::logic_error, prefix << "A_crs_ is null. "
811  "Please report this bug to the Ifpack2 developers.");
812  // However, it _is_ possible that the user called resumeFill() on
813  // the matrix, after calling compute(). This is NOT allowed.
814  TEUCHOS_TEST_FOR_EXCEPTION(!A_crs_v_[i]->isFillComplete(), std::runtime_error,
815  "If you call this "
816  "method, the matrix must be fill complete. It is not. This means that "
817  " you must have called resumeFill() on the matrix before calling apply(). "
818  "This is NOT allowed. Note that this class may use the matrix's data in "
819  "place without copying it. Thus, you cannot change the matrix and expect "
820  "the solver to stay the same. If you have changed the matrix, first call "
821  "fillComplete() on it, then call compute() on this object, before you call"
822  " apply(). You do NOT need to call setMatrix, as long as the matrix "
823  "itself (that is, its address in memory) is the same.");
824  }
825  }
826 
827  RCP<const MV> X_cur;
828  RCP<MV> Y_cur;
829 
830  if (!isKokkosKernelsStream_) {
831  auto G = A_crs_->getGraph();
832  TEUCHOS_TEST_FOR_EXCEPTION(G.is_null(), std::logic_error, prefix << "A_ and A_crs_ are nonnull, "
833  "but A_crs_'s RowGraph G is null. "
834  "Please report this bug to the Ifpack2 developers.");
835  auto importer = G->getImporter();
836  auto exporter = G->getExporter();
837 
838  if (!importer.is_null()) {
839  if (X_colMap_.is_null() || X_colMap_->getNumVectors() != X.getNumVectors()) {
840  X_colMap_ = rcp(new MV(importer->getTargetMap(), X.getNumVectors()));
841  } else {
842  X_colMap_->putScalar(STS::zero());
843  }
844  // See discussion of Github Issue #672 for why the Import needs to
845  // use the ZERO CombineMode. The case where the Export is
846  // nontrivial is likely never exercised.
847  X_colMap_->doImport(X, *importer, Tpetra::ZERO);
848  }
849  X_cur = importer.is_null() ? rcpFromRef(X) : Teuchos::rcp_const_cast<const MV>(X_colMap_);
850 
851  if (!exporter.is_null()) {
852  if (Y_rowMap_.is_null() || Y_rowMap_->getNumVectors() != Y.getNumVectors()) {
853  Y_rowMap_ = rcp(new MV(exporter->getSourceMap(), Y.getNumVectors()));
854  } else {
855  Y_rowMap_->putScalar(STS::zero());
856  }
857  Y_rowMap_->doExport(Y, *importer, Tpetra::ADD);
858  }
859  Y_cur = exporter.is_null() ? rcpFromRef(Y) : Y_rowMap_;
860  } else {
861  // Currently assume X and Y are local vectors (same sizes as A_crs_).
862  // Should do a better job here!!!
863  X_cur = rcpFromRef(X);
864  Y_cur = rcpFromRef(Y);
865  }
866 
867  localApply(*X_cur, *Y_cur, mode, alpha, beta);
868 
869  if (!isKokkosKernelsStream_) {
870  auto G = A_crs_->getGraph();
871  auto exporter = G->getExporter();
872  if (!exporter.is_null()) {
873  Y.putScalar(STS::zero());
874  Y.doExport(*Y_cur, *exporter, Tpetra::ADD);
875  }
876  }
877  ++numApply_;
878 }
879 
880 template <class MatrixType>
882  localTriangularSolve(const MV& Y,
883  MV& X,
884  const Teuchos::ETransp mode) const {
885  using Teuchos::CONJ_TRANS;
886  using Teuchos::NO_TRANS;
887  using Teuchos::TRANS;
888  const char tfecfFuncName[] = "localTriangularSolve: ";
889 
890  if (!isKokkosKernelsStream_) {
891  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!A_crs_->isFillComplete(), std::runtime_error,
892  "The matrix is not fill complete.");
893  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(A_crs_->getLocalNumRows() > 0 && this->uplo_ == "N", std::runtime_error,
894  "The matrix is neither upper triangular or lower triangular. "
895  "You may only call this method if the matrix is triangular. "
896  "Remember that this is a local (per MPI process) property, and that "
897  "Tpetra only knows how to do a local (per process) triangular solve.");
898  } else {
899  for (int i = 0; i < num_streams_; i++) {
900  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!A_crs_v_[i]->isFillComplete(), std::runtime_error,
901  "The matrix is not fill complete.");
902  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(A_crs_v_[i]->getLocalNumRows() > 0 && this->uplo_ == "N", std::runtime_error,
903  "The matrix is neither upper triangular or lower triangular. "
904  "You may only call this method if the matrix is triangular. "
905  "Remember that this is a local (per MPI process) property, and that "
906  "Tpetra only knows how to do a local (per process) triangular solve.");
907  }
908  }
909  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!X.isConstantStride() || !Y.isConstantStride(), std::invalid_argument,
910  "X and Y must be constant stride.");
912  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(STS::isComplex && mode == TRANS, std::logic_error,
913  "This method does "
914  "not currently support non-conjugated transposed solve (mode == "
915  "Teuchos::TRANS) for complex scalar types.");
916 
917  const std::string uplo = this->uplo_;
918  const std::string trans = (mode == Teuchos::CONJ_TRANS) ? "C" : (mode == Teuchos::TRANS ? "T" : "N");
919  const size_t numVecs = std::min(X.getNumVectors(), Y.getNumVectors());
920 
921  if (Teuchos::nonnull(kh_) && this->isKokkosKernelsSptrsv_ && trans == "N") {
922  auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(this->A_);
923  auto A_lclk = A_crs->getLocalMatrixDevice();
924  auto ptr = A_lclk.graph.row_map;
925  auto ind = A_lclk.graph.entries;
926  auto val = A_lclk.values;
927 
928  for (size_t j = 0; j < numVecs; ++j) {
929  auto X_j = X.getVectorNonConst(j);
930  auto Y_j = Y.getVector(j);
931  auto X_lcl = X_j->getLocalViewDevice(Tpetra::Access::ReadWrite);
932  auto Y_lcl = Y_j->getLocalViewDevice(Tpetra::Access::ReadOnly);
933  auto X_lcl_1d = Kokkos::subview(X_lcl, Kokkos::ALL(), 0);
934  auto Y_lcl_1d = Kokkos::subview(Y_lcl, Kokkos::ALL(), 0);
935  KokkosSparse::sptrsv_solve(kh_.getRawPtr(), ptr, ind, val, Y_lcl_1d, X_lcl_1d);
936  // TODO is this fence needed...
937  typename k_handle::HandleExecSpace().fence();
938  }
939  } // End using regular interface of Kokkos Kernels Sptrsv
940  else if (kh_v_nonnull_ && this->isKokkosKernelsSptrsv_ && trans == "N") {
941  std::vector<lno_row_view_t> ptr_v(num_streams_);
942  std::vector<lno_nonzero_view_t> ind_v(num_streams_);
943  std::vector<scalar_nonzero_view_t> val_v(num_streams_);
944  std::vector<k_handle*> KernelHandle_rawptr_v_(num_streams_);
945  for (size_t j = 0; j < numVecs; ++j) {
946  auto X_j = X.getVectorNonConst(j);
947  auto Y_j = Y.getVector(j);
948  auto X_lcl = X_j->getLocalViewDevice(Tpetra::Access::ReadWrite);
949  auto Y_lcl = Y_j->getLocalViewDevice(Tpetra::Access::ReadOnly);
950  auto X_lcl_1d = Kokkos::subview(X_lcl, Kokkos::ALL(), 0);
951  auto Y_lcl_1d = Kokkos::subview(Y_lcl, Kokkos::ALL(), 0);
952  std::vector<decltype(X_lcl_1d)> x_v(num_streams_);
953  std::vector<decltype(Y_lcl_1d)> y_v(num_streams_);
954  local_ordinal_type stream_begin = 0;
955  local_ordinal_type stream_end;
956  for (int i = 0; i < num_streams_; i++) {
957  auto A_crs_i = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_crs_v_[i]);
958  auto Alocal_i = A_crs_i->getLocalMatrixDevice();
959  ptr_v[i] = Alocal_i.graph.row_map;
960  ind_v[i] = Alocal_i.graph.entries;
961  val_v[i] = Alocal_i.values;
962  stream_end = stream_begin + Alocal_i.numRows();
963  x_v[i] = Kokkos::subview(X_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
964  y_v[i] = Kokkos::subview(Y_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
965  KernelHandle_rawptr_v_[i] = kh_v_[i].getRawPtr();
966  stream_begin = stream_end;
967  }
968  Kokkos::fence();
969  KokkosSparse::Experimental::sptrsv_solve_streams(exec_space_instances_, KernelHandle_rawptr_v_,
970  ptr_v, ind_v, val_v, y_v, x_v);
971  Kokkos::fence();
972  }
973  } // End using stream interface of Kokkos Kernels Sptrsv
974  else {
975  const std::string diag = this->diag_;
976  // NOTE (mfh 20 Aug 2017): KokkosSparse::trsv currently is a
977  // sequential, host-only code. See
978  // https://github.com/kokkos/kokkos-kernels/issues/48.
979 
980  auto A_lcl = this->A_crs_->getLocalMatrixHost();
981 
982  if (X.isConstantStride() && Y.isConstantStride()) {
983  auto X_lcl = X.getLocalViewHost(Tpetra::Access::ReadWrite);
984  auto Y_lcl = Y.getLocalViewHost(Tpetra::Access::ReadOnly);
985  KokkosSparse::trsv(uplo.c_str(), trans.c_str(), diag.c_str(),
986  A_lcl, Y_lcl, X_lcl);
987  } else {
988  for (size_t j = 0; j < numVecs; ++j) {
989  auto X_j = X.getVectorNonConst(j);
990  auto Y_j = Y.getVector(j);
991  auto X_lcl = X_j->getLocalViewHost(Tpetra::Access::ReadWrite);
992  auto Y_lcl = Y_j->getLocalViewHost(Tpetra::Access::ReadOnly);
993  KokkosSparse::trsv(uplo.c_str(), trans.c_str(),
994  diag.c_str(), A_lcl, Y_lcl, X_lcl);
995  }
996  }
997  }
998 }
999 
1000 template <class MatrixType>
1001 void LocalSparseTriangularSolver<MatrixType>::
1002  localApply(const MV& X,
1003  MV& Y,
1004  const Teuchos::ETransp mode,
1005  const scalar_type& alpha,
1006  const scalar_type& beta) const {
1007  if (mode == Teuchos::NO_TRANS && Teuchos::nonnull(htsImpl_) &&
1008  htsImpl_->isComputed()) {
1009  htsImpl_->localApply(X, Y, mode, alpha, beta);
1010  return;
1011  }
1012 
1013  using Teuchos::RCP;
1014  typedef scalar_type ST;
1015  typedef Teuchos::ScalarTraits<ST> STS;
1016 
1017  if (beta == STS::zero()) {
1018  if (alpha == STS::zero()) {
1019  Y.putScalar(STS::zero()); // Y := 0 * Y (ignore contents of Y)
1020  } else { // alpha != 0
1021  this->localTriangularSolve(X, Y, mode);
1022  if (alpha != STS::one()) {
1023  Y.scale(alpha);
1024  }
1025  }
1026  } else { // beta != 0
1027  if (alpha == STS::zero()) {
1028  Y.scale(beta); // Y := beta * Y
1029  } else { // alpha != 0
1030  MV Y_tmp(Y, Teuchos::Copy);
1031  this->localTriangularSolve(X, Y_tmp, mode); // Y_tmp := M * X
1032  Y.update(alpha, Y_tmp, beta); // Y := beta * Y + alpha * Y_tmp
1033  }
1034  }
1035 }
1036 
1037 template <class MatrixType>
1040  return numInitialize_;
1041 }
1042 
1043 template <class MatrixType>
1046  return numCompute_;
1047 }
1048 
1049 template <class MatrixType>
1051  getNumApply() const {
1052  return numApply_;
1053 }
1054 
1055 template <class MatrixType>
1056 double
1059  return initializeTime_;
1060 }
1061 
1062 template <class MatrixType>
1063 double
1066  return computeTime_;
1067 }
1068 
1069 template <class MatrixType>
1070 double
1072  getApplyTime() const {
1073  return applyTime_;
1074 }
1075 
1076 template <class MatrixType>
1077 std::string
1079  description() const {
1080  std::ostringstream os;
1081 
1082  // Output is a valid YAML dictionary in flow style. If you don't
1083  // like everything on a single line, you should call describe()
1084  // instead.
1085  os << "\"Ifpack2::LocalSparseTriangularSolver\": {";
1086  if (this->getObjectLabel() != "") {
1087  os << "Label: \"" << this->getObjectLabel() << "\", ";
1088  }
1089  os << "Initialized: " << (isInitialized() ? "true" : "false") << ", "
1090  << "Computed: " << (isComputed() ? "true" : "false") << ", ";
1091 
1092  if (isKokkosKernelsSptrsv_) os << "KK-SPTRSV, ";
1093  if (isKokkosKernelsStream_) os << "KK-SolveStream, ";
1094 
1095  if (A_.is_null()) {
1096  os << "Matrix: null";
1097  } else {
1098  os << "Matrix dimensions: ["
1099  << A_->getGlobalNumRows() << ", "
1100  << A_->getGlobalNumCols() << "]"
1101  << ", Number of nonzeros: " << A_->getGlobalNumEntries();
1102  }
1103 
1104  if (Teuchos::nonnull(htsImpl_))
1105  os << ", HTS computed: " << (htsImpl_->isComputed() ? "true" : "false");
1106  os << "}";
1107  return os.str();
1108 }
1109 
1110 template <class MatrixType>
1113  const Teuchos::EVerbosityLevel verbLevel) const {
1114  using std::endl;
1115  // Default verbosity level is VERB_LOW, which prints only on Process
1116  // 0 of the matrix's communicator.
1117  const Teuchos::EVerbosityLevel vl = (verbLevel == Teuchos::VERB_DEFAULT) ? Teuchos::VERB_LOW : verbLevel;
1118 
1119  if (vl != Teuchos::VERB_NONE) {
1120  // Print only on Process 0 in the matrix's communicator. If the
1121  // matrix is null, though, we have to get the communicator from
1122  // somewhere, so we ask Tpetra for its default communicator. If
1123  // MPI is enabled, this wraps MPI_COMM_WORLD or a clone thereof.
1124  auto comm = A_.is_null() ? Tpetra::getDefaultComm() : A_->getComm();
1125 
1126  // Users aren't supposed to do anything with the matrix on
1127  // processes where its communicator is null.
1128  if (!comm.is_null() && comm->getRank() == 0) {
1129  // By convention, describe() should always begin with a tab.
1130  Teuchos::OSTab tab0(out);
1131  // Output is in YAML format. We have to escape the class name,
1132  // because it has a colon.
1133  out << "\"Ifpack2::LocalSparseTriangularSolver\":" << endl;
1134  Teuchos::OSTab tab1(out);
1135  out << "Scalar: " << Teuchos::TypeNameTraits<scalar_type>::name() << endl
1136  << "LocalOrdinal: " << Teuchos::TypeNameTraits<local_ordinal_type>::name() << endl
1137  << "GlobalOrdinal: " << Teuchos::TypeNameTraits<global_ordinal_type>::name() << endl
1138  << "Node: " << Teuchos::TypeNameTraits<node_type>::name() << endl;
1139  }
1140  }
1141 }
1142 
1143 template <class MatrixType>
1146  getDomainMap() const {
1147  TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::runtime_error,
1148  "Ifpack2::LocalSparseTriangularSolver::getDomainMap: "
1149  "The matrix is null. Please call setMatrix() with a nonnull input "
1150  "before calling this method.");
1151  return A_->getDomainMap();
1152 }
1153 
1154 template <class MatrixType>
1157  getRangeMap() const {
1158  TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::runtime_error,
1159  "Ifpack2::LocalSparseTriangularSolver::getRangeMap: "
1160  "The matrix is null. Please call setMatrix() with a nonnull input "
1161  "before calling this method.");
1162  return A_->getRangeMap();
1163 }
1164 
1165 template <class MatrixType>
1168  const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::setMatrix: ";
1169 
1170  // If the pointer didn't change, do nothing. This is reasonable
1171  // because users are supposed to call this method with the same
1172  // object over all participating processes, and pointer identity
1173  // implies object identity.
1174  if (A.getRawPtr() != A_.getRawPtr() || isInternallyChanged_) {
1175  // Check in serial or one-process mode if the matrix is square.
1176  TEUCHOS_TEST_FOR_EXCEPTION(!A.is_null() && A->getComm()->getSize() == 1 &&
1177  A->getLocalNumRows() != A->getLocalNumCols(),
1178  std::runtime_error, prefix << "If A's communicator only contains one "
1179  "process, then A must be square. Instead, you provided a matrix A with "
1180  << A->getLocalNumRows() << " rows and " << A->getLocalNumCols() << " columns.");
1181 
1182  // It's legal for A to be null; in that case, you may not call
1183  // initialize() until calling setMatrix() with a nonnull input.
1184  // Regardless, setting the matrix invalidates the preconditioner.
1185  isInitialized_ = false;
1186  isComputed_ = false;
1187 
1188  if (A.is_null()) {
1189  A_crs_ = Teuchos::null;
1190  A_ = Teuchos::null;
1191  } else { // A is not null
1193  Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A);
1194  TEUCHOS_TEST_FOR_EXCEPTION(A_crs.is_null(), std::invalid_argument, prefix << "The input matrix A is not a Tpetra::CrsMatrix.");
1195  A_crs_ = A_crs;
1196  A_ = A;
1197  }
1198 
1199  if (Teuchos::nonnull(htsImpl_))
1200  htsImpl_->reset();
1201  } // pointers are not the same
1202 }
1203 
1204 template <class MatrixType>
1206  setStreamInfo(const bool& isKokkosKernelsStream, const int& num_streams,
1207  const std::vector<HandleExecSpace>& exec_space_instances) {
1208  isKokkosKernelsStream_ = isKokkosKernelsStream;
1209  num_streams_ = num_streams;
1210  exec_space_instances_ = exec_space_instances;
1211  A_crs_v_ = std::vector<Teuchos::RCP<crs_matrix_type>>(num_streams_);
1212 }
1213 
1214 template <class MatrixType>
1216  setMatrices(const std::vector<Teuchos::RCP<crs_matrix_type>>& A_crs_v) {
1217  const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::setMatrixWithStreams: ";
1218 
1219  for (int i = 0; i < num_streams_; i++) {
1220  // If the pointer didn't change, do nothing. This is reasonable
1221  // because users are supposed to call this method with the same
1222  // object over all participating processes, and pointer identity
1223  // implies object identity.
1224  if (A_crs_v[i].getRawPtr() != A_crs_v_[i].getRawPtr() || isInternallyChanged_) {
1225  // Check in serial or one-process mode if the matrix is square.
1226  TEUCHOS_TEST_FOR_EXCEPTION(!A_crs_v[i].is_null() && A_crs_v[i]->getComm()->getSize() == 1 &&
1227  A_crs_v[i]->getLocalNumRows() != A_crs_v[i]->getLocalNumCols(),
1228  std::runtime_error, prefix << "If A's communicator only contains one "
1229  "process, then A must be square. Instead, you provided a matrix A with "
1230  << A_crs_v[i]->getLocalNumRows() << " rows and " << A_crs_v[i]->getLocalNumCols() << " columns.");
1231 
1232  // It's legal for A to be null; in that case, you may not call
1233  // initialize() until calling setMatrix() with a nonnull input.
1234  // Regardless, setting the matrix invalidates the preconditioner.
1235  isInitialized_ = false;
1236  isComputed_ = false;
1237 
1238  if (A_crs_v[i].is_null()) {
1239  A_crs_v_[i] = Teuchos::null;
1240  } else { // A is not null
1242  Teuchos::rcp_dynamic_cast<crs_matrix_type>(A_crs_v[i]);
1243  TEUCHOS_TEST_FOR_EXCEPTION(A_crs.is_null(), std::invalid_argument, prefix << "The input matrix A is not a Tpetra::CrsMatrix.");
1244  A_crs_v_[i] = A_crs;
1245  }
1246  } // pointers are not the same
1247  }
1248 }
1249 
1250 } // namespace Ifpack2
1251 
1252 #define IFPACK2_LOCALSPARSETRIANGULARSOLVER_INSTANT(S, LO, GO, N) \
1253  template class Ifpack2::LocalSparseTriangularSolver<Tpetra::RowMatrix<S, LO, GO, N>>;
1254 
1255 #endif // IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
bool is_null(const boost::shared_ptr< T > &p)
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the preconditioner to X, and put the result in Y.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:739
int getNumCompute() const
Return the number of calls to compute().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1045
std::string description() const
A one-line description of this object.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1079
T & get(const std::string &name, T def_value)
bool nonnull(const std::shared_ptr< T > &p)
#define TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(throw_exception_test, Exception, msg)
void compute()
&quot;Numeric&quot; phase of setup
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:642
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
MatrixType::global_ordinal_type global_ordinal_type
Type of the global indices of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:62
bool isComputed() const
Return true if compute() has been called.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:191
T * get() const
double getApplyTime() const
Return the time spent in apply().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1072
double getComputeTime() const
Return the time spent in compute().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1065
double getInitializeTime() const
Return the time spent in initialize().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1058
MatrixType::node_type node_type
Node type of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:64
bool isParameter(const std::string &name) const
T * getRawPtr() const
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
void setMatrices(const std::vector< Teuchos::RCP< crs_matrix_type > > &A_crs_v)
Set this preconditioner&#39;s matrices (used by stream interface of triangular solve).
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1216
#define TEUCHOS_TEST_FOR_EXCEPT_MSG(throw_exception_test, msg)
Teuchos::RCP< const map_type > getDomainMap() const
The domain of this operator.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1146
&quot;Preconditioner&quot; that solves local sparse triangular systems.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:46
void initialize()
&quot;Symbolic&quot; phase of setup
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:392
int getNumInitialize() const
Return the number of calls to initialize().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1039
void setStreamInfo(const bool &isKokkosKernelsStream, const int &num_streams, const std::vector< HandleExecSpace > &exec_space_instances)
Set this triangular solver&#39;s stream information.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1206
Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > map_type
Specialization of Tpetra::Map used by this class.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:69
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Set this preconditioner&#39;s matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1167
void resize(size_type new_size, const value_type &x=value_type())
IntegralType getIntegralValue(const std::string &str, const std::string &paramName="", const std::string &sublistName="") const
int getNumApply() const
Return the number of calls to apply().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1051
MatrixType::local_ordinal_type local_ordinal_type
Type of the local indices of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:60
Teuchos::RCP< const map_type > getRangeMap() const
The range of this operator.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1157
bool isType(const std::string &name) const
Tpetra::CrsMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > crs_matrix_type
Specialization of Tpetra::CrsMatrix used by this class.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:77
void setParameters(const Teuchos::ParameterList &params)
Set this object&#39;s parameters.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:348
MatrixType::scalar_type scalar_type
Scalar type of the entries of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:56
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print this object with given verbosity to the given output stream.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1112
virtual ~LocalSparseTriangularSolver()
Destructor (virtual for memory safety).
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:332
LocalSparseTriangularSolver()
Constructor that takes no input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:296
static std::string name()
std::string typeName(const T &t)
bool is_null() const