Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_LocalSparseTriangularSolver_def.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4 //
5 // Copyright 2009 NTESS and the Ifpack2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
11 #define IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
12 
13 #include <sstream> // ostringstream
14 #include <stdexcept> // runtime_error
15 
16 #include "Ifpack2_LocalSparseTriangularSolver_decl.hpp"
17 #include "Tpetra_CrsMatrix.hpp"
18 #include "Tpetra_Core.hpp"
19 #include "Teuchos_StandardParameterEntryValidators.hpp"
20 #include "Tpetra_Details_determineLocalTriangularStructure.hpp"
21 #include "KokkosSparse_trsv.hpp"
22 
23 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
24 # include "shylu_hts.hpp"
25 #endif
26 
27 namespace Ifpack2 {
28 
29 namespace Details {
30 
31 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA)
32 
33 inline void cusparse_error_throw(cusparseStatus_t cusparseStatus, const char* name,
34  const char* file, const int line) {
35  std::ostringstream out;
36 #if defined(CUSPARSE_VERSION) && (10300 <= CUSPARSE_VERSION)
37  out << name << " error( " << cusparseGetErrorName(cusparseStatus) << "): " << cusparseGetErrorString(cusparseStatus);
38 #else
39  out << name << " error( ";
40  switch (cusparseStatus) {
41  case CUSPARSE_STATUS_NOT_INITIALIZED:
42  out << "CUSPARSE_STATUS_NOT_INITIALIZED): cusparse handle was not "
43  "created correctly.";
44  break;
45  case CUSPARSE_STATUS_ALLOC_FAILED:
46  out << "CUSPARSE_STATUS_ALLOC_FAILED): you might tried to allocate too "
47  "much memory";
48  break;
49  case CUSPARSE_STATUS_INVALID_VALUE: out << "CUSPARSE_STATUS_INVALID_VALUE)"; break;
50  case CUSPARSE_STATUS_ARCH_MISMATCH: out << "CUSPARSE_STATUS_ARCH_MISMATCH)"; break;
51  case CUSPARSE_STATUS_MAPPING_ERROR: out << "CUSPARSE_STATUS_MAPPING_ERROR)"; break;
52  case CUSPARSE_STATUS_EXECUTION_FAILED: out << "CUSPARSE_STATUS_EXECUTION_FAILED)"; break;
53  case CUSPARSE_STATUS_INTERNAL_ERROR: out << "CUSPARSE_STATUS_INTERNAL_ERROR)"; break;
54  case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED: out << "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED)"; break;
55  case CUSPARSE_STATUS_ZERO_PIVOT: out << "CUSPARSE_STATUS_ZERO_PIVOT)"; break;
56  default: out << "unrecognized error code): this is bad!"; break;
57  }
58 #endif // CUSPARSE_VERSION
59  if (file) {
60  out << " " << file << ":" << line;
61  }
62  throw std::runtime_error(out.str());
63 }
64 
65 inline void cusparse_safe_call(cusparseStatus_t cusparseStatus, const char* name, const char* file = nullptr,
66  const int line = 0) {
67  if (CUSPARSE_STATUS_SUCCESS != cusparseStatus) {
68  cusparse_error_throw(cusparseStatus, name, file, line);
69  }
70 }
71 
72 #define IFPACK2_DETAILS_CUSPARSE_SAFE_CALL(call) \
73  Ifpack2::Details::cusparse_safe_call(call, #call, __FILE__, __LINE__)
74 
75 #endif // defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA)
76 
77 struct TrisolverType {
78  enum Enum {
79  Internal,
80  HTS,
81  KSPTRSV
82  };
83 
84  static void loadPLTypeOption (Teuchos::Array<std::string>& type_strs, Teuchos::Array<Enum>& type_enums) {
85  type_strs.resize(3);
86  type_strs[0] = "Internal";
87  type_strs[1] = "HTS";
88  type_strs[2] = "KSPTRSV";
89  type_enums.resize(3);
90  type_enums[0] = Internal;
91  type_enums[1] = HTS;
92  type_enums[2] = KSPTRSV;
93  }
94 };
95 }
96 
97 template<class MatrixType>
98 class LocalSparseTriangularSolver<MatrixType>::HtsImpl {
99 public:
100  typedef Tpetra::CrsMatrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type> crs_matrix_type;
101 
102  void reset () {
103 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
104  Timpl_ = Teuchos::null;
105  levelset_block_size_ = 1;
106 #endif
107  }
108 
109  void setParameters (const Teuchos::ParameterList& pl) {
110  (void)pl;
111 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
112  const char* block_size_s = "trisolver: block size";
113  if (pl.isParameter(block_size_s)) {
114  TEUCHOS_TEST_FOR_EXCEPT_MSG( ! pl.isType<int>(block_size_s),
115  "The parameter \"" << block_size_s << "\" must be of type int.");
116  levelset_block_size_ = pl.get<int>(block_size_s);
117  }
118  if (levelset_block_size_ < 1)
119  levelset_block_size_ = 1;
120 #endif
121  }
122 
123  // HTS has the phases symbolic+numeric, numeric, and apply. Hence the first
124  // call to compute() will trigger the symbolic+numeric phase, and subsequent
125  // calls (with the same Timpl_) will trigger the numeric phase. In the call to
126  // initialize(), essentially nothing happens.
127  void initialize (const crs_matrix_type& /* unused */) {
128 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
129  reset();
130  transpose_ = conjugate_ = false;
131 #endif
132  }
133 
134  void compute (const crs_matrix_type& T_in, const Teuchos::RCP<Teuchos::FancyOStream>& out) {
135  (void)T_in;
136  (void)out;
137 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
138  using Teuchos::ArrayRCP;
139 
140  auto rowptr = T_in.getLocalRowPtrsHost();
141  auto colidx = T_in.getLocalIndicesHost();
142  auto val = T_in.getLocalValuesHost(Tpetra::Access::ReadOnly);
143  Kokkos::fence();
144 
145  Teuchos::RCP<HtsCrsMatrix> T_hts = Teuchos::rcpWithDealloc(
146  HTST::make_CrsMatrix(rowptr.size() - 1,
147  rowptr.data(), colidx.data(),
148  // For std/Kokkos::complex.
149  reinterpret_cast<const scalar_type*>(val.data()),
150  transpose_, conjugate_),
151  HtsCrsMatrixDeleter());
152 
153  if (Teuchos::nonnull(Timpl_)) {
154  // Reuse the nonzero pattern.
155  HTST::reprocess_numeric(Timpl_.get(), T_hts.get());
156  } else {
157  // Build from scratch.
158  if (T_in.getCrsGraph().is_null()) {
159  if (Teuchos::nonnull(out))
160  *out << "HTS compute failed because T_in.getCrsGraph().is_null().\n";
161  return;
162  }
163  if ( ! T_in.getCrsGraph()->isSorted()) {
164  if (Teuchos::nonnull(out))
165  *out << "HTS compute failed because ! T_in.getCrsGraph().isSorted().\n";
166  return;
167  }
168  if ( ! T_in.isStorageOptimized()) {
169  if (Teuchos::nonnull(out))
170  *out << "HTS compute failed because ! T_in.isStorageOptimized().\n";
171  return;
172  }
173 
174  typename HTST::PreprocessArgs args;
175  args.T = T_hts.get();
176  args.max_nrhs = 1;
177 #ifdef _OPENMP
178  args.nthreads = omp_get_max_threads();
179 #else
180  args.nthreads = 1;
181 #endif
182  args.save_for_reprocess = true;
183  typename HTST::Options opts;
184  opts.levelset_block_size = levelset_block_size_;
185  args.options = &opts;
186 
187  try {
188  Timpl_ = Teuchos::rcpWithDealloc(HTST::preprocess(args), TImplDeleter());
189  } catch (const std::exception& e) {
190  if (Teuchos::nonnull(out))
191  *out << "HTS preprocess threw: " << e.what() << "\n";
192  }
193  }
194 #endif
195  }
196 
197  // HTS may not be able to handle a matrix, so query whether compute()
198  // succeeded.
199  bool isComputed () {
200 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
201  return Teuchos::nonnull(Timpl_);
202 #else
203  return false;
204 #endif
205  }
206 
207  // Y := beta * Y + alpha * (M * X)
208  void localApply (const MV& X, MV& Y,
209  const Teuchos::ETransp /* mode */,
210  const scalar_type& alpha, const scalar_type& beta) const {
211  (void)X;
212  (void)Y;
213  (void)alpha;
214  (void)beta;
215 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
216  const auto& X_view = X.getLocalViewHost (Tpetra::Access::ReadOnly);
217  const auto& Y_view = Y.getLocalViewHost (Tpetra::Access::ReadWrite);
218 
219  // Only does something if #rhs > current capacity.
220  HTST::reset_max_nrhs(Timpl_.get(), X_view.extent(1));
221  // Switch alpha and beta because of HTS's opposite convention.
222  HTST::solve_omp(Timpl_.get(),
223  // For std/Kokkos::complex.
224  reinterpret_cast<const scalar_type*>(X_view.data()),
225  X_view.extent(1),
226  // For std/Kokkos::complex.
227  reinterpret_cast<scalar_type*>(Y_view.data()),
228  beta, alpha);
229 #endif
230  }
231 
232 private:
233 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
234  typedef ::Experimental::HTS<local_ordinal_type, size_t, scalar_type> HTST;
235  typedef typename HTST::Impl TImpl;
236  typedef typename HTST::CrsMatrix HtsCrsMatrix;
237 
238  struct TImplDeleter {
239  void free (TImpl* impl) {
240  HTST::delete_Impl(impl);
241  }
242  };
243 
244  struct HtsCrsMatrixDeleter {
245  void free (HtsCrsMatrix* T) {
246  HTST::delete_CrsMatrix(T);
247  }
248  };
249 
250  Teuchos::RCP<TImpl> Timpl_;
251  bool transpose_, conjugate_;
252  int levelset_block_size_;
253 #endif
254 };
255 
256 template<class MatrixType>
259  A_ (A)
260 {
261  initializeState();
262 
263  if (! A.is_null ()) {
265  Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A);
267  (A_crs.is_null (), std::invalid_argument,
268  "Ifpack2::LocalSparseTriangularSolver constructor: "
269  "The input matrix A is not a Tpetra::CrsMatrix.");
270  A_crs_ = A_crs;
271  }
272 }
273 
274 template<class MatrixType>
278  A_ (A),
279  out_ (out)
280 {
281  initializeState();
282  if (! out_.is_null ()) {
283  *out_ << ">>> DEBUG Ifpack2::LocalSparseTriangularSolver constructor"
284  << std::endl;
285  }
286 
287  if (! A.is_null ()) {
289  Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A);
291  (A_crs.is_null (), std::invalid_argument,
292  "Ifpack2::LocalSparseTriangularSolver constructor: "
293  "The input matrix A is not a Tpetra::CrsMatrix.");
294  A_crs_ = A_crs;
295  }
296 }
297 
298 template<class MatrixType>
301 {
302  initializeState();
303 }
304 
305 template<class MatrixType>
308  out_ (out)
309 {
310  initializeState();
311  if (! out_.is_null ()) {
312  *out_ << ">>> DEBUG Ifpack2::LocalSparseTriangularSolver constructor"
313  << std::endl;
314  }
315 }
316 
317 template<class MatrixType>
319 {
320  isInitialized_ = false;
321  isComputed_ = false;
322  reverseStorage_ = false;
323  isInternallyChanged_ = false;
324  numInitialize_ = 0;
325  numCompute_ = 0;
326  numApply_ = 0;
327  initializeTime_ = 0.0;
328  computeTime_ = 0.0;
329  applyTime_ = 0.0;
330  isKokkosKernelsSptrsv_ = false;
331  isKokkosKernelsStream_ = false;
332  num_streams_ = 0;
333  uplo_ = "N";
334  diag_ = "N";
335 }
336 
337 template<class MatrixType>
340 {
341  if (!isKokkosKernelsStream_) {
342  if (Teuchos::nonnull (kh_)) {
343  kh_->destroy_sptrsv_handle();
344  }
345  }
346  else {
347  for (int i = 0; i < num_streams_; i++) {
348  if (Teuchos::nonnull (kh_v_[i])) {
349  kh_v_[i]->destroy_sptrsv_handle();
350  }
351  }
352  }
353 }
354 
355 template<class MatrixType>
356 void
359 {
360  using Teuchos::RCP;
362  using Teuchos::Array;
363 
364  Details::TrisolverType::Enum trisolverType = Details::TrisolverType::Internal;
365  do {
366  static const char typeName[] = "trisolver: type";
367 
368  if ( ! pl.isType<std::string>(typeName)) break;
369 
370  // Map std::string <-> TrisolverType::Enum.
371  Array<std::string> trisolverTypeStrs;
372  Array<Details::TrisolverType::Enum> trisolverTypeEnums;
373  Details::TrisolverType::loadPLTypeOption (trisolverTypeStrs, trisolverTypeEnums);
375  s2i(trisolverTypeStrs (), trisolverTypeEnums (), typeName, false);
376 
377  trisolverType = s2i.getIntegralValue(pl.get<std::string>(typeName));
378  } while (0);
379 
380  if (trisolverType == Details::TrisolverType::HTS) {
381  htsImpl_ = Teuchos::rcp (new HtsImpl ());
382  htsImpl_->setParameters (pl);
383  }
384 
385  if (trisolverType == Details::TrisolverType::KSPTRSV) {
386  this->isKokkosKernelsSptrsv_ = true;
387  }
388  else {
389  this->isKokkosKernelsSptrsv_ = false;
390  }
391 
392  if (pl.isParameter("trisolver: reverse U"))
393  reverseStorage_ = pl.get<bool>("trisolver: reverse U");
394 
396  (reverseStorage_ && (trisolverType == Details::TrisolverType::HTS || trisolverType == Details::TrisolverType::KSPTRSV),
397  std::logic_error, "Ifpack2::LocalSparseTriangularSolver::setParameters: "
398  "You are not allowed to enable both HTS or KSPTRSV and the \"trisolver: reverse U\" "
399  "options. See GitHub issue #2647.");
400 }
401 
402 template<class MatrixType>
403 void
406 {
407  using Tpetra::Details::determineLocalTriangularStructure;
408 
409  using local_matrix_type = typename crs_matrix_type::local_matrix_device_type;
410  using LO = local_ordinal_type;
411 
412  const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::initialize: ";
413  if (! out_.is_null ()) {
414  *out_ << ">>> DEBUG " << prefix << std::endl;
415  }
416 
417  if (!isKokkosKernelsStream_) {
419  (A_.is_null (), std::runtime_error, prefix << "You must call "
420  "setMatrix() with a nonnull input matrix before you may call "
421  "initialize() or compute().");
422  if (A_crs_.is_null ()) {
423  auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_);
425  (A_crs.get () == nullptr, std::invalid_argument,
426  prefix << "The input matrix A is not a Tpetra::CrsMatrix.");
427  A_crs_ = A_crs;
428  }
429  auto G = A_crs_->getGraph ();
431  (G.is_null (), std::logic_error, prefix << "A_ and A_crs_ are nonnull, "
432  "but A_crs_'s RowGraph G is null. "
433  "Please report this bug to the Ifpack2 developers.");
434  // At this point, the graph MUST be fillComplete. The "initialize"
435  // (symbolic) part of setup only depends on the graph structure, so
436  // the matrix itself need not be fill complete.
438  (! G->isFillComplete (), std::runtime_error, "If you call this method, "
439  "the matrix's graph must be fill complete. It is not.");
440 
441  // mfh 30 Apr 2018: See GitHub Issue #2658.
442  constexpr bool ignoreMapsForTriStructure = true;
443  auto lclTriStructure = [&] {
444  auto lclMatrix = A_crs_->getLocalMatrixDevice ();
445  auto lclRowMap = A_crs_->getRowMap ()->getLocalMap ();
446  auto lclColMap = A_crs_->getColMap ()->getLocalMap ();
447  auto lclTriStruct =
448  determineLocalTriangularStructure (lclMatrix.graph,
449  lclRowMap,
450  lclColMap,
451  ignoreMapsForTriStructure);
452  const LO lclNumRows = lclRowMap.getLocalNumElements ();
453  this->diag_ = (lclTriStruct.diagCount < lclNumRows) ? "U" : "N";
454  this->uplo_ = lclTriStruct.couldBeLowerTriangular ? "L" :
455  (lclTriStruct.couldBeUpperTriangular ? "U" : "N");
456  return lclTriStruct;
457  } ();
458 
459  if (reverseStorage_ && lclTriStructure.couldBeUpperTriangular &&
460  htsImpl_.is_null ()) {
461  // Reverse the storage for an upper triangular matrix
462  auto Alocal = A_crs_->getLocalMatrixDevice();
463  auto ptr = Alocal.graph.row_map;
464  auto ind = Alocal.graph.entries;
465  auto val = Alocal.values;
466 
467  auto numRows = Alocal.numRows();
468  auto numCols = Alocal.numCols();
469  auto numNnz = Alocal.nnz();
470 
471  typename decltype(ptr)::non_const_type newptr ("ptr", ptr.extent (0));
472  typename decltype(ind)::non_const_type newind ("ind", ind.extent (0));
473  decltype(val) newval ("val", val.extent (0));
474 
475  // FIXME: The code below assumes UVM
476  typename crs_matrix_type::execution_space().fence();
477  newptr(0) = 0;
478  for (local_ordinal_type row = 0, rowStart = 0; row < numRows; ++row) {
479  auto A_r = Alocal.row(numRows-1 - row);
480 
481  auto numEnt = A_r.length;
482  for (local_ordinal_type k = 0; k < numEnt; ++k) {
483  newind(rowStart + k) = numCols-1 - A_r.colidx(numEnt-1 - k);
484  newval(rowStart + k) = A_r.value (numEnt-1 - k);
485  }
486  rowStart += numEnt;
487  newptr(row+1) = rowStart;
488  }
489  typename crs_matrix_type::execution_space().fence();
490 
491  // Reverse maps
492  Teuchos::RCP<map_type> newRowMap, newColMap;
493  {
494  // Reverse row map
495  auto rowMap = A_->getRowMap();
496  auto numElems = rowMap->getLocalNumElements();
497  auto rowElems = rowMap->getLocalElementList();
498 
499  Teuchos::Array<global_ordinal_type> newRowElems(rowElems.size());
500  for (size_t i = 0; i < numElems; i++)
501  newRowElems[i] = rowElems[numElems-1 - i];
502 
503  newRowMap = Teuchos::rcp(new map_type(rowMap->getGlobalNumElements(), newRowElems, rowMap->getIndexBase(), rowMap->getComm()));
504  }
505  {
506  // Reverse column map
507  auto colMap = A_->getColMap();
508  auto numElems = colMap->getLocalNumElements();
509  auto colElems = colMap->getLocalElementList();
510 
511  Teuchos::Array<global_ordinal_type> newColElems(colElems.size());
512  for (size_t i = 0; i < numElems; i++)
513  newColElems[i] = colElems[numElems-1 - i];
514 
515  newColMap = Teuchos::rcp(new map_type(colMap->getGlobalNumElements(), newColElems, colMap->getIndexBase(), colMap->getComm()));
516  }
517 
518  // Construct new matrix
519  local_matrix_type newLocalMatrix("Upermuted", numRows, numCols, numNnz, newval, newptr, newind);
520 
521  A_crs_ = Teuchos::rcp(new crs_matrix_type(newLocalMatrix, newRowMap, newColMap, A_crs_->getDomainMap(), A_crs_->getRangeMap()));
522 
523  isInternallyChanged_ = true;
524 
525  // FIXME (mfh 18 Apr 2019) Recomputing this is unnecessary, but I
526  // didn't want to break any invariants, especially considering
527  // that this branch is likely poorly tested.
528  auto newLclTriStructure =
529  determineLocalTriangularStructure (newLocalMatrix.graph,
530  newRowMap->getLocalMap (),
531  newColMap->getLocalMap (),
532  ignoreMapsForTriStructure);
533  const LO newLclNumRows = newRowMap->getLocalNumElements ();
534  this->diag_ = (newLclTriStructure.diagCount < newLclNumRows) ? "U" : "N";
535  this->uplo_ = newLclTriStructure.couldBeLowerTriangular ? "L" :
536  (newLclTriStructure.couldBeUpperTriangular ? "U" : "N");
537  }
538  }
539  else {
540  for (int i = 0; i < num_streams_; i++) {
542  (A_crs_v_[i].is_null (), std::runtime_error, prefix << "You must call "
543  "setMatrix() with a nonnull input matrix before you may call "
544  "initialize() or compute().");
545  auto G = A_crs_v_[i]->getGraph ();
547  (G.is_null (), std::logic_error, prefix << "A_crs_ are nonnull, "
548  "but A_crs_'s RowGraph G is null. "
549  "Please report this bug to the Ifpack2 developers.");
550  // At this point, the graph MUST be fillComplete. The "initialize"
551  // (symbolic) part of setup only depends on the graph structure, so
552  // the matrix itself need not be fill complete.
554  (! G->isFillComplete (), std::runtime_error, "If you call this method, "
555  "the matrix's graph must be fill complete. It is not.");
556 
557  // mfh 30 Apr 2018: See GitHub Issue #2658.
558  constexpr bool ignoreMapsForTriStructure = true;
559  std::string prev_uplo_ = this->uplo_;
560  std::string prev_diag_ = this->diag_;
561  auto lclMatrix = A_crs_v_[i]->getLocalMatrixDevice ();
562  auto lclRowMap = A_crs_v_[i]->getRowMap ()->getLocalMap ();
563  auto lclColMap = A_crs_v_[i]->getColMap ()->getLocalMap ();
564  auto lclTriStruct =
565  determineLocalTriangularStructure (lclMatrix.graph,
566  lclRowMap,
567  lclColMap,
568  ignoreMapsForTriStructure);
569  const LO lclNumRows = lclRowMap.getLocalNumElements ();
570  this->diag_ = (lclTriStruct.diagCount < lclNumRows) ? "U" : "N";
571  this->uplo_ = lclTriStruct.couldBeLowerTriangular ? "L" :
572  (lclTriStruct.couldBeUpperTriangular ? "U" : "N");
573  if (i > 0) {
575  ((this->diag_ != prev_diag_) || (this->uplo_ != prev_uplo_),
576  std::logic_error, prefix << "A_crs_'s structures in streams "
577  "are different. Please report this bug to the Ifpack2 developers.");
578  }
579  }
580  }
581 
582  if (Teuchos::nonnull (htsImpl_))
583  {
584  htsImpl_->initialize (*A_crs_);
585  isInternallyChanged_ = true;
586  }
587 
588  const bool ksptrsv_valid_uplo = (this->uplo_ != "N");
589  kh_v_nonnull_ = false;
590  if (this->isKokkosKernelsSptrsv_ && ksptrsv_valid_uplo && this->diag_ != "U")
591  {
592  if (!isKokkosKernelsStream_) {
593  kh_ = Teuchos::rcp (new k_handle());
594  const bool is_lower_tri = (this->uplo_ == "L") ? true : false;
595 
596  auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_, true);
597  auto Alocal = A_crs->getLocalMatrixDevice();
598  auto ptr = Alocal.graph.row_map;
599  auto ind = Alocal.graph.entries;
600  auto val = Alocal.values;
601 
602  auto numRows = Alocal.numRows();
603  kh_->create_sptrsv_handle(kokkosKernelsAlgorithm(), numRows, is_lower_tri);
604  KokkosSparse::Experimental::sptrsv_symbolic(kh_.getRawPtr(), ptr, ind, val);
605  } else {
606  kh_v_ = std::vector< Teuchos::RCP<k_handle> >(num_streams_);
607  for (int i = 0; i < num_streams_; i++) {
608  kh_v_[i] = Teuchos::rcp (new k_handle ());
609  auto A_crs_i = Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_crs_v_[i], true);
610  auto Alocal_i = A_crs_i->getLocalMatrixDevice();
611  auto ptr_i = Alocal_i.graph.row_map;
612  auto ind_i = Alocal_i.graph.entries;
613  auto val_i = Alocal_i.values;
614 
615  auto numRows_i = Alocal_i.numRows();
616 
617  const bool is_lower_tri = (this->uplo_ == "L") ? true : false;
618  kh_v_[i]->create_sptrsv_handle(kokkosKernelsAlgorithm(), numRows_i, is_lower_tri);
619  KokkosSparse::Experimental::sptrsv_symbolic(kh_v_[i].getRawPtr(), ptr_i, ind_i, val_i);
620  }
621  kh_v_nonnull_ = true;
622  }
623  }
624 
625  isInitialized_ = true;
626  ++numInitialize_;
627 }
628 
629 template<class MatrixType>
630 KokkosSparse::Experimental::SPTRSVAlgorithm
632 {
633 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA)
634  // CuSparse only supports int type ordinals
635  // and scalar types of float, double, float complex and double complex
636  if constexpr (std::is_same<Kokkos::Cuda, HandleExecSpace>::value &&
637  std::is_same<int, local_ordinal_type>::value &&
638  (std::is_same<scalar_type, float>::value ||
639  std::is_same<scalar_type, double>::value ||
640  std::is_same<scalar_type, Kokkos::complex<float>>::value ||
641  std::is_same<scalar_type, Kokkos::complex<double>>::value))
642  {
643  return KokkosSparse::Experimental::SPTRSVAlgorithm::SPTRSV_CUSPARSE;
644  }
645 #endif
646  return KokkosSparse::Experimental::SPTRSVAlgorithm::SEQLVLSCHD_TP1;
647 }
648 
649 template<class MatrixType>
650 void
653 {
654  const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::compute: ";
655  if (! out_.is_null ()) {
656  *out_ << ">>> DEBUG " << prefix << std::endl;
657  }
658 
659  if (!isKokkosKernelsStream_) {
661  (A_.is_null (), std::runtime_error, prefix << "You must call "
662  "setMatrix() with a nonnull input matrix before you may call "
663  "initialize() or compute().");
665  (A_crs_.is_null (), std::logic_error, prefix << "A_ is nonnull, but "
666  "A_crs_ is null. Please report this bug to the Ifpack2 developers.");
667  // At this point, the matrix MUST be fillComplete.
669  (! A_crs_->isFillComplete (), std::runtime_error, "If you call this "
670  "method, the matrix must be fill complete. It is not.");
671  }
672  else {
673  for(int i = 0; i < num_streams_; i++) {
675  (A_crs_v_[i].is_null (), std::runtime_error, prefix << "You must call "
676  "setMatrices() with a nonnull input matrix before you may call "
677  "initialize() or compute().");
678  // At this point, the matrix MUST be fillComplete.
680  (! A_crs_v_[i]->isFillComplete (), std::runtime_error, "If you call this "
681  "method, the matrix must be fill complete. It is not.");
682  }
683  }
684 
685  if (! isInitialized_) {
686  initialize ();
687  }
689  (! isInitialized_, std::logic_error, prefix << "initialize() should have "
690  "been called by this point, but isInitialized_ is false. "
691  "Please report this bug to the Ifpack2 developers.");
692 
693 // NOTE (Nov-09-2022):
694 // For Cuda >= 11.3 (using cusparseSpSV), always call symbolic during compute
695 // even when matrix values are changed with the same sparsity pattern.
696 // For Cuda >= 12.1 has a new cusparseSpSV_updateMatrix function just for updating the
697 // values that is substantially faster.
698 // This would all be much better handled via a KokkosSparse::Experimental::sptrsv_numeric(...)
699 // that could hide the Cuda implementation details.
700 #if defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && (CUDA_VERSION >= 11030)
701  if constexpr ( std::is_same_v<typename crs_matrix_type::execution_space, Kokkos::Cuda> )
702  {
703  if (this->isKokkosKernelsSptrsv_) {
704  if (Teuchos::nonnull(kh_) && !isKokkosKernelsStream_) {
705  auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_crs_, true);
706  auto Alocal = A_crs->getLocalMatrixDevice();
707  auto val = Alocal.values;
708  #if (CUSPARSE_VERSION >= 12100)
709  auto *sptrsv_handle = kh_->get_sptrsv_handle();
710  auto cusparse_handle = sptrsv_handle->get_cuSparseHandle();
711  cusparseSpSV_updateMatrix(cusparse_handle->handle,
712  cusparse_handle->spsvDescr,
713  val.data(),
714  CUSPARSE_SPSV_UPDATE_GENERAL);
715  #else
716  auto ptr = Alocal.graph.row_map;
717  auto ind = Alocal.graph.entries;
718  KokkosSparse::Experimental::sptrsv_symbolic(kh_.getRawPtr(), ptr, ind, val);
719  #endif
720  } else if (kh_v_nonnull_) {
721  for (int i = 0; i < num_streams_; i++) {
722  auto A_crs_i = Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_crs_v_[i], true);
723  auto Alocal_i = A_crs_i->getLocalMatrixDevice();
724  auto val_i = Alocal_i.values;
725  #if (CUSPARSE_VERSION >= 12100)
726  auto *sptrsv_handle = kh_v_[i]->get_sptrsv_handle();
727  auto cusparse_handle = sptrsv_handle->get_cuSparseHandle();
728  IFPACK2_DETAILS_CUSPARSE_SAFE_CALL(
729  cusparseSetStream(cusparse_handle->handle, exec_space_instances_[i].cuda_stream()));
730  cusparseSpSV_updateMatrix(cusparse_handle->handle,
731  cusparse_handle->spsvDescr,
732  val_i.data(),
733  CUSPARSE_SPSV_UPDATE_GENERAL);
734  #else
735  auto ptr_i = Alocal_i.graph.row_map;
736  auto ind_i = Alocal_i.graph.entries;
737  KokkosSparse::Experimental::sptrsv_symbolic(exec_space_instances_[i], kh_v_[i].getRawPtr(), ptr_i, ind_i, val_i);
738  #endif
739  }
740  }
741  }
742  }
743 #endif
744 
745  if (! isComputed_) {//Only compute if not computed before
746  if (Teuchos::nonnull (htsImpl_))
747  htsImpl_->compute (*A_crs_, out_);
748 
749  isComputed_ = true;
750  ++numCompute_;
751  }
752 }
753 
754 template<class MatrixType>
756 apply (const Tpetra::MultiVector<scalar_type, local_ordinal_type,
758  Tpetra::MultiVector<scalar_type, local_ordinal_type,
760  Teuchos::ETransp mode,
761  scalar_type alpha,
762  scalar_type beta) const
763 {
764  using Teuchos::RCP;
765  using Teuchos::rcp;
766  using Teuchos::rcpFromRef;
767  typedef scalar_type ST;
768  typedef Teuchos::ScalarTraits<ST> STS;
769  const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::apply: ";
770 
771  if (! out_.is_null ()) {
772  *out_ << ">>> DEBUG " << prefix;
773  if (!isKokkosKernelsStream_) {
774  if (A_crs_.is_null ()) {
775  *out_ << "A_crs_ is null!" << std::endl;
776  }
777  else {
779  Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_);
780  const std::string uplo = this->uplo_;
781  const std::string trans = (mode == Teuchos::CONJ_TRANS) ? "C" :
782  (mode == Teuchos::TRANS ? "T" : "N");
783  const std::string diag = this->diag_;
784  *out_ << "uplo=\"" << uplo
785  << "\", trans=\"" << trans
786  << "\", diag=\"" << diag << "\"" << std::endl;
787  }
788  }
789  else {
790  for (int i = 0; i < num_streams_; i++) {
791  if (A_crs_v_[i].is_null ()) {
792  *out_ << "A_crs_v_[" << i << "]" << " is null!" << std::endl;
793  }
794  else {
795  const std::string uplo = this->uplo_;
796  const std::string trans = (mode == Teuchos::CONJ_TRANS) ? "C" :
797  (mode == Teuchos::TRANS ? "T" : "N");
798  const std::string diag = this->diag_;
799  *out_ << "A_crs_v_[" << i << "]: "
800  << "uplo=\"" << uplo
801  << "\", trans=\"" << trans
802  << "\", diag=\"" << diag << "\"" << std::endl;
803  }
804  }
805  }
806  }
807 
809  (! isComputed (), std::runtime_error, prefix << "If compute() has not yet "
810  "been called, or if you have changed the matrix via setMatrix(), you must "
811  "call compute() before you may call this method.");
812  // If isComputed() is true, it's impossible for the matrix to be
813  // null, or for it not to be a Tpetra::CrsMatrix.
814  if (!isKokkosKernelsStream_) {
816  (A_.is_null (), std::logic_error, prefix << "A_ is null. "
817  "Please report this bug to the Ifpack2 developers.");
819  (A_crs_.is_null (), std::logic_error, prefix << "A_crs_ is null. "
820  "Please report this bug to the Ifpack2 developers.");
821  // However, it _is_ possible that the user called resumeFill() on
822  // the matrix, after calling compute(). This is NOT allowed.
824  (! A_crs_->isFillComplete (), std::runtime_error, "If you call this "
825  "method, the matrix must be fill complete. It is not. This means that "
826  " you must have called resumeFill() on the matrix before calling apply(). "
827  "This is NOT allowed. Note that this class may use the matrix's data in "
828  "place without copying it. Thus, you cannot change the matrix and expect "
829  "the solver to stay the same. If you have changed the matrix, first call "
830  "fillComplete() on it, then call compute() on this object, before you call"
831  " apply(). You do NOT need to call setMatrix, as long as the matrix "
832  "itself (that is, its address in memory) is the same.");
833  }
834  else {
835  for (int i = 0; i < num_streams_; i++) {
837  (A_crs_v_[i].is_null (), std::logic_error, prefix << "A_crs_ is null. "
838  "Please report this bug to the Ifpack2 developers.");
839  // However, it _is_ possible that the user called resumeFill() on
840  // the matrix, after calling compute(). This is NOT allowed.
842  (! A_crs_v_[i]->isFillComplete (), std::runtime_error, "If you call this "
843  "method, the matrix must be fill complete. It is not. This means that "
844  " you must have called resumeFill() on the matrix before calling apply(). "
845  "This is NOT allowed. Note that this class may use the matrix's data in "
846  "place without copying it. Thus, you cannot change the matrix and expect "
847  "the solver to stay the same. If you have changed the matrix, first call "
848  "fillComplete() on it, then call compute() on this object, before you call"
849  " apply(). You do NOT need to call setMatrix, as long as the matrix "
850  "itself (that is, its address in memory) is the same.");
851  }
852  }
853 
854  RCP<const MV> X_cur;
855  RCP<MV> Y_cur;
856 
857  if (!isKokkosKernelsStream_) {
858  auto G = A_crs_->getGraph ();
860  (G.is_null (), std::logic_error, prefix << "A_ and A_crs_ are nonnull, "
861  "but A_crs_'s RowGraph G is null. "
862  "Please report this bug to the Ifpack2 developers.");
863  auto importer = G->getImporter ();
864  auto exporter = G->getExporter ();
865 
866  if (! importer.is_null ()) {
867  if (X_colMap_.is_null () || X_colMap_->getNumVectors () != X.getNumVectors ()) {
868  X_colMap_ = rcp (new MV (importer->getTargetMap (), X.getNumVectors ()));
869  }
870  else {
871  X_colMap_->putScalar (STS::zero ());
872  }
873  // See discussion of Github Issue #672 for why the Import needs to
874  // use the ZERO CombineMode. The case where the Export is
875  // nontrivial is likely never exercised.
876  X_colMap_->doImport (X, *importer, Tpetra::ZERO);
877  }
878  X_cur = importer.is_null () ? rcpFromRef (X) :
879  Teuchos::rcp_const_cast<const MV> (X_colMap_);
880 
881  if (! exporter.is_null ()) {
882  if (Y_rowMap_.is_null () || Y_rowMap_->getNumVectors () != Y.getNumVectors ()) {
883  Y_rowMap_ = rcp (new MV (exporter->getSourceMap (), Y.getNumVectors ()));
884  }
885  else {
886  Y_rowMap_->putScalar (STS::zero ());
887  }
888  Y_rowMap_->doExport (Y, *importer, Tpetra::ADD);
889  }
890  Y_cur = exporter.is_null () ? rcpFromRef (Y) : Y_rowMap_;
891  }
892  else {
893  // Currently assume X and Y are local vectors (same sizes as A_crs_).
894  // Should do a better job here!!!
895  X_cur = rcpFromRef (X);
896  Y_cur = rcpFromRef (Y);
897  }
898 
899  localApply (*X_cur, *Y_cur, mode, alpha, beta);
900 
901  if (!isKokkosKernelsStream_) {
902  auto G = A_crs_->getGraph ();
903  auto exporter = G->getExporter ();
904  if (! exporter.is_null ()) {
905  Y.putScalar (STS::zero ());
906  Y.doExport (*Y_cur, *exporter, Tpetra::ADD);
907  }
908  }
909  ++numApply_;
910 }
911 
912 template<class MatrixType>
913 void
915 localTriangularSolve (const MV& Y,
916  MV& X,
917  const Teuchos::ETransp mode) const
918 {
919  using Teuchos::CONJ_TRANS;
920  using Teuchos::NO_TRANS;
921  using Teuchos::TRANS;
922  const char tfecfFuncName[] = "localTriangularSolve: ";
923 
924  if (!isKokkosKernelsStream_)
925  {
927  (! A_crs_->isFillComplete (), std::runtime_error,
928  "The matrix is not fill complete.");
930  ( A_crs_->getLocalNumRows() > 0 && this->uplo_ == "N", std::runtime_error,
931  "The matrix is neither upper triangular or lower triangular. "
932  "You may only call this method if the matrix is triangular. "
933  "Remember that this is a local (per MPI process) property, and that "
934  "Tpetra only knows how to do a local (per process) triangular solve.");
935  }
936  else
937  {
938  for (int i = 0; i < num_streams_; i++) {
940  (! A_crs_v_[i]->isFillComplete (), std::runtime_error,
941  "The matrix is not fill complete.");
943  ( A_crs_v_[i]->getLocalNumRows() > 0 && this->uplo_ == "N", std::runtime_error,
944  "The matrix is neither upper triangular or lower triangular. "
945  "You may only call this method if the matrix is triangular. "
946  "Remember that this is a local (per MPI process) property, and that "
947  "Tpetra only knows how to do a local (per process) triangular solve.");
948  }
949  }
951  (! X.isConstantStride () || ! Y.isConstantStride (), std::invalid_argument,
952  "X and Y must be constant stride.");
955  (STS::isComplex && mode == TRANS, std::logic_error, "This method does "
956  "not currently support non-conjugated transposed solve (mode == "
957  "Teuchos::TRANS) for complex scalar types.");
958 
959  const std::string uplo = this->uplo_;
960  const std::string trans = (mode == Teuchos::CONJ_TRANS) ? "C" :
961  (mode == Teuchos::TRANS ? "T" : "N");
962  const size_t numVecs = std::min (X.getNumVectors (), Y.getNumVectors ());
963 
964  if (Teuchos::nonnull(kh_) && this->isKokkosKernelsSptrsv_ && trans == "N")
965  {
966  auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type> (this->A_);
967  auto A_lclk = A_crs->getLocalMatrixDevice ();
968  auto ptr = A_lclk.graph.row_map;
969  auto ind = A_lclk.graph.entries;
970  auto val = A_lclk.values;
971 
972  for (size_t j = 0; j < numVecs; ++j) {
973  auto X_j = X.getVectorNonConst (j);
974  auto Y_j = Y.getVector (j);
975  auto X_lcl = X_j->getLocalViewDevice (Tpetra::Access::ReadWrite);
976  auto Y_lcl = Y_j->getLocalViewDevice (Tpetra::Access::ReadOnly);
977  auto X_lcl_1d = Kokkos::subview (X_lcl, Kokkos::ALL (), 0);
978  auto Y_lcl_1d = Kokkos::subview (Y_lcl, Kokkos::ALL (), 0);
979  KokkosSparse::Experimental::sptrsv_solve(kh_.getRawPtr(), ptr, ind, val, Y_lcl_1d, X_lcl_1d);
980  // TODO is this fence needed...
981  typename k_handle::HandleExecSpace().fence();
982  }
983  } // End using regular interface of Kokkos Kernels Sptrsv
984  else if (kh_v_nonnull_ && this->isKokkosKernelsSptrsv_ && trans == "N")
985  {
986  std::vector<lno_row_view_t> ptr_v(num_streams_);
987  std::vector<lno_nonzero_view_t> ind_v(num_streams_);
988  std::vector<scalar_nonzero_view_t> val_v(num_streams_);
989  std::vector<k_handle *> KernelHandle_rawptr_v_(num_streams_);
990  for (size_t j = 0; j < numVecs; ++j) {
991  auto X_j = X.getVectorNonConst (j);
992  auto Y_j = Y.getVector (j);
993  auto X_lcl = X_j->getLocalViewDevice (Tpetra::Access::ReadWrite);
994  auto Y_lcl = Y_j->getLocalViewDevice (Tpetra::Access::ReadOnly);
995  auto X_lcl_1d = Kokkos::subview (X_lcl, Kokkos::ALL (), 0);
996  auto Y_lcl_1d = Kokkos::subview (Y_lcl, Kokkos::ALL (), 0);
997  std::vector<decltype(X_lcl_1d)> x_v(num_streams_);
998  std::vector<decltype(Y_lcl_1d)> y_v(num_streams_);
999  local_ordinal_type stream_begin = 0;
1000  local_ordinal_type stream_end;
1001  for (int i = 0; i < num_streams_; i++) {
1002  auto A_crs_i = Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_crs_v_[i]);
1003  auto Alocal_i = A_crs_i->getLocalMatrixDevice();
1004  ptr_v[i] = Alocal_i.graph.row_map;
1005  ind_v[i] = Alocal_i.graph.entries;
1006  val_v[i] = Alocal_i.values;
1007  stream_end = stream_begin + Alocal_i.numRows();
1008  x_v[i] = Kokkos::subview (X_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1009  y_v[i] = Kokkos::subview (Y_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);;
1010  KernelHandle_rawptr_v_[i] = kh_v_[i].getRawPtr();
1011  stream_begin = stream_end;
1012  }
1013  Kokkos::fence();
1014  KokkosSparse::Experimental::sptrsv_solve_streams( exec_space_instances_, KernelHandle_rawptr_v_,
1015  ptr_v, ind_v, val_v, y_v, x_v );
1016  Kokkos::fence();
1017  }
1018  } // End using stream interface of Kokkos Kernels Sptrsv
1019  else
1020  {
1021  const std::string diag = this->diag_;
1022  // NOTE (mfh 20 Aug 2017): KokkosSparse::trsv currently is a
1023  // sequential, host-only code. See
1024  // https://github.com/kokkos/kokkos-kernels/issues/48.
1025 
1026  auto A_lcl = this->A_crs_->getLocalMatrixHost ();
1027 
1028  if (X.isConstantStride () && Y.isConstantStride ()) {
1029  auto X_lcl = X.getLocalViewHost (Tpetra::Access::ReadWrite);
1030  auto Y_lcl = Y.getLocalViewHost (Tpetra::Access::ReadOnly);
1031  KokkosSparse::trsv (uplo.c_str (), trans.c_str (), diag.c_str (),
1032  A_lcl, Y_lcl, X_lcl);
1033  }
1034  else {
1035  for (size_t j = 0; j < numVecs; ++j) {
1036  auto X_j = X.getVectorNonConst (j);
1037  auto Y_j = Y.getVector (j);
1038  auto X_lcl = X_j->getLocalViewHost (Tpetra::Access::ReadWrite);
1039  auto Y_lcl = Y_j->getLocalViewHost (Tpetra::Access::ReadOnly);
1040  KokkosSparse::trsv (uplo.c_str (), trans.c_str (),
1041  diag.c_str (), A_lcl, Y_lcl, X_lcl);
1042  }
1043  }
1044  }
1045 }
1046 
1047 template<class MatrixType>
1048 void
1049 LocalSparseTriangularSolver<MatrixType>::
1050 localApply (const MV& X,
1051  MV& Y,
1052  const Teuchos::ETransp mode,
1053  const scalar_type& alpha,
1054  const scalar_type& beta) const
1055 {
1056  if (mode == Teuchos::NO_TRANS && Teuchos::nonnull (htsImpl_) &&
1057  htsImpl_->isComputed ()) {
1058  htsImpl_->localApply (X, Y, mode, alpha, beta);
1059  return;
1060  }
1061 
1062  using Teuchos::RCP;
1063  typedef scalar_type ST;
1064  typedef Teuchos::ScalarTraits<ST> STS;
1065 
1066  if (beta == STS::zero ()) {
1067  if (alpha == STS::zero ()) {
1068  Y.putScalar (STS::zero ()); // Y := 0 * Y (ignore contents of Y)
1069  }
1070  else { // alpha != 0
1071  this->localTriangularSolve (X, Y, mode);
1072  if (alpha != STS::one ()) {
1073  Y.scale (alpha);
1074  }
1075  }
1076  }
1077  else { // beta != 0
1078  if (alpha == STS::zero ()) {
1079  Y.scale (beta); // Y := beta * Y
1080  }
1081  else { // alpha != 0
1082  MV Y_tmp (Y, Teuchos::Copy);
1083  this->localTriangularSolve (X, Y_tmp, mode); // Y_tmp := M * X
1084  Y.update (alpha, Y_tmp, beta); // Y := beta * Y + alpha * Y_tmp
1085  }
1086  }
1087 }
1088 
1089 
1090 template <class MatrixType>
1091 int
1094  return numInitialize_;
1095 }
1096 
1097 template <class MatrixType>
1098 int
1100 getNumCompute () const {
1101  return numCompute_;
1102 }
1103 
1104 template <class MatrixType>
1105 int
1107 getNumApply () const {
1108  return numApply_;
1109 }
1110 
1111 template <class MatrixType>
1112 double
1115  return initializeTime_;
1116 }
1117 
1118 template<class MatrixType>
1119 double
1121 getComputeTime () const {
1122  return computeTime_;
1123 }
1124 
1125 template<class MatrixType>
1126 double
1128 getApplyTime() const {
1129  return applyTime_;
1130 }
1131 
1132 template <class MatrixType>
1133 std::string
1135 description () const
1136 {
1137  std::ostringstream os;
1138 
1139  // Output is a valid YAML dictionary in flow style. If you don't
1140  // like everything on a single line, you should call describe()
1141  // instead.
1142  os << "\"Ifpack2::LocalSparseTriangularSolver\": {";
1143  if (this->getObjectLabel () != "") {
1144  os << "Label: \"" << this->getObjectLabel () << "\", ";
1145  }
1146  os << "Initialized: " << (isInitialized () ? "true" : "false") << ", "
1147  << "Computed: " << (isComputed () ? "true" : "false") << ", ";
1148 
1149  if(isKokkosKernelsSptrsv_) os << "KK-SPTRSV, ";
1150  if(isKokkosKernelsStream_) os << "KK-SolveStream, ";
1151 
1152  if (A_.is_null ()) {
1153  os << "Matrix: null";
1154  }
1155  else {
1156  os << "Matrix dimensions: ["
1157  << A_->getGlobalNumRows () << ", "
1158  << A_->getGlobalNumCols () << "]"
1159  << ", Number of nonzeros: " << A_->getGlobalNumEntries();
1160  }
1161 
1162  if (Teuchos::nonnull (htsImpl_))
1163  os << ", HTS computed: " << (htsImpl_->isComputed () ? "true" : "false");
1164  os << "}";
1165  return os.str ();
1166 }
1167 
1168 template <class MatrixType>
1171  const Teuchos::EVerbosityLevel verbLevel) const
1172 {
1173  using std::endl;
1174  // Default verbosity level is VERB_LOW, which prints only on Process
1175  // 0 of the matrix's communicator.
1176  const Teuchos::EVerbosityLevel vl
1177  = (verbLevel == Teuchos::VERB_DEFAULT) ? Teuchos::VERB_LOW : verbLevel;
1178 
1179  if (vl != Teuchos::VERB_NONE) {
1180  // Print only on Process 0 in the matrix's communicator. If the
1181  // matrix is null, though, we have to get the communicator from
1182  // somewhere, so we ask Tpetra for its default communicator. If
1183  // MPI is enabled, this wraps MPI_COMM_WORLD or a clone thereof.
1184  auto comm = A_.is_null () ?
1185  Tpetra::getDefaultComm () :
1186  A_->getComm ();
1187 
1188  // Users aren't supposed to do anything with the matrix on
1189  // processes where its communicator is null.
1190  if (! comm.is_null () && comm->getRank () == 0) {
1191  // By convention, describe() should always begin with a tab.
1192  Teuchos::OSTab tab0 (out);
1193  // Output is in YAML format. We have to escape the class name,
1194  // because it has a colon.
1195  out << "\"Ifpack2::LocalSparseTriangularSolver\":" << endl;
1196  Teuchos::OSTab tab1 (out);
1197  out << "Scalar: " << Teuchos::TypeNameTraits<scalar_type>::name () << endl
1198  << "LocalOrdinal: " << Teuchos::TypeNameTraits<local_ordinal_type>::name () << endl
1199  << "GlobalOrdinal: " << Teuchos::TypeNameTraits<global_ordinal_type>::name () << endl
1200  << "Node: " << Teuchos::TypeNameTraits<node_type>::name () << endl;
1201  }
1202  }
1203 }
1204 
1205 template <class MatrixType>
1209 {
1211  (A_.is_null (), std::runtime_error,
1212  "Ifpack2::LocalSparseTriangularSolver::getDomainMap: "
1213  "The matrix is null. Please call setMatrix() with a nonnull input "
1214  "before calling this method.");
1215  return A_->getDomainMap ();
1216 }
1217 
1218 template <class MatrixType>
1221 getRangeMap () const
1222 {
1224  (A_.is_null (), std::runtime_error,
1225  "Ifpack2::LocalSparseTriangularSolver::getRangeMap: "
1226  "The matrix is null. Please call setMatrix() with a nonnull input "
1227  "before calling this method.");
1228  return A_->getRangeMap ();
1229 }
1230 
1231 template<class MatrixType>
1234 {
1235  const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::setMatrix: ";
1236 
1237  // If the pointer didn't change, do nothing. This is reasonable
1238  // because users are supposed to call this method with the same
1239  // object over all participating processes, and pointer identity
1240  // implies object identity.
1241  if (A.getRawPtr () != A_.getRawPtr () || isInternallyChanged_) {
1242  // Check in serial or one-process mode if the matrix is square.
1244  (! A.is_null () && A->getComm ()->getSize () == 1 &&
1245  A->getLocalNumRows () != A->getLocalNumCols (),
1246  std::runtime_error, prefix << "If A's communicator only contains one "
1247  "process, then A must be square. Instead, you provided a matrix A with "
1248  << A->getLocalNumRows () << " rows and " << A->getLocalNumCols ()
1249  << " columns.");
1250 
1251  // It's legal for A to be null; in that case, you may not call
1252  // initialize() until calling setMatrix() with a nonnull input.
1253  // Regardless, setting the matrix invalidates the preconditioner.
1254  isInitialized_ = false;
1255  isComputed_ = false;
1256 
1257  if (A.is_null ()) {
1258  A_crs_ = Teuchos::null;
1259  A_ = Teuchos::null;
1260  }
1261  else { // A is not null
1263  Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A);
1265  (A_crs.is_null (), std::invalid_argument, prefix <<
1266  "The input matrix A is not a Tpetra::CrsMatrix.");
1267  A_crs_ = A_crs;
1268  A_ = A;
1269  }
1270 
1271  if (Teuchos::nonnull (htsImpl_))
1272  htsImpl_->reset ();
1273  } // pointers are not the same
1274 }
1275 
1276 template<class MatrixType>
1277 void
1279 setStreamInfo (const bool& isKokkosKernelsStream, const int& num_streams,
1280  const std::vector<HandleExecSpace>& exec_space_instances)
1281 {
1282  isKokkosKernelsStream_ = isKokkosKernelsStream;
1283  num_streams_ = num_streams;
1284  exec_space_instances_ = exec_space_instances;
1285  A_crs_v_ = std::vector< Teuchos::RCP<crs_matrix_type> >(num_streams_);
1286 }
1287 
1288 template<class MatrixType>
1290 setMatrices (const std::vector< Teuchos::RCP<crs_matrix_type> >& A_crs_v)
1291 {
1292  const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::setMatrixWithStreams: ";
1293 
1294  for(int i = 0; i < num_streams_; i++) {
1295  // If the pointer didn't change, do nothing. This is reasonable
1296  // because users are supposed to call this method with the same
1297  // object over all participating processes, and pointer identity
1298  // implies object identity.
1299  if (A_crs_v[i].getRawPtr () != A_crs_v_[i].getRawPtr () || isInternallyChanged_) {
1300  // Check in serial or one-process mode if the matrix is square.
1302  (! A_crs_v[i].is_null () && A_crs_v[i]->getComm ()->getSize () == 1 &&
1303  A_crs_v[i]->getLocalNumRows () != A_crs_v[i]->getLocalNumCols (),
1304  std::runtime_error, prefix << "If A's communicator only contains one "
1305  "process, then A must be square. Instead, you provided a matrix A with "
1306  << A_crs_v[i]->getLocalNumRows () << " rows and " << A_crs_v[i]->getLocalNumCols ()
1307  << " columns.");
1308 
1309  // It's legal for A to be null; in that case, you may not call
1310  // initialize() until calling setMatrix() with a nonnull input.
1311  // Regardless, setting the matrix invalidates the preconditioner.
1312  isInitialized_ = false;
1313  isComputed_ = false;
1314 
1315  if (A_crs_v[i].is_null ()) {
1316  A_crs_v_[i] = Teuchos::null;
1317  }
1318  else { // A is not null
1320  Teuchos::rcp_dynamic_cast<crs_matrix_type> (A_crs_v[i]);
1322  (A_crs.is_null (), std::invalid_argument, prefix <<
1323  "The input matrix A is not a Tpetra::CrsMatrix.");
1324  A_crs_v_[i] = A_crs;
1325  }
1326  } // pointers are not the same
1327  }
1328 }
1329 
1330 } // namespace Ifpack2
1331 
1332 #define IFPACK2_LOCALSPARSETRIANGULARSOLVER_INSTANT(S,LO,GO,N) \
1333  template class Ifpack2::LocalSparseTriangularSolver< Tpetra::RowMatrix<S, LO, GO, N> >;
1334 
1335 #endif // IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
bool is_null(const boost::shared_ptr< T > &p)
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the preconditioner to X, and put the result in Y.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:756
int getNumCompute() const
Return the number of calls to compute().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1100
std::string description() const
A one-line description of this object.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1135
T & get(const std::string &name, T def_value)
bool nonnull(const std::shared_ptr< T > &p)
#define TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(throw_exception_test, Exception, msg)
void compute()
&quot;Numeric&quot; phase of setup
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:652
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
MatrixType::global_ordinal_type global_ordinal_type
Type of the global indices of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:62
bool isComputed() const
Return true if compute() has been called.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:189
T * get() const
double getApplyTime() const
Return the time spent in apply().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1128
double getComputeTime() const
Return the time spent in compute().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1121
double getInitializeTime() const
Return the time spent in initialize().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1114
MatrixType::node_type node_type
Node type of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:64
bool isParameter(const std::string &name) const
T * getRawPtr() const
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
void setMatrices(const std::vector< Teuchos::RCP< crs_matrix_type > > &A_crs_v)
Set this preconditioner&#39;s matrices (used by stream interface of triangular solve).
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1290
#define TEUCHOS_TEST_FOR_EXCEPT_MSG(throw_exception_test, msg)
Teuchos::RCP< const map_type > getDomainMap() const
The domain of this operator.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1208
&quot;Preconditioner&quot; that solves local sparse triangular systems.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:46
void initialize()
&quot;Symbolic&quot; phase of setup
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:405
int getNumInitialize() const
Return the number of calls to initialize().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1093
void setStreamInfo(const bool &isKokkosKernelsStream, const int &num_streams, const std::vector< HandleExecSpace > &exec_space_instances)
Set this triangular solver&#39;s stream information.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1279
Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > map_type
Specialization of Tpetra::Map used by this class.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:69
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Set this preconditioner&#39;s matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1233
void resize(size_type new_size, const value_type &x=value_type())
IntegralType getIntegralValue(const std::string &str, const std::string &paramName="", const std::string &sublistName="") const
int getNumApply() const
Return the number of calls to apply().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1107
MatrixType::local_ordinal_type local_ordinal_type
Type of the local indices of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:60
Teuchos::RCP< const map_type > getRangeMap() const
The range of this operator.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1221
bool isType(const std::string &name) const
Tpetra::CrsMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > crs_matrix_type
Specialization of Tpetra::CrsMatrix used by this class.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:75
void setParameters(const Teuchos::ParameterList &params)
Set this object&#39;s parameters.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:358
MatrixType::scalar_type scalar_type
Type of the entries of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:58
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print this object with given verbosity to the given output stream.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1170
virtual ~LocalSparseTriangularSolver()
Destructor (virtual for memory safety).
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:339
LocalSparseTriangularSolver()
Constructor that takes no input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:300
static std::string name()
std::string typeName(const T &t)
bool is_null() const