Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_LocalSparseTriangularSolver_def.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4 //
5 // Copyright 2009 NTESS and the Ifpack2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
11 #define IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
12 
13 #include "Ifpack2_LocalSparseTriangularSolver_decl.hpp"
14 #include "Tpetra_CrsMatrix.hpp"
15 #include "Tpetra_Core.hpp"
16 #include "Teuchos_StandardParameterEntryValidators.hpp"
17 #include "Tpetra_Details_determineLocalTriangularStructure.hpp"
18 #include "KokkosSparse_trsv.hpp"
19 
20 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
21 # include "shylu_hts.hpp"
22 #endif
23 
24 namespace Ifpack2 {
25 
26 namespace Details {
27 struct TrisolverType {
28  enum Enum {
29  Internal,
30  HTS,
31  KSPTRSV
32  };
33 
34  static void loadPLTypeOption (Teuchos::Array<std::string>& type_strs, Teuchos::Array<Enum>& type_enums) {
35  type_strs.resize(3);
36  type_strs[0] = "Internal";
37  type_strs[1] = "HTS";
38  type_strs[2] = "KSPTRSV";
39  type_enums.resize(3);
40  type_enums[0] = Internal;
41  type_enums[1] = HTS;
42  type_enums[2] = KSPTRSV;
43  }
44 };
45 }
46 
47 template<class MatrixType>
48 class LocalSparseTriangularSolver<MatrixType>::HtsImpl {
49 public:
50  typedef Tpetra::CrsMatrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type> crs_matrix_type;
51 
52  void reset () {
53 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
54  Timpl_ = Teuchos::null;
55  levelset_block_size_ = 1;
56 #endif
57  }
58 
59  void setParameters (const Teuchos::ParameterList& pl) {
60  (void)pl;
61 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
62  const char* block_size_s = "trisolver: block size";
63  if (pl.isParameter(block_size_s)) {
64  TEUCHOS_TEST_FOR_EXCEPT_MSG( ! pl.isType<int>(block_size_s),
65  "The parameter \"" << block_size_s << "\" must be of type int.");
66  levelset_block_size_ = pl.get<int>(block_size_s);
67  }
68  if (levelset_block_size_ < 1)
69  levelset_block_size_ = 1;
70 #endif
71  }
72 
73  // HTS has the phases symbolic+numeric, numeric, and apply. Hence the first
74  // call to compute() will trigger the symbolic+numeric phase, and subsequent
75  // calls (with the same Timpl_) will trigger the numeric phase. In the call to
76  // initialize(), essentially nothing happens.
77  void initialize (const crs_matrix_type& /* unused */) {
78 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
79  reset();
80  transpose_ = conjugate_ = false;
81 #endif
82  }
83 
84  void compute (const crs_matrix_type& T_in, const Teuchos::RCP<Teuchos::FancyOStream>& out) {
85  (void)T_in;
86  (void)out;
87 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
88  using Teuchos::ArrayRCP;
89 
90  auto rowptr = T_in.getLocalRowPtrsHost();
91  auto colidx = T_in.getLocalIndicesHost();
92  auto val = T_in.getLocalValuesHost(Tpetra::Access::ReadOnly);
93  Kokkos::fence();
94 
95  Teuchos::RCP<HtsCrsMatrix> T_hts = Teuchos::rcpWithDealloc(
96  HTST::make_CrsMatrix(rowptr.size() - 1,
97  rowptr.data(), colidx.data(),
98  // For std/Kokkos::complex.
99  reinterpret_cast<const scalar_type*>(val.data()),
100  transpose_, conjugate_),
101  HtsCrsMatrixDeleter());
102 
103  if (Teuchos::nonnull(Timpl_)) {
104  // Reuse the nonzero pattern.
105  HTST::reprocess_numeric(Timpl_.get(), T_hts.get());
106  } else {
107  // Build from scratch.
108  if (T_in.getCrsGraph().is_null()) {
109  if (Teuchos::nonnull(out))
110  *out << "HTS compute failed because T_in.getCrsGraph().is_null().\n";
111  return;
112  }
113  if ( ! T_in.getCrsGraph()->isSorted()) {
114  if (Teuchos::nonnull(out))
115  *out << "HTS compute failed because ! T_in.getCrsGraph().isSorted().\n";
116  return;
117  }
118  if ( ! T_in.isStorageOptimized()) {
119  if (Teuchos::nonnull(out))
120  *out << "HTS compute failed because ! T_in.isStorageOptimized().\n";
121  return;
122  }
123 
124  typename HTST::PreprocessArgs args;
125  args.T = T_hts.get();
126  args.max_nrhs = 1;
127 #ifdef _OPENMP
128  args.nthreads = omp_get_max_threads();
129 #else
130  args.nthreads = 1;
131 #endif
132  args.save_for_reprocess = true;
133  typename HTST::Options opts;
134  opts.levelset_block_size = levelset_block_size_;
135  args.options = &opts;
136 
137  try {
138  Timpl_ = Teuchos::rcpWithDealloc(HTST::preprocess(args), TImplDeleter());
139  } catch (const std::exception& e) {
140  if (Teuchos::nonnull(out))
141  *out << "HTS preprocess threw: " << e.what() << "\n";
142  }
143  }
144 #endif
145  }
146 
147  // HTS may not be able to handle a matrix, so query whether compute()
148  // succeeded.
149  bool isComputed () {
150 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
151  return Teuchos::nonnull(Timpl_);
152 #else
153  return false;
154 #endif
155  }
156 
157  // Y := beta * Y + alpha * (M * X)
158  void localApply (const MV& X, MV& Y,
159  const Teuchos::ETransp /* mode */,
160  const scalar_type& alpha, const scalar_type& beta) const {
161  (void)X;
162  (void)Y;
163  (void)alpha;
164  (void)beta;
165 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
166  const auto& X_view = X.getLocalViewHost (Tpetra::Access::ReadOnly);
167  const auto& Y_view = Y.getLocalViewHost (Tpetra::Access::ReadWrite);
168 
169  // Only does something if #rhs > current capacity.
170  HTST::reset_max_nrhs(Timpl_.get(), X_view.extent(1));
171  // Switch alpha and beta because of HTS's opposite convention.
172  HTST::solve_omp(Timpl_.get(),
173  // For std/Kokkos::complex.
174  reinterpret_cast<const scalar_type*>(X_view.data()),
175  X_view.extent(1),
176  // For std/Kokkos::complex.
177  reinterpret_cast<scalar_type*>(Y_view.data()),
178  beta, alpha);
179 #endif
180  }
181 
182 private:
183 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
184  typedef ::Experimental::HTS<local_ordinal_type, size_t, scalar_type> HTST;
185  typedef typename HTST::Impl TImpl;
186  typedef typename HTST::CrsMatrix HtsCrsMatrix;
187 
188  struct TImplDeleter {
189  void free (TImpl* impl) {
190  HTST::delete_Impl(impl);
191  }
192  };
193 
194  struct HtsCrsMatrixDeleter {
195  void free (HtsCrsMatrix* T) {
196  HTST::delete_CrsMatrix(T);
197  }
198  };
199 
200  Teuchos::RCP<TImpl> Timpl_;
201  bool transpose_, conjugate_;
202  int levelset_block_size_;
203 #endif
204 };
205 
206 template<class MatrixType>
209  A_ (A)
210 {
211  initializeState();
212 
213  if (! A.is_null ()) {
215  Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A);
217  (A_crs.is_null (), std::invalid_argument,
218  "Ifpack2::LocalSparseTriangularSolver constructor: "
219  "The input matrix A is not a Tpetra::CrsMatrix.");
220  A_crs_ = A_crs;
221  }
222 }
223 
224 template<class MatrixType>
228  A_ (A),
229  out_ (out)
230 {
231  initializeState();
232  if (! out_.is_null ()) {
233  *out_ << ">>> DEBUG Ifpack2::LocalSparseTriangularSolver constructor"
234  << std::endl;
235  }
236 
237  if (! A.is_null ()) {
239  Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A);
241  (A_crs.is_null (), std::invalid_argument,
242  "Ifpack2::LocalSparseTriangularSolver constructor: "
243  "The input matrix A is not a Tpetra::CrsMatrix.");
244  A_crs_ = A_crs;
245  }
246 }
247 
248 template<class MatrixType>
251 {
252  initializeState();
253 }
254 
255 template<class MatrixType>
258  out_ (out)
259 {
260  initializeState();
261  if (! out_.is_null ()) {
262  *out_ << ">>> DEBUG Ifpack2::LocalSparseTriangularSolver constructor"
263  << std::endl;
264  }
265 }
266 
267 template<class MatrixType>
269 {
270  isInitialized_ = false;
271  isComputed_ = false;
272  reverseStorage_ = false;
273  isInternallyChanged_ = false;
274  numInitialize_ = 0;
275  numCompute_ = 0;
276  numApply_ = 0;
277  initializeTime_ = 0.0;
278  computeTime_ = 0.0;
279  applyTime_ = 0.0;
280  isKokkosKernelsSptrsv_ = false;
281  isKokkosKernelsStream_ = false;
282  num_streams_ = 0;
283  uplo_ = "N";
284  diag_ = "N";
285 }
286 
287 template<class MatrixType>
290 {
291  if (!isKokkosKernelsStream_) {
292  if (Teuchos::nonnull (kh_)) {
293  kh_->destroy_sptrsv_handle();
294  }
295  }
296  else {
297  for (int i = 0; i < num_streams_; i++) {
298  if (Teuchos::nonnull (kh_v_[i])) {
299  kh_v_[i]->destroy_sptrsv_handle();
300  }
301  }
302  }
303 }
304 
305 template<class MatrixType>
306 void
309 {
310  using Teuchos::RCP;
312  using Teuchos::Array;
313 
314  Details::TrisolverType::Enum trisolverType = Details::TrisolverType::Internal;
315  do {
316  static const char typeName[] = "trisolver: type";
317 
318  if ( ! pl.isType<std::string>(typeName)) break;
319 
320  // Map std::string <-> TrisolverType::Enum.
321  Array<std::string> trisolverTypeStrs;
322  Array<Details::TrisolverType::Enum> trisolverTypeEnums;
323  Details::TrisolverType::loadPLTypeOption (trisolverTypeStrs, trisolverTypeEnums);
325  s2i(trisolverTypeStrs (), trisolverTypeEnums (), typeName, false);
326 
327  trisolverType = s2i.getIntegralValue(pl.get<std::string>(typeName));
328  } while (0);
329 
330  if (trisolverType == Details::TrisolverType::HTS) {
331  htsImpl_ = Teuchos::rcp (new HtsImpl ());
332  htsImpl_->setParameters (pl);
333  }
334 
335  if (trisolverType == Details::TrisolverType::KSPTRSV) {
336  this->isKokkosKernelsSptrsv_ = true;
337  }
338  else {
339  this->isKokkosKernelsSptrsv_ = false;
340  }
341 
342  if (pl.isParameter("trisolver: reverse U"))
343  reverseStorage_ = pl.get<bool>("trisolver: reverse U");
344 
346  (reverseStorage_ && (trisolverType == Details::TrisolverType::HTS || trisolverType == Details::TrisolverType::KSPTRSV),
347  std::logic_error, "Ifpack2::LocalSparseTriangularSolver::setParameters: "
348  "You are not allowed to enable both HTS or KSPTRSV and the \"trisolver: reverse U\" "
349  "options. See GitHub issue #2647.");
350 }
351 
352 template<class MatrixType>
353 void
356 {
357  using Tpetra::Details::determineLocalTriangularStructure;
358 
359  using local_matrix_type = typename crs_matrix_type::local_matrix_device_type;
360  using LO = local_ordinal_type;
361 
362  const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::initialize: ";
363  if (! out_.is_null ()) {
364  *out_ << ">>> DEBUG " << prefix << std::endl;
365  }
366 
367  if (!isKokkosKernelsStream_) {
369  (A_.is_null (), std::runtime_error, prefix << "You must call "
370  "setMatrix() with a nonnull input matrix before you may call "
371  "initialize() or compute().");
372  if (A_crs_.is_null ()) {
373  auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_);
375  (A_crs.get () == nullptr, std::invalid_argument,
376  prefix << "The input matrix A is not a Tpetra::CrsMatrix.");
377  A_crs_ = A_crs;
378  }
379  auto G = A_crs_->getGraph ();
381  (G.is_null (), std::logic_error, prefix << "A_ and A_crs_ are nonnull, "
382  "but A_crs_'s RowGraph G is null. "
383  "Please report this bug to the Ifpack2 developers.");
384  // At this point, the graph MUST be fillComplete. The "initialize"
385  // (symbolic) part of setup only depends on the graph structure, so
386  // the matrix itself need not be fill complete.
388  (! G->isFillComplete (), std::runtime_error, "If you call this method, "
389  "the matrix's graph must be fill complete. It is not.");
390 
391  // mfh 30 Apr 2018: See GitHub Issue #2658.
392  constexpr bool ignoreMapsForTriStructure = true;
393  auto lclTriStructure = [&] {
394  auto lclMatrix = A_crs_->getLocalMatrixDevice ();
395  auto lclRowMap = A_crs_->getRowMap ()->getLocalMap ();
396  auto lclColMap = A_crs_->getColMap ()->getLocalMap ();
397  auto lclTriStruct =
398  determineLocalTriangularStructure (lclMatrix.graph,
399  lclRowMap,
400  lclColMap,
401  ignoreMapsForTriStructure);
402  const LO lclNumRows = lclRowMap.getLocalNumElements ();
403  this->diag_ = (lclTriStruct.diagCount < lclNumRows) ? "U" : "N";
404  this->uplo_ = lclTriStruct.couldBeLowerTriangular ? "L" :
405  (lclTriStruct.couldBeUpperTriangular ? "U" : "N");
406  return lclTriStruct;
407  } ();
408 
409  if (reverseStorage_ && lclTriStructure.couldBeUpperTriangular &&
410  htsImpl_.is_null ()) {
411  // Reverse the storage for an upper triangular matrix
412  auto Alocal = A_crs_->getLocalMatrixDevice();
413  auto ptr = Alocal.graph.row_map;
414  auto ind = Alocal.graph.entries;
415  auto val = Alocal.values;
416 
417  auto numRows = Alocal.numRows();
418  auto numCols = Alocal.numCols();
419  auto numNnz = Alocal.nnz();
420 
421  typename decltype(ptr)::non_const_type newptr ("ptr", ptr.extent (0));
422  typename decltype(ind)::non_const_type newind ("ind", ind.extent (0));
423  decltype(val) newval ("val", val.extent (0));
424 
425  // FIXME: The code below assumes UVM
426  typename crs_matrix_type::execution_space().fence();
427  newptr(0) = 0;
428  for (local_ordinal_type row = 0, rowStart = 0; row < numRows; ++row) {
429  auto A_r = Alocal.row(numRows-1 - row);
430 
431  auto numEnt = A_r.length;
432  for (local_ordinal_type k = 0; k < numEnt; ++k) {
433  newind(rowStart + k) = numCols-1 - A_r.colidx(numEnt-1 - k);
434  newval(rowStart + k) = A_r.value (numEnt-1 - k);
435  }
436  rowStart += numEnt;
437  newptr(row+1) = rowStart;
438  }
439  typename crs_matrix_type::execution_space().fence();
440 
441  // Reverse maps
442  Teuchos::RCP<map_type> newRowMap, newColMap;
443  {
444  // Reverse row map
445  auto rowMap = A_->getRowMap();
446  auto numElems = rowMap->getLocalNumElements();
447  auto rowElems = rowMap->getLocalElementList();
448 
449  Teuchos::Array<global_ordinal_type> newRowElems(rowElems.size());
450  for (size_t i = 0; i < numElems; i++)
451  newRowElems[i] = rowElems[numElems-1 - i];
452 
453  newRowMap = Teuchos::rcp(new map_type(rowMap->getGlobalNumElements(), newRowElems, rowMap->getIndexBase(), rowMap->getComm()));
454  }
455  {
456  // Reverse column map
457  auto colMap = A_->getColMap();
458  auto numElems = colMap->getLocalNumElements();
459  auto colElems = colMap->getLocalElementList();
460 
461  Teuchos::Array<global_ordinal_type> newColElems(colElems.size());
462  for (size_t i = 0; i < numElems; i++)
463  newColElems[i] = colElems[numElems-1 - i];
464 
465  newColMap = Teuchos::rcp(new map_type(colMap->getGlobalNumElements(), newColElems, colMap->getIndexBase(), colMap->getComm()));
466  }
467 
468  // Construct new matrix
469  local_matrix_type newLocalMatrix("Upermuted", numRows, numCols, numNnz, newval, newptr, newind);
470 
471  A_crs_ = Teuchos::rcp(new crs_matrix_type(newLocalMatrix, newRowMap, newColMap, A_crs_->getDomainMap(), A_crs_->getRangeMap()));
472 
473  isInternallyChanged_ = true;
474 
475  // FIXME (mfh 18 Apr 2019) Recomputing this is unnecessary, but I
476  // didn't want to break any invariants, especially considering
477  // that this branch is likely poorly tested.
478  auto newLclTriStructure =
479  determineLocalTriangularStructure (newLocalMatrix.graph,
480  newRowMap->getLocalMap (),
481  newColMap->getLocalMap (),
482  ignoreMapsForTriStructure);
483  const LO newLclNumRows = newRowMap->getLocalNumElements ();
484  this->diag_ = (newLclTriStructure.diagCount < newLclNumRows) ? "U" : "N";
485  this->uplo_ = newLclTriStructure.couldBeLowerTriangular ? "L" :
486  (newLclTriStructure.couldBeUpperTriangular ? "U" : "N");
487  }
488  }
489  else {
490  for (int i = 0; i < num_streams_; i++) {
492  (A_crs_v_[i].is_null (), std::runtime_error, prefix << "You must call "
493  "setMatrix() with a nonnull input matrix before you may call "
494  "initialize() or compute().");
495  auto G = A_crs_v_[i]->getGraph ();
497  (G.is_null (), std::logic_error, prefix << "A_crs_ are nonnull, "
498  "but A_crs_'s RowGraph G is null. "
499  "Please report this bug to the Ifpack2 developers.");
500  // At this point, the graph MUST be fillComplete. The "initialize"
501  // (symbolic) part of setup only depends on the graph structure, so
502  // the matrix itself need not be fill complete.
504  (! G->isFillComplete (), std::runtime_error, "If you call this method, "
505  "the matrix's graph must be fill complete. It is not.");
506 
507  // mfh 30 Apr 2018: See GitHub Issue #2658.
508  constexpr bool ignoreMapsForTriStructure = true;
509  std::string prev_uplo_ = this->uplo_;
510  std::string prev_diag_ = this->diag_;
511  auto lclMatrix = A_crs_v_[i]->getLocalMatrixDevice ();
512  auto lclRowMap = A_crs_v_[i]->getRowMap ()->getLocalMap ();
513  auto lclColMap = A_crs_v_[i]->getColMap ()->getLocalMap ();
514  auto lclTriStruct =
515  determineLocalTriangularStructure (lclMatrix.graph,
516  lclRowMap,
517  lclColMap,
518  ignoreMapsForTriStructure);
519  const LO lclNumRows = lclRowMap.getLocalNumElements ();
520  this->diag_ = (lclTriStruct.diagCount < lclNumRows) ? "U" : "N";
521  this->uplo_ = lclTriStruct.couldBeLowerTriangular ? "L" :
522  (lclTriStruct.couldBeUpperTriangular ? "U" : "N");
523  if (i > 0) {
525  ((this->diag_ != prev_diag_) || (this->uplo_ != prev_uplo_),
526  std::logic_error, prefix << "A_crs_'s structures in streams "
527  "are different. Please report this bug to the Ifpack2 developers.");
528  }
529  }
530  }
531 
532  if (Teuchos::nonnull (htsImpl_))
533  {
534  htsImpl_->initialize (*A_crs_);
535  isInternallyChanged_ = true;
536  }
537 
538  const bool ksptrsv_valid_uplo = (this->uplo_ != "N");
539  kh_v_nonnull_ = false;
540  if (this->isKokkosKernelsSptrsv_ && ksptrsv_valid_uplo && this->diag_ != "U")
541  {
542  if (!isKokkosKernelsStream_) {
543  kh_ = Teuchos::rcp (new k_handle());
544  const bool is_lower_tri = (this->uplo_ == "L") ? true : false;
545 
546  auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_, true);
547  auto Alocal = A_crs->getLocalMatrixDevice();
548  auto ptr = Alocal.graph.row_map;
549  auto ind = Alocal.graph.entries;
550  auto val = Alocal.values;
551 
552  auto numRows = Alocal.numRows();
553  kh_->create_sptrsv_handle(kokkosKernelsAlgorithm(), numRows, is_lower_tri);
554  KokkosSparse::Experimental::sptrsv_symbolic(kh_.getRawPtr(), ptr, ind, val);
555  } else {
556  kh_v_ = std::vector< Teuchos::RCP<k_handle> >(num_streams_);
557  for (int i = 0; i < num_streams_; i++) {
558  kh_v_[i] = Teuchos::rcp (new k_handle ());
559  auto A_crs_i = Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_crs_v_[i], true);
560  auto Alocal_i = A_crs_i->getLocalMatrixDevice();
561  auto ptr_i = Alocal_i.graph.row_map;
562  auto ind_i = Alocal_i.graph.entries;
563  auto val_i = Alocal_i.values;
564 
565  auto numRows_i = Alocal_i.numRows();
566 
567  const bool is_lower_tri = (this->uplo_ == "L") ? true : false;
568  kh_v_[i]->create_sptrsv_handle(kokkosKernelsAlgorithm(), numRows_i, is_lower_tri);
569  KokkosSparse::Experimental::sptrsv_symbolic(kh_v_[i].getRawPtr(), ptr_i, ind_i, val_i);
570  }
571  kh_v_nonnull_ = true;
572  }
573  }
574 
575  isInitialized_ = true;
576  ++numInitialize_;
577 }
578 
579 template<class MatrixType>
580 KokkosSparse::Experimental::SPTRSVAlgorithm
582 {
583 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA)
584  // CuSparse only supports int type ordinals
585  // and scalar types of float, double, float complex and double complex
586  if constexpr (std::is_same<Kokkos::Cuda, HandleExecSpace>::value &&
587  std::is_same<int, local_ordinal_type>::value &&
588  (std::is_same<scalar_type, float>::value ||
589  std::is_same<scalar_type, double>::value ||
590  std::is_same<scalar_type, Kokkos::complex<float>>::value ||
591  std::is_same<scalar_type, Kokkos::complex<double>>::value))
592  {
593  return KokkosSparse::Experimental::SPTRSVAlgorithm::SPTRSV_CUSPARSE;
594  }
595 #endif
596  return KokkosSparse::Experimental::SPTRSVAlgorithm::SEQLVLSCHD_TP1;
597 }
598 
599 template<class MatrixType>
600 void
603 {
604  const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::compute: ";
605  if (! out_.is_null ()) {
606  *out_ << ">>> DEBUG " << prefix << std::endl;
607  }
608 
609  if (!isKokkosKernelsStream_) {
611  (A_.is_null (), std::runtime_error, prefix << "You must call "
612  "setMatrix() with a nonnull input matrix before you may call "
613  "initialize() or compute().");
615  (A_crs_.is_null (), std::logic_error, prefix << "A_ is nonnull, but "
616  "A_crs_ is null. Please report this bug to the Ifpack2 developers.");
617  // At this point, the matrix MUST be fillComplete.
619  (! A_crs_->isFillComplete (), std::runtime_error, "If you call this "
620  "method, the matrix must be fill complete. It is not.");
621  }
622  else {
623  for(int i = 0; i < num_streams_; i++) {
625  (A_crs_v_[i].is_null (), std::runtime_error, prefix << "You must call "
626  "setMatrices() with a nonnull input matrix before you may call "
627  "initialize() or compute().");
628  // At this point, the matrix MUST be fillComplete.
630  (! A_crs_v_[i]->isFillComplete (), std::runtime_error, "If you call this "
631  "method, the matrix must be fill complete. It is not.");
632  }
633  }
634 
635  if (! isInitialized_) {
636  initialize ();
637  }
639  (! isInitialized_, std::logic_error, prefix << "initialize() should have "
640  "been called by this point, but isInitialized_ is false. "
641  "Please report this bug to the Ifpack2 developers.");
642 
643 // NOTE (Nov-09-2022):
644 // For Cuda >= 11.3 (using cusparseSpSV), always call symbolic during compute
645 // even when matrix values are changed with the same sparsity pattern.
646 // For Cuda >= 12.1 has a new cusparseSpSV_updateMatrix function just for updating the
647 // values that is substantially faster.
648 // This would all be much better handled via a KokkosSparse::Experimental::sptrsv_numeric(...)
649 // that could hide the Cuda implementation details.
650 #if defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && (CUDA_VERSION >= 11030)
651  if constexpr ( std::is_same_v<typename crs_matrix_type::execution_space, Kokkos::Cuda> )
652  {
653  if (this->isKokkosKernelsSptrsv_) {
654  if (Teuchos::nonnull(kh_) && !isKokkosKernelsStream_) {
655  auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_crs_, true);
656  auto Alocal = A_crs->getLocalMatrixDevice();
657  auto val = Alocal.values;
658  #if (CUSPARSE_VERSION >= 12100)
659  auto *sptrsv_handle = kh_->get_sptrsv_handle();
660  auto cusparse_handle = sptrsv_handle->get_cuSparseHandle();
661  cusparseSpSV_updateMatrix(cusparse_handle->handle,
662  cusparse_handle->spsvDescr,
663  val.data(),
664  CUSPARSE_SPSV_UPDATE_GENERAL);
665  #else
666  auto ptr = Alocal.graph.row_map;
667  auto ind = Alocal.graph.entries;
668  KokkosSparse::Experimental::sptrsv_symbolic(kh_.getRawPtr(), ptr, ind, val);
669  #endif
670  } else if (kh_v_nonnull_) {
671  for (int i = 0; i < num_streams_; i++) {
672  auto A_crs_i = Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_crs_v_[i], true);
673  auto Alocal_i = A_crs_i->getLocalMatrixDevice();
674  auto val_i = Alocal_i.values;
675  #if (CUSPARSE_VERSION >= 12100)
676  auto *sptrsv_handle = kh_v_[i]->get_sptrsv_handle();
677  auto cusparse_handle = sptrsv_handle->get_cuSparseHandle();
678  KOKKOS_CUSPARSE_SAFE_CALL(
679  cusparseSetStream(cusparse_handle->handle, exec_space_instances_[i].cuda_stream()));
680  cusparseSpSV_updateMatrix(cusparse_handle->handle,
681  cusparse_handle->spsvDescr,
682  val_i.data(),
683  CUSPARSE_SPSV_UPDATE_GENERAL);
684  #else
685  auto ptr_i = Alocal_i.graph.row_map;
686  auto ind_i = Alocal_i.graph.entries;
687  KokkosSparse::Experimental::sptrsv_symbolic(exec_space_instances_[i], kh_v_[i].getRawPtr(), ptr_i, ind_i, val_i);
688  #endif
689  }
690  }
691  }
692  }
693 #endif
694 
695  if (! isComputed_) {//Only compute if not computed before
696  if (Teuchos::nonnull (htsImpl_))
697  htsImpl_->compute (*A_crs_, out_);
698 
699  isComputed_ = true;
700  ++numCompute_;
701  }
702 }
703 
704 template<class MatrixType>
706 apply (const Tpetra::MultiVector<scalar_type, local_ordinal_type,
708  Tpetra::MultiVector<scalar_type, local_ordinal_type,
710  Teuchos::ETransp mode,
711  scalar_type alpha,
712  scalar_type beta) const
713 {
714  using Teuchos::RCP;
715  using Teuchos::rcp;
716  using Teuchos::rcpFromRef;
717  typedef scalar_type ST;
718  typedef Teuchos::ScalarTraits<ST> STS;
719  const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::apply: ";
720 
721  if (! out_.is_null ()) {
722  *out_ << ">>> DEBUG " << prefix;
723  if (!isKokkosKernelsStream_) {
724  if (A_crs_.is_null ()) {
725  *out_ << "A_crs_ is null!" << std::endl;
726  }
727  else {
729  Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_);
730  const std::string uplo = this->uplo_;
731  const std::string trans = (mode == Teuchos::CONJ_TRANS) ? "C" :
732  (mode == Teuchos::TRANS ? "T" : "N");
733  const std::string diag = this->diag_;
734  *out_ << "uplo=\"" << uplo
735  << "\", trans=\"" << trans
736  << "\", diag=\"" << diag << "\"" << std::endl;
737  }
738  }
739  else {
740  for (int i = 0; i < num_streams_; i++) {
741  if (A_crs_v_[i].is_null ()) {
742  *out_ << "A_crs_v_[" << i << "]" << " is null!" << std::endl;
743  }
744  else {
745  const std::string uplo = this->uplo_;
746  const std::string trans = (mode == Teuchos::CONJ_TRANS) ? "C" :
747  (mode == Teuchos::TRANS ? "T" : "N");
748  const std::string diag = this->diag_;
749  *out_ << "A_crs_v_[" << i << "]: "
750  << "uplo=\"" << uplo
751  << "\", trans=\"" << trans
752  << "\", diag=\"" << diag << "\"" << std::endl;
753  }
754  }
755  }
756  }
757 
759  (! isComputed (), std::runtime_error, prefix << "If compute() has not yet "
760  "been called, or if you have changed the matrix via setMatrix(), you must "
761  "call compute() before you may call this method.");
762  // If isComputed() is true, it's impossible for the matrix to be
763  // null, or for it not to be a Tpetra::CrsMatrix.
764  if (!isKokkosKernelsStream_) {
766  (A_.is_null (), std::logic_error, prefix << "A_ is null. "
767  "Please report this bug to the Ifpack2 developers.");
769  (A_crs_.is_null (), std::logic_error, prefix << "A_crs_ is null. "
770  "Please report this bug to the Ifpack2 developers.");
771  // However, it _is_ possible that the user called resumeFill() on
772  // the matrix, after calling compute(). This is NOT allowed.
774  (! A_crs_->isFillComplete (), std::runtime_error, "If you call this "
775  "method, the matrix must be fill complete. It is not. This means that "
776  " you must have called resumeFill() on the matrix before calling apply(). "
777  "This is NOT allowed. Note that this class may use the matrix's data in "
778  "place without copying it. Thus, you cannot change the matrix and expect "
779  "the solver to stay the same. If you have changed the matrix, first call "
780  "fillComplete() on it, then call compute() on this object, before you call"
781  " apply(). You do NOT need to call setMatrix, as long as the matrix "
782  "itself (that is, its address in memory) is the same.");
783  }
784  else {
785  for (int i = 0; i < num_streams_; i++) {
787  (A_crs_v_[i].is_null (), std::logic_error, prefix << "A_crs_ is null. "
788  "Please report this bug to the Ifpack2 developers.");
789  // However, it _is_ possible that the user called resumeFill() on
790  // the matrix, after calling compute(). This is NOT allowed.
792  (! A_crs_v_[i]->isFillComplete (), std::runtime_error, "If you call this "
793  "method, the matrix must be fill complete. It is not. This means that "
794  " you must have called resumeFill() on the matrix before calling apply(). "
795  "This is NOT allowed. Note that this class may use the matrix's data in "
796  "place without copying it. Thus, you cannot change the matrix and expect "
797  "the solver to stay the same. If you have changed the matrix, first call "
798  "fillComplete() on it, then call compute() on this object, before you call"
799  " apply(). You do NOT need to call setMatrix, as long as the matrix "
800  "itself (that is, its address in memory) is the same.");
801  }
802  }
803 
804  RCP<const MV> X_cur;
805  RCP<MV> Y_cur;
806 
807  if (!isKokkosKernelsStream_) {
808  auto G = A_crs_->getGraph ();
810  (G.is_null (), std::logic_error, prefix << "A_ and A_crs_ are nonnull, "
811  "but A_crs_'s RowGraph G is null. "
812  "Please report this bug to the Ifpack2 developers.");
813  auto importer = G->getImporter ();
814  auto exporter = G->getExporter ();
815 
816  if (! importer.is_null ()) {
817  if (X_colMap_.is_null () || X_colMap_->getNumVectors () != X.getNumVectors ()) {
818  X_colMap_ = rcp (new MV (importer->getTargetMap (), X.getNumVectors ()));
819  }
820  else {
821  X_colMap_->putScalar (STS::zero ());
822  }
823  // See discussion of Github Issue #672 for why the Import needs to
824  // use the ZERO CombineMode. The case where the Export is
825  // nontrivial is likely never exercised.
826  X_colMap_->doImport (X, *importer, Tpetra::ZERO);
827  }
828  X_cur = importer.is_null () ? rcpFromRef (X) :
829  Teuchos::rcp_const_cast<const MV> (X_colMap_);
830 
831  if (! exporter.is_null ()) {
832  if (Y_rowMap_.is_null () || Y_rowMap_->getNumVectors () != Y.getNumVectors ()) {
833  Y_rowMap_ = rcp (new MV (exporter->getSourceMap (), Y.getNumVectors ()));
834  }
835  else {
836  Y_rowMap_->putScalar (STS::zero ());
837  }
838  Y_rowMap_->doExport (Y, *importer, Tpetra::ADD);
839  }
840  Y_cur = exporter.is_null () ? rcpFromRef (Y) : Y_rowMap_;
841  }
842  else {
843  // Currently assume X and Y are local vectors (same sizes as A_crs_).
844  // Should do a better job here!!!
845  X_cur = rcpFromRef (X);
846  Y_cur = rcpFromRef (Y);
847  }
848 
849  localApply (*X_cur, *Y_cur, mode, alpha, beta);
850 
851  if (!isKokkosKernelsStream_) {
852  auto G = A_crs_->getGraph ();
853  auto exporter = G->getExporter ();
854  if (! exporter.is_null ()) {
855  Y.putScalar (STS::zero ());
856  Y.doExport (*Y_cur, *exporter, Tpetra::ADD);
857  }
858  }
859  ++numApply_;
860 }
861 
862 template<class MatrixType>
863 void
865 localTriangularSolve (const MV& Y,
866  MV& X,
867  const Teuchos::ETransp mode) const
868 {
869  using Teuchos::CONJ_TRANS;
870  using Teuchos::NO_TRANS;
871  using Teuchos::TRANS;
872  const char tfecfFuncName[] = "localTriangularSolve: ";
873 
874  if (!isKokkosKernelsStream_)
875  {
877  (! A_crs_->isFillComplete (), std::runtime_error,
878  "The matrix is not fill complete.");
880  ( A_crs_->getLocalNumRows() > 0 && this->uplo_ == "N", std::runtime_error,
881  "The matrix is neither upper triangular or lower triangular. "
882  "You may only call this method if the matrix is triangular. "
883  "Remember that this is a local (per MPI process) property, and that "
884  "Tpetra only knows how to do a local (per process) triangular solve.");
885  }
886  else
887  {
888  for (int i = 0; i < num_streams_; i++) {
890  (! A_crs_v_[i]->isFillComplete (), std::runtime_error,
891  "The matrix is not fill complete.");
893  ( A_crs_v_[i]->getLocalNumRows() > 0 && this->uplo_ == "N", std::runtime_error,
894  "The matrix is neither upper triangular or lower triangular. "
895  "You may only call this method if the matrix is triangular. "
896  "Remember that this is a local (per MPI process) property, and that "
897  "Tpetra only knows how to do a local (per process) triangular solve.");
898  }
899  }
901  (! X.isConstantStride () || ! Y.isConstantStride (), std::invalid_argument,
902  "X and Y must be constant stride.");
905  (STS::isComplex && mode == TRANS, std::logic_error, "This method does "
906  "not currently support non-conjugated transposed solve (mode == "
907  "Teuchos::TRANS) for complex scalar types.");
908 
909  const std::string uplo = this->uplo_;
910  const std::string trans = (mode == Teuchos::CONJ_TRANS) ? "C" :
911  (mode == Teuchos::TRANS ? "T" : "N");
912  const size_t numVecs = std::min (X.getNumVectors (), Y.getNumVectors ());
913 
914  if (Teuchos::nonnull(kh_) && this->isKokkosKernelsSptrsv_ && trans == "N")
915  {
916  auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type> (this->A_);
917  auto A_lclk = A_crs->getLocalMatrixDevice ();
918  auto ptr = A_lclk.graph.row_map;
919  auto ind = A_lclk.graph.entries;
920  auto val = A_lclk.values;
921 
922  for (size_t j = 0; j < numVecs; ++j) {
923  auto X_j = X.getVectorNonConst (j);
924  auto Y_j = Y.getVector (j);
925  auto X_lcl = X_j->getLocalViewDevice (Tpetra::Access::ReadWrite);
926  auto Y_lcl = Y_j->getLocalViewDevice (Tpetra::Access::ReadOnly);
927  auto X_lcl_1d = Kokkos::subview (X_lcl, Kokkos::ALL (), 0);
928  auto Y_lcl_1d = Kokkos::subview (Y_lcl, Kokkos::ALL (), 0);
929  KokkosSparse::Experimental::sptrsv_solve(kh_.getRawPtr(), ptr, ind, val, Y_lcl_1d, X_lcl_1d);
930  // TODO is this fence needed...
931  typename k_handle::HandleExecSpace().fence();
932  }
933  } // End using regular interface of Kokkos Kernels Sptrsv
934  else if (kh_v_nonnull_ && this->isKokkosKernelsSptrsv_ && trans == "N")
935  {
936  std::vector<lno_row_view_t> ptr_v(num_streams_);
937  std::vector<lno_nonzero_view_t> ind_v(num_streams_);
938  std::vector<scalar_nonzero_view_t> val_v(num_streams_);
939  std::vector<k_handle *> KernelHandle_rawptr_v_(num_streams_);
940  for (size_t j = 0; j < numVecs; ++j) {
941  auto X_j = X.getVectorNonConst (j);
942  auto Y_j = Y.getVector (j);
943  auto X_lcl = X_j->getLocalViewDevice (Tpetra::Access::ReadWrite);
944  auto Y_lcl = Y_j->getLocalViewDevice (Tpetra::Access::ReadOnly);
945  auto X_lcl_1d = Kokkos::subview (X_lcl, Kokkos::ALL (), 0);
946  auto Y_lcl_1d = Kokkos::subview (Y_lcl, Kokkos::ALL (), 0);
947  std::vector<decltype(X_lcl_1d)> x_v(num_streams_);
948  std::vector<decltype(Y_lcl_1d)> y_v(num_streams_);
949  local_ordinal_type stream_begin = 0;
950  local_ordinal_type stream_end;
951  for (int i = 0; i < num_streams_; i++) {
952  auto A_crs_i = Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_crs_v_[i]);
953  auto Alocal_i = A_crs_i->getLocalMatrixDevice();
954  ptr_v[i] = Alocal_i.graph.row_map;
955  ind_v[i] = Alocal_i.graph.entries;
956  val_v[i] = Alocal_i.values;
957  stream_end = stream_begin + Alocal_i.numRows();
958  x_v[i] = Kokkos::subview (X_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
959  y_v[i] = Kokkos::subview (Y_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);;
960  KernelHandle_rawptr_v_[i] = kh_v_[i].getRawPtr();
961  stream_begin = stream_end;
962  }
963  Kokkos::fence();
964  KokkosSparse::Experimental::sptrsv_solve_streams( exec_space_instances_, KernelHandle_rawptr_v_,
965  ptr_v, ind_v, val_v, y_v, x_v );
966  Kokkos::fence();
967  }
968  } // End using stream interface of Kokkos Kernels Sptrsv
969  else
970  {
971  const std::string diag = this->diag_;
972  // NOTE (mfh 20 Aug 2017): KokkosSparse::trsv currently is a
973  // sequential, host-only code. See
974  // https://github.com/kokkos/kokkos-kernels/issues/48.
975 
976  auto A_lcl = this->A_crs_->getLocalMatrixHost ();
977 
978  if (X.isConstantStride () && Y.isConstantStride ()) {
979  auto X_lcl = X.getLocalViewHost (Tpetra::Access::ReadWrite);
980  auto Y_lcl = Y.getLocalViewHost (Tpetra::Access::ReadOnly);
981  KokkosSparse::trsv (uplo.c_str (), trans.c_str (), diag.c_str (),
982  A_lcl, Y_lcl, X_lcl);
983  }
984  else {
985  for (size_t j = 0; j < numVecs; ++j) {
986  auto X_j = X.getVectorNonConst (j);
987  auto Y_j = Y.getVector (j);
988  auto X_lcl = X_j->getLocalViewHost (Tpetra::Access::ReadWrite);
989  auto Y_lcl = Y_j->getLocalViewHost (Tpetra::Access::ReadOnly);
990  KokkosSparse::trsv (uplo.c_str (), trans.c_str (),
991  diag.c_str (), A_lcl, Y_lcl, X_lcl);
992  }
993  }
994  }
995 }
996 
997 template<class MatrixType>
998 void
999 LocalSparseTriangularSolver<MatrixType>::
1000 localApply (const MV& X,
1001  MV& Y,
1002  const Teuchos::ETransp mode,
1003  const scalar_type& alpha,
1004  const scalar_type& beta) const
1005 {
1006  if (mode == Teuchos::NO_TRANS && Teuchos::nonnull (htsImpl_) &&
1007  htsImpl_->isComputed ()) {
1008  htsImpl_->localApply (X, Y, mode, alpha, beta);
1009  return;
1010  }
1011 
1012  using Teuchos::RCP;
1013  typedef scalar_type ST;
1014  typedef Teuchos::ScalarTraits<ST> STS;
1015 
1016  if (beta == STS::zero ()) {
1017  if (alpha == STS::zero ()) {
1018  Y.putScalar (STS::zero ()); // Y := 0 * Y (ignore contents of Y)
1019  }
1020  else { // alpha != 0
1021  this->localTriangularSolve (X, Y, mode);
1022  if (alpha != STS::one ()) {
1023  Y.scale (alpha);
1024  }
1025  }
1026  }
1027  else { // beta != 0
1028  if (alpha == STS::zero ()) {
1029  Y.scale (beta); // Y := beta * Y
1030  }
1031  else { // alpha != 0
1032  MV Y_tmp (Y, Teuchos::Copy);
1033  this->localTriangularSolve (X, Y_tmp, mode); // Y_tmp := M * X
1034  Y.update (alpha, Y_tmp, beta); // Y := beta * Y + alpha * Y_tmp
1035  }
1036  }
1037 }
1038 
1039 
1040 template <class MatrixType>
1041 int
1044  return numInitialize_;
1045 }
1046 
1047 template <class MatrixType>
1048 int
1050 getNumCompute () const {
1051  return numCompute_;
1052 }
1053 
1054 template <class MatrixType>
1055 int
1057 getNumApply () const {
1058  return numApply_;
1059 }
1060 
1061 template <class MatrixType>
1062 double
1065  return initializeTime_;
1066 }
1067 
1068 template<class MatrixType>
1069 double
1071 getComputeTime () const {
1072  return computeTime_;
1073 }
1074 
1075 template<class MatrixType>
1076 double
1078 getApplyTime() const {
1079  return applyTime_;
1080 }
1081 
1082 template <class MatrixType>
1083 std::string
1085 description () const
1086 {
1087  std::ostringstream os;
1088 
1089  // Output is a valid YAML dictionary in flow style. If you don't
1090  // like everything on a single line, you should call describe()
1091  // instead.
1092  os << "\"Ifpack2::LocalSparseTriangularSolver\": {";
1093  if (this->getObjectLabel () != "") {
1094  os << "Label: \"" << this->getObjectLabel () << "\", ";
1095  }
1096  os << "Initialized: " << (isInitialized () ? "true" : "false") << ", "
1097  << "Computed: " << (isComputed () ? "true" : "false") << ", ";
1098 
1099  if(isKokkosKernelsSptrsv_) os << "KK-SPTRSV, ";
1100  if(isKokkosKernelsStream_) os << "KK-SolveStream, ";
1101 
1102  if (A_.is_null ()) {
1103  os << "Matrix: null";
1104  }
1105  else {
1106  os << "Matrix dimensions: ["
1107  << A_->getGlobalNumRows () << ", "
1108  << A_->getGlobalNumCols () << "]"
1109  << ", Number of nonzeros: " << A_->getGlobalNumEntries();
1110  }
1111 
1112  if (Teuchos::nonnull (htsImpl_))
1113  os << ", HTS computed: " << (htsImpl_->isComputed () ? "true" : "false");
1114  os << "}";
1115  return os.str ();
1116 }
1117 
1118 template <class MatrixType>
1121  const Teuchos::EVerbosityLevel verbLevel) const
1122 {
1123  using std::endl;
1124  // Default verbosity level is VERB_LOW, which prints only on Process
1125  // 0 of the matrix's communicator.
1126  const Teuchos::EVerbosityLevel vl
1127  = (verbLevel == Teuchos::VERB_DEFAULT) ? Teuchos::VERB_LOW : verbLevel;
1128 
1129  if (vl != Teuchos::VERB_NONE) {
1130  // Print only on Process 0 in the matrix's communicator. If the
1131  // matrix is null, though, we have to get the communicator from
1132  // somewhere, so we ask Tpetra for its default communicator. If
1133  // MPI is enabled, this wraps MPI_COMM_WORLD or a clone thereof.
1134  auto comm = A_.is_null () ?
1135  Tpetra::getDefaultComm () :
1136  A_->getComm ();
1137 
1138  // Users aren't supposed to do anything with the matrix on
1139  // processes where its communicator is null.
1140  if (! comm.is_null () && comm->getRank () == 0) {
1141  // By convention, describe() should always begin with a tab.
1142  Teuchos::OSTab tab0 (out);
1143  // Output is in YAML format. We have to escape the class name,
1144  // because it has a colon.
1145  out << "\"Ifpack2::LocalSparseTriangularSolver\":" << endl;
1146  Teuchos::OSTab tab1 (out);
1147  out << "Scalar: " << Teuchos::TypeNameTraits<scalar_type>::name () << endl
1148  << "LocalOrdinal: " << Teuchos::TypeNameTraits<local_ordinal_type>::name () << endl
1149  << "GlobalOrdinal: " << Teuchos::TypeNameTraits<global_ordinal_type>::name () << endl
1150  << "Node: " << Teuchos::TypeNameTraits<node_type>::name () << endl;
1151  }
1152  }
1153 }
1154 
1155 template <class MatrixType>
1159 {
1161  (A_.is_null (), std::runtime_error,
1162  "Ifpack2::LocalSparseTriangularSolver::getDomainMap: "
1163  "The matrix is null. Please call setMatrix() with a nonnull input "
1164  "before calling this method.");
1165  return A_->getDomainMap ();
1166 }
1167 
1168 template <class MatrixType>
1171 getRangeMap () const
1172 {
1174  (A_.is_null (), std::runtime_error,
1175  "Ifpack2::LocalSparseTriangularSolver::getRangeMap: "
1176  "The matrix is null. Please call setMatrix() with a nonnull input "
1177  "before calling this method.");
1178  return A_->getRangeMap ();
1179 }
1180 
1181 template<class MatrixType>
1184 {
1185  const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::setMatrix: ";
1186 
1187  // If the pointer didn't change, do nothing. This is reasonable
1188  // because users are supposed to call this method with the same
1189  // object over all participating processes, and pointer identity
1190  // implies object identity.
1191  if (A.getRawPtr () != A_.getRawPtr () || isInternallyChanged_) {
1192  // Check in serial or one-process mode if the matrix is square.
1194  (! A.is_null () && A->getComm ()->getSize () == 1 &&
1195  A->getLocalNumRows () != A->getLocalNumCols (),
1196  std::runtime_error, prefix << "If A's communicator only contains one "
1197  "process, then A must be square. Instead, you provided a matrix A with "
1198  << A->getLocalNumRows () << " rows and " << A->getLocalNumCols ()
1199  << " columns.");
1200 
1201  // It's legal for A to be null; in that case, you may not call
1202  // initialize() until calling setMatrix() with a nonnull input.
1203  // Regardless, setting the matrix invalidates the preconditioner.
1204  isInitialized_ = false;
1205  isComputed_ = false;
1206 
1207  if (A.is_null ()) {
1208  A_crs_ = Teuchos::null;
1209  A_ = Teuchos::null;
1210  }
1211  else { // A is not null
1213  Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A);
1215  (A_crs.is_null (), std::invalid_argument, prefix <<
1216  "The input matrix A is not a Tpetra::CrsMatrix.");
1217  A_crs_ = A_crs;
1218  A_ = A;
1219  }
1220 
1221  if (Teuchos::nonnull (htsImpl_))
1222  htsImpl_->reset ();
1223  } // pointers are not the same
1224 }
1225 
1226 template<class MatrixType>
1227 void
1229 setStreamInfo (const bool& isKokkosKernelsStream, const int& num_streams,
1230  const std::vector<HandleExecSpace>& exec_space_instances)
1231 {
1232  isKokkosKernelsStream_ = isKokkosKernelsStream;
1233  num_streams_ = num_streams;
1234  exec_space_instances_ = exec_space_instances;
1235  A_crs_v_ = std::vector< Teuchos::RCP<crs_matrix_type> >(num_streams_);
1236 }
1237 
1238 template<class MatrixType>
1240 setMatrices (const std::vector< Teuchos::RCP<crs_matrix_type> >& A_crs_v)
1241 {
1242  const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::setMatrixWithStreams: ";
1243 
1244  for(int i = 0; i < num_streams_; i++) {
1245  // If the pointer didn't change, do nothing. This is reasonable
1246  // because users are supposed to call this method with the same
1247  // object over all participating processes, and pointer identity
1248  // implies object identity.
1249  if (A_crs_v[i].getRawPtr () != A_crs_v_[i].getRawPtr () || isInternallyChanged_) {
1250  // Check in serial or one-process mode if the matrix is square.
1252  (! A_crs_v[i].is_null () && A_crs_v[i]->getComm ()->getSize () == 1 &&
1253  A_crs_v[i]->getLocalNumRows () != A_crs_v[i]->getLocalNumCols (),
1254  std::runtime_error, prefix << "If A's communicator only contains one "
1255  "process, then A must be square. Instead, you provided a matrix A with "
1256  << A_crs_v[i]->getLocalNumRows () << " rows and " << A_crs_v[i]->getLocalNumCols ()
1257  << " columns.");
1258 
1259  // It's legal for A to be null; in that case, you may not call
1260  // initialize() until calling setMatrix() with a nonnull input.
1261  // Regardless, setting the matrix invalidates the preconditioner.
1262  isInitialized_ = false;
1263  isComputed_ = false;
1264 
1265  if (A_crs_v[i].is_null ()) {
1266  A_crs_v_[i] = Teuchos::null;
1267  }
1268  else { // A is not null
1270  Teuchos::rcp_dynamic_cast<crs_matrix_type> (A_crs_v[i]);
1272  (A_crs.is_null (), std::invalid_argument, prefix <<
1273  "The input matrix A is not a Tpetra::CrsMatrix.");
1274  A_crs_v_[i] = A_crs;
1275  }
1276  } // pointers are not the same
1277  }
1278 }
1279 
1280 } // namespace Ifpack2
1281 
1282 #define IFPACK2_LOCALSPARSETRIANGULARSOLVER_INSTANT(S,LO,GO,N) \
1283  template class Ifpack2::LocalSparseTriangularSolver< Tpetra::RowMatrix<S, LO, GO, N> >;
1284 
1285 #endif // IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
bool is_null(const boost::shared_ptr< T > &p)
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the preconditioner to X, and put the result in Y.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:706
int getNumCompute() const
Return the number of calls to compute().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1050
std::string description() const
A one-line description of this object.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1085
T & get(const std::string &name, T def_value)
bool nonnull(const std::shared_ptr< T > &p)
#define TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(throw_exception_test, Exception, msg)
void compute()
&quot;Numeric&quot; phase of setup
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:602
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
MatrixType::global_ordinal_type global_ordinal_type
Type of the global indices of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:62
bool isComputed() const
Return true if compute() has been called.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:189
T * get() const
double getApplyTime() const
Return the time spent in apply().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1078
double getComputeTime() const
Return the time spent in compute().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1071
double getInitializeTime() const
Return the time spent in initialize().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1064
MatrixType::node_type node_type
Node type of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:64
bool isParameter(const std::string &name) const
T * getRawPtr() const
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
void setMatrices(const std::vector< Teuchos::RCP< crs_matrix_type > > &A_crs_v)
Set this preconditioner&#39;s matrices (used by stream interface of triangular solve).
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1240
#define TEUCHOS_TEST_FOR_EXCEPT_MSG(throw_exception_test, msg)
Teuchos::RCP< const map_type > getDomainMap() const
The domain of this operator.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1158
&quot;Preconditioner&quot; that solves local sparse triangular systems.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:46
void initialize()
&quot;Symbolic&quot; phase of setup
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:355
int getNumInitialize() const
Return the number of calls to initialize().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1043
void setStreamInfo(const bool &isKokkosKernelsStream, const int &num_streams, const std::vector< HandleExecSpace > &exec_space_instances)
Set this triangular solver&#39;s stream information.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1229
Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > map_type
Specialization of Tpetra::Map used by this class.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:69
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Set this preconditioner&#39;s matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1183
void resize(size_type new_size, const value_type &x=value_type())
IntegralType getIntegralValue(const std::string &str, const std::string &paramName="", const std::string &sublistName="") const
int getNumApply() const
Return the number of calls to apply().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1057
MatrixType::local_ordinal_type local_ordinal_type
Type of the local indices of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:60
Teuchos::RCP< const map_type > getRangeMap() const
The range of this operator.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1171
bool isType(const std::string &name) const
Tpetra::CrsMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > crs_matrix_type
Specialization of Tpetra::CrsMatrix used by this class.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:75
void setParameters(const Teuchos::ParameterList &params)
Set this object&#39;s parameters.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:308
MatrixType::scalar_type scalar_type
Type of the entries of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:58
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print this object with given verbosity to the given output stream.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1120
virtual ~LocalSparseTriangularSolver()
Destructor (virtual for memory safety).
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:289
LocalSparseTriangularSolver()
Constructor that takes no input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:250
static std::string name()
std::string typeName(const T &t)
bool is_null() const