Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_LocalSparseTriangularSolver_def.hpp
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
43 #ifndef IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
44 #define IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
45 
46 #include "Tpetra_CrsMatrix.hpp"
47 #include "Tpetra_Core.hpp"
48 #include "Teuchos_StandardParameterEntryValidators.hpp"
49 #include "Tpetra_Details_determineLocalTriangularStructure.hpp"
50 #include "KokkosSparse_trsv.hpp"
51 
52 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
53 # include "shylu_hts.hpp"
54 #endif
55 
56 namespace Ifpack2 {
57 
58 namespace Details {
59 struct TrisolverType {
60  enum Enum {
61  Internal,
62  HTS,
63  KSPTRSV
64  };
65 
66  static void loadPLTypeOption (Teuchos::Array<std::string>& type_strs, Teuchos::Array<Enum>& type_enums) {
67  type_strs.resize(3);
68  type_strs[0] = "Internal";
69  type_strs[1] = "HTS";
70  type_strs[2] = "KSPTRSV";
71  type_enums.resize(3);
72  type_enums[0] = Internal;
73  type_enums[1] = HTS;
74  type_enums[2] = KSPTRSV;
75  }
76 };
77 }
78 
79 template<class MatrixType>
80 class LocalSparseTriangularSolver<MatrixType>::HtsImpl {
81 public:
82  typedef Tpetra::CrsMatrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type> crs_matrix_type;
83 
84  void reset () {
85 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
86  Timpl_ = Teuchos::null;
87  levelset_block_size_ = 1;
88 #endif
89  }
90 
91  void setParameters (const Teuchos::ParameterList& pl) {
92  (void)pl;
93 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
94  const char* block_size_s = "trisolver: block size";
95  if (pl.isParameter(block_size_s)) {
96  TEUCHOS_TEST_FOR_EXCEPT_MSG( ! pl.isType<int>(block_size_s),
97  "The parameter \"" << block_size_s << "\" must be of type int.");
98  levelset_block_size_ = pl.get<int>(block_size_s);
99  }
100  if (levelset_block_size_ < 1)
101  levelset_block_size_ = 1;
102 #endif
103  }
104 
105  // HTS has the phases symbolic+numeric, numeric, and apply. Hence the first
106  // call to compute() will trigger the symbolic+numeric phase, and subsequent
107  // calls (with the same Timpl_) will trigger the numeric phase. In the call to
108  // initialize(), essentially nothing happens.
109  void initialize (const crs_matrix_type& /* unused */) {
110 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
111  reset();
112  transpose_ = conjugate_ = false;
113 #endif
114  }
115 
116  void compute (const crs_matrix_type& T_in, const Teuchos::RCP<Teuchos::FancyOStream>& out) {
117  (void)T_in;
118  (void)out;
119 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
120  using Teuchos::ArrayRCP;
121 
122  auto rowptr = T_in.getLocalRowPtrsHost();
123  auto colidx = T_in.getLocalIndicesHost();
124  auto val = T_in.getLocalValuesHost(Tpetra::Access::ReadOnly);
125  Kokkos::fence();
126 
127  Teuchos::RCP<HtsCrsMatrix> T_hts = Teuchos::rcpWithDealloc(
128  HTST::make_CrsMatrix(rowptr.size() - 1,
129  rowptr.data(), colidx.data(),
130  // For std/Kokkos::complex.
131  reinterpret_cast<const scalar_type*>(val.data()),
132  transpose_, conjugate_),
133  HtsCrsMatrixDeleter());
134 
135  if (Teuchos::nonnull(Timpl_)) {
136  // Reuse the nonzero pattern.
137  HTST::reprocess_numeric(Timpl_.get(), T_hts.get());
138  } else {
139  // Build from scratch.
140  if (T_in.getCrsGraph().is_null()) {
141  if (Teuchos::nonnull(out))
142  *out << "HTS compute failed because T_in.getCrsGraph().is_null().\n";
143  return;
144  }
145  if ( ! T_in.getCrsGraph()->isSorted()) {
146  if (Teuchos::nonnull(out))
147  *out << "HTS compute failed because ! T_in.getCrsGraph().isSorted().\n";
148  return;
149  }
150  if ( ! T_in.isStorageOptimized()) {
151  if (Teuchos::nonnull(out))
152  *out << "HTS compute failed because ! T_in.isStorageOptimized().\n";
153  return;
154  }
155 
156  typename HTST::PreprocessArgs args;
157  args.T = T_hts.get();
158  args.max_nrhs = 1;
159 #ifdef _OPENMP
160  args.nthreads = omp_get_max_threads();
161 #else
162  args.nthreads = 1;
163 #endif
164  args.save_for_reprocess = true;
165  typename HTST::Options opts;
166  opts.levelset_block_size = levelset_block_size_;
167  args.options = &opts;
168 
169  try {
170  Timpl_ = Teuchos::rcpWithDealloc(HTST::preprocess(args), TImplDeleter());
171  } catch (const std::exception& e) {
172  if (Teuchos::nonnull(out))
173  *out << "HTS preprocess threw: " << e.what() << "\n";
174  }
175  }
176 #endif
177  }
178 
179  // HTS may not be able to handle a matrix, so query whether compute()
180  // succeeded.
181  bool isComputed () {
182 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
183  return Teuchos::nonnull(Timpl_);
184 #else
185  return false;
186 #endif
187  }
188 
189  // Y := beta * Y + alpha * (M * X)
190  void localApply (const MV& X, MV& Y,
191  const Teuchos::ETransp /* mode */,
192  const scalar_type& alpha, const scalar_type& beta) const {
193  (void)X;
194  (void)Y;
195  (void)alpha;
196  (void)beta;
197 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
198  const auto& X_view = X.getLocalViewHost (Tpetra::Access::ReadOnly);
199  const auto& Y_view = Y.getLocalViewHost (Tpetra::Access::ReadWrite);
200 
201  // Only does something if #rhs > current capacity.
202  HTST::reset_max_nrhs(Timpl_.get(), X_view.extent(1));
203  // Switch alpha and beta because of HTS's opposite convention.
204  HTST::solve_omp(Timpl_.get(),
205  // For std/Kokkos::complex.
206  reinterpret_cast<const scalar_type*>(X_view.data()),
207  X_view.extent(1),
208  // For std/Kokkos::complex.
209  reinterpret_cast<scalar_type*>(Y_view.data()),
210  beta, alpha);
211 #endif
212  }
213 
214 private:
215 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
216  typedef ::Experimental::HTS<local_ordinal_type, size_t, scalar_type> HTST;
217  typedef typename HTST::Impl TImpl;
218  typedef typename HTST::CrsMatrix HtsCrsMatrix;
219 
220  struct TImplDeleter {
221  void free (TImpl* impl) {
222  HTST::delete_Impl(impl);
223  }
224  };
225 
226  struct HtsCrsMatrixDeleter {
227  void free (HtsCrsMatrix* T) {
228  HTST::delete_CrsMatrix(T);
229  }
230  };
231 
232  Teuchos::RCP<TImpl> Timpl_;
233  bool transpose_, conjugate_;
234  int levelset_block_size_;
235 #endif
236 };
237 
238 template<class MatrixType>
241  A_ (A)
242 {
243  initializeState();
244 
245  if (! A.is_null ()) {
247  Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A);
249  (A_crs.is_null (), std::invalid_argument,
250  "Ifpack2::LocalSparseTriangularSolver constructor: "
251  "The input matrix A is not a Tpetra::CrsMatrix.");
252  A_crs_ = A_crs;
253  }
254 }
255 
256 template<class MatrixType>
260  A_ (A),
261  out_ (out)
262 {
263  initializeState();
264  if (! out_.is_null ()) {
265  *out_ << ">>> DEBUG Ifpack2::LocalSparseTriangularSolver constructor"
266  << std::endl;
267  }
268 
269  if (! A.is_null ()) {
271  Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A);
273  (A_crs.is_null (), std::invalid_argument,
274  "Ifpack2::LocalSparseTriangularSolver constructor: "
275  "The input matrix A is not a Tpetra::CrsMatrix.");
276  A_crs_ = A_crs;
277  }
278 }
279 
280 template<class MatrixType>
283 {
284  initializeState();
285 }
286 
287 template<class MatrixType>
290  out_ (out)
291 {
292  initializeState();
293  if (! out_.is_null ()) {
294  *out_ << ">>> DEBUG Ifpack2::LocalSparseTriangularSolver constructor"
295  << std::endl;
296  }
297 }
298 
299 template<class MatrixType>
301 {
302  isInitialized_ = false;
303  isComputed_ = false;
304  reverseStorage_ = false;
305  isInternallyChanged_ = false;
306  numInitialize_ = 0;
307  numCompute_ = 0;
308  numApply_ = 0;
309  initializeTime_ = 0.0;
310  computeTime_ = 0.0;
311  applyTime_ = 0.0;
312  isKokkosKernelsSptrsv_ = false;
313  isKokkosKernelsStream_ = false;
314  num_streams_ = 0;
315  uplo_ = "N";
316  diag_ = "N";
317 }
318 
319 template<class MatrixType>
322 {
323  if (!isKokkosKernelsStream_) {
324  if (Teuchos::nonnull (kh_)) {
325  kh_->destroy_sptrsv_handle();
326  }
327  }
328  else {
329  for (int i = 0; i < num_streams_; i++) {
330  if (Teuchos::nonnull (kh_v_[i])) {
331  kh_v_[i]->destroy_sptrsv_handle();
332  }
333  }
334  }
335 }
336 
337 template<class MatrixType>
338 void
341 {
342  using Teuchos::RCP;
344  using Teuchos::Array;
345 
346  Details::TrisolverType::Enum trisolverType = Details::TrisolverType::Internal;
347  do {
348  static const char typeName[] = "trisolver: type";
349 
350  if ( ! pl.isType<std::string>(typeName)) break;
351 
352  // Map std::string <-> TrisolverType::Enum.
353  Array<std::string> trisolverTypeStrs;
354  Array<Details::TrisolverType::Enum> trisolverTypeEnums;
355  Details::TrisolverType::loadPLTypeOption (trisolverTypeStrs, trisolverTypeEnums);
357  s2i(trisolverTypeStrs (), trisolverTypeEnums (), typeName, false);
358 
359  trisolverType = s2i.getIntegralValue(pl.get<std::string>(typeName));
360  } while (0);
361 
362  if (trisolverType == Details::TrisolverType::HTS) {
363  htsImpl_ = Teuchos::rcp (new HtsImpl ());
364  htsImpl_->setParameters (pl);
365  }
366 
367  if (trisolverType == Details::TrisolverType::KSPTRSV) {
368  this->isKokkosKernelsSptrsv_ = true;
369  }
370  else {
371  this->isKokkosKernelsSptrsv_ = false;
372  }
373 
374  if (pl.isParameter("trisolver: reverse U"))
375  reverseStorage_ = pl.get<bool>("trisolver: reverse U");
376 
378  (reverseStorage_ && (trisolverType == Details::TrisolverType::HTS || trisolverType == Details::TrisolverType::KSPTRSV),
379  std::logic_error, "Ifpack2::LocalSparseTriangularSolver::setParameters: "
380  "You are not allowed to enable both HTS or KSPTRSV and the \"trisolver: reverse U\" "
381  "options. See GitHub issue #2647.");
382 }
383 
384 template<class MatrixType>
385 void
388 {
389  using Tpetra::Details::determineLocalTriangularStructure;
390 
391  using local_matrix_type = typename crs_matrix_type::local_matrix_device_type;
392  using LO = local_ordinal_type;
393 
394  const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::initialize: ";
395  if (! out_.is_null ()) {
396  *out_ << ">>> DEBUG " << prefix << std::endl;
397  }
398 
399  if (!isKokkosKernelsStream_) {
401  (A_.is_null (), std::runtime_error, prefix << "You must call "
402  "setMatrix() with a nonnull input matrix before you may call "
403  "initialize() or compute().");
404  if (A_crs_.is_null ()) {
405  auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_);
407  (A_crs.get () == nullptr, std::invalid_argument,
408  prefix << "The input matrix A is not a Tpetra::CrsMatrix.");
409  A_crs_ = A_crs;
410  }
411  auto G = A_crs_->getGraph ();
413  (G.is_null (), std::logic_error, prefix << "A_ and A_crs_ are nonnull, "
414  "but A_crs_'s RowGraph G is null. "
415  "Please report this bug to the Ifpack2 developers.");
416  // At this point, the graph MUST be fillComplete. The "initialize"
417  // (symbolic) part of setup only depends on the graph structure, so
418  // the matrix itself need not be fill complete.
420  (! G->isFillComplete (), std::runtime_error, "If you call this method, "
421  "the matrix's graph must be fill complete. It is not.");
422 
423  // mfh 30 Apr 2018: See GitHub Issue #2658.
424  constexpr bool ignoreMapsForTriStructure = true;
425  auto lclTriStructure = [&] {
426  auto lclMatrix = A_crs_->getLocalMatrixDevice ();
427  auto lclRowMap = A_crs_->getRowMap ()->getLocalMap ();
428  auto lclColMap = A_crs_->getColMap ()->getLocalMap ();
429  auto lclTriStruct =
430  determineLocalTriangularStructure (lclMatrix.graph,
431  lclRowMap,
432  lclColMap,
433  ignoreMapsForTriStructure);
434  const LO lclNumRows = lclRowMap.getLocalNumElements ();
435  this->diag_ = (lclTriStruct.diagCount < lclNumRows) ? "U" : "N";
436  this->uplo_ = lclTriStruct.couldBeLowerTriangular ? "L" :
437  (lclTriStruct.couldBeUpperTriangular ? "U" : "N");
438  return lclTriStruct;
439  } ();
440 
441  if (reverseStorage_ && lclTriStructure.couldBeUpperTriangular &&
442  htsImpl_.is_null ()) {
443  // Reverse the storage for an upper triangular matrix
444  auto Alocal = A_crs_->getLocalMatrixDevice();
445  auto ptr = Alocal.graph.row_map;
446  auto ind = Alocal.graph.entries;
447  auto val = Alocal.values;
448 
449  auto numRows = Alocal.numRows();
450  auto numCols = Alocal.numCols();
451  auto numNnz = Alocal.nnz();
452 
453  typename decltype(ptr)::non_const_type newptr ("ptr", ptr.extent (0));
454  typename decltype(ind)::non_const_type newind ("ind", ind.extent (0));
455  decltype(val) newval ("val", val.extent (0));
456 
457  // FIXME: The code below assumes UVM
458  typename crs_matrix_type::execution_space().fence();
459  newptr(0) = 0;
460  for (local_ordinal_type row = 0, rowStart = 0; row < numRows; ++row) {
461  auto A_r = Alocal.row(numRows-1 - row);
462 
463  auto numEnt = A_r.length;
464  for (local_ordinal_type k = 0; k < numEnt; ++k) {
465  newind(rowStart + k) = numCols-1 - A_r.colidx(numEnt-1 - k);
466  newval(rowStart + k) = A_r.value (numEnt-1 - k);
467  }
468  rowStart += numEnt;
469  newptr(row+1) = rowStart;
470  }
471  typename crs_matrix_type::execution_space().fence();
472 
473  // Reverse maps
474  Teuchos::RCP<map_type> newRowMap, newColMap;
475  {
476  // Reverse row map
477  auto rowMap = A_->getRowMap();
478  auto numElems = rowMap->getLocalNumElements();
479  auto rowElems = rowMap->getLocalElementList();
480 
481  Teuchos::Array<global_ordinal_type> newRowElems(rowElems.size());
482  for (size_t i = 0; i < numElems; i++)
483  newRowElems[i] = rowElems[numElems-1 - i];
484 
485  newRowMap = Teuchos::rcp(new map_type(rowMap->getGlobalNumElements(), newRowElems, rowMap->getIndexBase(), rowMap->getComm()));
486  }
487  {
488  // Reverse column map
489  auto colMap = A_->getColMap();
490  auto numElems = colMap->getLocalNumElements();
491  auto colElems = colMap->getLocalElementList();
492 
493  Teuchos::Array<global_ordinal_type> newColElems(colElems.size());
494  for (size_t i = 0; i < numElems; i++)
495  newColElems[i] = colElems[numElems-1 - i];
496 
497  newColMap = Teuchos::rcp(new map_type(colMap->getGlobalNumElements(), newColElems, colMap->getIndexBase(), colMap->getComm()));
498  }
499 
500  // Construct new matrix
501  local_matrix_type newLocalMatrix("Upermuted", numRows, numCols, numNnz, newval, newptr, newind);
502 
503  A_crs_ = Teuchos::rcp(new crs_matrix_type(newLocalMatrix, newRowMap, newColMap, A_crs_->getDomainMap(), A_crs_->getRangeMap()));
504 
505  isInternallyChanged_ = true;
506 
507  // FIXME (mfh 18 Apr 2019) Recomputing this is unnecessary, but I
508  // didn't want to break any invariants, especially considering
509  // that this branch is likely poorly tested.
510  auto newLclTriStructure =
511  determineLocalTriangularStructure (newLocalMatrix.graph,
512  newRowMap->getLocalMap (),
513  newColMap->getLocalMap (),
514  ignoreMapsForTriStructure);
515  const LO newLclNumRows = newRowMap->getLocalNumElements ();
516  this->diag_ = (newLclTriStructure.diagCount < newLclNumRows) ? "U" : "N";
517  this->uplo_ = newLclTriStructure.couldBeLowerTriangular ? "L" :
518  (newLclTriStructure.couldBeUpperTriangular ? "U" : "N");
519  }
520  }
521  else {
522  for (int i = 0; i < num_streams_; i++) {
524  (A_crs_v_[i].is_null (), std::runtime_error, prefix << "You must call "
525  "setMatrix() with a nonnull input matrix before you may call "
526  "initialize() or compute().");
527  auto G = A_crs_v_[i]->getGraph ();
529  (G.is_null (), std::logic_error, prefix << "A_crs_ are nonnull, "
530  "but A_crs_'s RowGraph G is null. "
531  "Please report this bug to the Ifpack2 developers.");
532  // At this point, the graph MUST be fillComplete. The "initialize"
533  // (symbolic) part of setup only depends on the graph structure, so
534  // the matrix itself need not be fill complete.
536  (! G->isFillComplete (), std::runtime_error, "If you call this method, "
537  "the matrix's graph must be fill complete. It is not.");
538 
539  // mfh 30 Apr 2018: See GitHub Issue #2658.
540  constexpr bool ignoreMapsForTriStructure = true;
541  std::string prev_uplo_ = this->uplo_;
542  std::string prev_diag_ = this->diag_;
543  auto lclMatrix = A_crs_v_[i]->getLocalMatrixDevice ();
544  auto lclRowMap = A_crs_v_[i]->getRowMap ()->getLocalMap ();
545  auto lclColMap = A_crs_v_[i]->getColMap ()->getLocalMap ();
546  auto lclTriStruct =
547  determineLocalTriangularStructure (lclMatrix.graph,
548  lclRowMap,
549  lclColMap,
550  ignoreMapsForTriStructure);
551  const LO lclNumRows = lclRowMap.getLocalNumElements ();
552  this->diag_ = (lclTriStruct.diagCount < lclNumRows) ? "U" : "N";
553  this->uplo_ = lclTriStruct.couldBeLowerTriangular ? "L" :
554  (lclTriStruct.couldBeUpperTriangular ? "U" : "N");
555  if (i > 0) {
557  ((this->diag_ != prev_diag_) || (this->uplo_ != prev_uplo_),
558  std::logic_error, prefix << "A_crs_'s structures in streams "
559  "are different. Please report this bug to the Ifpack2 developers.");
560  }
561  }
562  }
563 
564  if (Teuchos::nonnull (htsImpl_))
565  {
566  htsImpl_->initialize (*A_crs_);
567  isInternallyChanged_ = true;
568  }
569 
570  const bool ksptrsv_valid_uplo = (this->uplo_ != "N");
571  kh_v_nonnull_ = false;
572  if (this->isKokkosKernelsSptrsv_ && ksptrsv_valid_uplo && this->diag_ != "U")
573  {
574  if (!isKokkosKernelsStream_) {
575  kh_ = Teuchos::rcp (new k_handle());
576  }
577  else {
578  kh_v_ = std::vector< Teuchos::RCP<k_handle> >(num_streams_);
579  for (int i = 0; i < num_streams_; i++) {
580  kh_v_[i] = Teuchos::rcp (new k_handle ());
581  }
582  kh_v_nonnull_ = true;
583  }
584  }
585 
586  isInitialized_ = true;
587  ++numInitialize_;
588 }
589 
590 template<class MatrixType>
591 void
594 {
595  const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::compute: ";
596  if (! out_.is_null ()) {
597  *out_ << ">>> DEBUG " << prefix << std::endl;
598  }
599 
600  if (!isKokkosKernelsStream_) {
602  (A_.is_null (), std::runtime_error, prefix << "You must call "
603  "setMatrix() with a nonnull input matrix before you may call "
604  "initialize() or compute().");
606  (A_crs_.is_null (), std::logic_error, prefix << "A_ is nonnull, but "
607  "A_crs_ is null. Please report this bug to the Ifpack2 developers.");
608  // At this point, the matrix MUST be fillComplete.
610  (! A_crs_->isFillComplete (), std::runtime_error, "If you call this "
611  "method, the matrix must be fill complete. It is not.");
612  }
613  else {
614  for(int i = 0; i < num_streams_; i++) {
616  (A_crs_v_[i].is_null (), std::runtime_error, prefix << "You must call "
617  "setMatrices() with a nonnull input matrix before you may call "
618  "initialize() or compute().");
619  // At this point, the matrix MUST be fillComplete.
621  (! A_crs_v_[i]->isFillComplete (), std::runtime_error, "If you call this "
622  "method, the matrix must be fill complete. It is not.");
623  }
624  }
625 
626  if (! isInitialized_) {
627  initialize ();
628  }
630  (! isInitialized_, std::logic_error, prefix << "initialize() should have "
631  "been called by this point, but isInitialized_ is false. "
632  "Please report this bug to the Ifpack2 developers.");
633 
634  if (! isComputed_) {//Only compute if not computed before
635  if (Teuchos::nonnull (htsImpl_))
636  htsImpl_->compute (*A_crs_, out_);
637 
638  if (Teuchos::nonnull(kh_) && this->isKokkosKernelsSptrsv_) {
639  const bool is_lower_tri = (this->uplo_ == "L") ? true : false;
640 
641  auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_);
642  auto Alocal = A_crs->getLocalMatrixDevice();
643  auto ptr = Alocal.graph.row_map;
644  auto ind = Alocal.graph.entries;
645  auto val = Alocal.values;
646 
647  auto numRows = Alocal.numRows();
648 
649  // Destroy existing handle and recreate in case new matrix provided - requires rerunning symbolic analysis
650  kh_->destroy_sptrsv_handle();
651 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA)
652  // CuSparse only supports int type ordinals
653  // and scalar types of float, double, float complex and double complex
654  if (std::is_same<Kokkos::Cuda, HandleExecSpace>::value &&
655  std::is_same<int, local_ordinal_type>::value &&
656  (std::is_same<scalar_type, float>::value ||
657  std::is_same<scalar_type, double>::value ||
658  std::is_same<scalar_type, Kokkos::complex<float>>::value ||
659  std::is_same<scalar_type, Kokkos::complex<double>>::value))
660  {
661  kh_->create_sptrsv_handle(KokkosSparse::Experimental::SPTRSVAlgorithm::SPTRSV_CUSPARSE, numRows, is_lower_tri);
662  }
663  else
664 #endif
665  {
666  kh_->create_sptrsv_handle(KokkosSparse::Experimental::SPTRSVAlgorithm::SEQLVLSCHD_TP1, numRows, is_lower_tri);
667  }
668  KokkosSparse::Experimental::sptrsv_symbolic(kh_.getRawPtr(), ptr, ind, val);
669  }
670  else if (kh_v_nonnull_ && this->isKokkosKernelsSptrsv_) {
671  const bool is_lower_tri = (this->uplo_ == "L") ? true : false;
672 
673  for (int i = 0; i < num_streams_; i++) {
674  auto A_crs_i = Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_crs_v_[i]);
675  auto Alocal_i = A_crs_i->getLocalMatrixDevice();
676  auto ptr_i = Alocal_i.graph.row_map;
677  auto ind_i = Alocal_i.graph.entries;
678  auto val_i = Alocal_i.values;
679 
680  auto numRows_i = Alocal_i.numRows();
681 
682  // Destroy existing handle and recreate in case new matrix provided - requires rerunning symbolic analysis
683  kh_v_[i]->destroy_sptrsv_handle();
684 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA)
685  // CuSparse only supports int type ordinals
686  // and scalar types of float, double, float complex and double complex
687  if (std::is_same<Kokkos::Cuda, HandleExecSpace>::value &&
688  std::is_same<int, local_ordinal_type>::value &&
689  (std::is_same<scalar_type, float>::value ||
690  std::is_same<scalar_type, double>::value ||
691  std::is_same<scalar_type, Kokkos::complex<float>>::value ||
692  std::is_same<scalar_type, Kokkos::complex<double>>::value))
693  {
694  kh_v_[i]->create_sptrsv_handle(KokkosSparse::Experimental::SPTRSVAlgorithm::SPTRSV_CUSPARSE, numRows_i, is_lower_tri);
695  }
696  else
697 #endif
698  {
699  kh_v_[i]->create_sptrsv_handle(KokkosSparse::Experimental::SPTRSVAlgorithm::SEQLVLSCHD_TP1, numRows_i, is_lower_tri);
700  }
701  KokkosSparse::Experimental::sptrsv_symbolic(kh_v_[i].getRawPtr(), ptr_i, ind_i, val_i);
702  }
703  }
704 
705  isComputed_ = true;
706  ++numCompute_;
707  }
708 }
709 
710 template<class MatrixType>
712 apply (const Tpetra::MultiVector<scalar_type, local_ordinal_type,
714  Tpetra::MultiVector<scalar_type, local_ordinal_type,
716  Teuchos::ETransp mode,
717  scalar_type alpha,
718  scalar_type beta) const
719 {
720  using Teuchos::RCP;
721  using Teuchos::rcp;
722  using Teuchos::rcpFromRef;
723  typedef scalar_type ST;
724  typedef Teuchos::ScalarTraits<ST> STS;
725  const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::apply: ";
726 
727  if (! out_.is_null ()) {
728  *out_ << ">>> DEBUG " << prefix;
729  if (!isKokkosKernelsStream_) {
730  if (A_crs_.is_null ()) {
731  *out_ << "A_crs_ is null!" << std::endl;
732  }
733  else {
735  Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_);
736  const std::string uplo = this->uplo_;
737  const std::string trans = (mode == Teuchos::CONJ_TRANS) ? "C" :
738  (mode == Teuchos::TRANS ? "T" : "N");
739  const std::string diag = this->diag_;
740  *out_ << "uplo=\"" << uplo
741  << "\", trans=\"" << trans
742  << "\", diag=\"" << diag << "\"" << std::endl;
743  }
744  }
745  else {
746  for (int i = 0; i < num_streams_; i++) {
747  if (A_crs_v_[i].is_null ()) {
748  *out_ << "A_crs_v_[" << i << "]" << " is null!" << std::endl;
749  }
750  else {
751  const std::string uplo = this->uplo_;
752  const std::string trans = (mode == Teuchos::CONJ_TRANS) ? "C" :
753  (mode == Teuchos::TRANS ? "T" : "N");
754  const std::string diag = this->diag_;
755  *out_ << "A_crs_v_[" << i << "]: "
756  << "uplo=\"" << uplo
757  << "\", trans=\"" << trans
758  << "\", diag=\"" << diag << "\"" << std::endl;
759  }
760  }
761  }
762  }
763 
765  (! isComputed (), std::runtime_error, prefix << "If compute() has not yet "
766  "been called, or if you have changed the matrix via setMatrix(), you must "
767  "call compute() before you may call this method.");
768  // If isComputed() is true, it's impossible for the matrix to be
769  // null, or for it not to be a Tpetra::CrsMatrix.
770  if (!isKokkosKernelsStream_) {
772  (A_.is_null (), std::logic_error, prefix << "A_ is null. "
773  "Please report this bug to the Ifpack2 developers.");
775  (A_crs_.is_null (), std::logic_error, prefix << "A_crs_ is null. "
776  "Please report this bug to the Ifpack2 developers.");
777  // However, it _is_ possible that the user called resumeFill() on
778  // the matrix, after calling compute(). This is NOT allowed.
780  (! A_crs_->isFillComplete (), std::runtime_error, "If you call this "
781  "method, the matrix must be fill complete. It is not. This means that "
782  " you must have called resumeFill() on the matrix before calling apply(). "
783  "This is NOT allowed. Note that this class may use the matrix's data in "
784  "place without copying it. Thus, you cannot change the matrix and expect "
785  "the solver to stay the same. If you have changed the matrix, first call "
786  "fillComplete() on it, then call compute() on this object, before you call"
787  " apply(). You do NOT need to call setMatrix, as long as the matrix "
788  "itself (that is, its address in memory) is the same.");
789  }
790  else {
791  for (int i = 0; i < num_streams_; i++) {
793  (A_crs_v_[i].is_null (), std::logic_error, prefix << "A_crs_ is null. "
794  "Please report this bug to the Ifpack2 developers.");
795  // However, it _is_ possible that the user called resumeFill() on
796  // the matrix, after calling compute(). This is NOT allowed.
798  (! A_crs_v_[i]->isFillComplete (), std::runtime_error, "If you call this "
799  "method, the matrix must be fill complete. It is not. This means that "
800  " you must have called resumeFill() on the matrix before calling apply(). "
801  "This is NOT allowed. Note that this class may use the matrix's data in "
802  "place without copying it. Thus, you cannot change the matrix and expect "
803  "the solver to stay the same. If you have changed the matrix, first call "
804  "fillComplete() on it, then call compute() on this object, before you call"
805  " apply(). You do NOT need to call setMatrix, as long as the matrix "
806  "itself (that is, its address in memory) is the same.");
807  }
808  }
809 
810  RCP<const MV> X_cur;
811  RCP<MV> Y_cur;
812 
813  if (!isKokkosKernelsStream_) {
814  auto G = A_crs_->getGraph ();
816  (G.is_null (), std::logic_error, prefix << "A_ and A_crs_ are nonnull, "
817  "but A_crs_'s RowGraph G is null. "
818  "Please report this bug to the Ifpack2 developers.");
819  auto importer = G->getImporter ();
820  auto exporter = G->getExporter ();
821 
822  if (! importer.is_null ()) {
823  if (X_colMap_.is_null () || X_colMap_->getNumVectors () != X.getNumVectors ()) {
824  X_colMap_ = rcp (new MV (importer->getTargetMap (), X.getNumVectors ()));
825  }
826  else {
827  X_colMap_->putScalar (STS::zero ());
828  }
829  // See discussion of Github Issue #672 for why the Import needs to
830  // use the ZERO CombineMode. The case where the Export is
831  // nontrivial is likely never exercised.
832  X_colMap_->doImport (X, *importer, Tpetra::ZERO);
833  }
834  X_cur = importer.is_null () ? rcpFromRef (X) :
835  Teuchos::rcp_const_cast<const MV> (X_colMap_);
836 
837  if (! exporter.is_null ()) {
838  if (Y_rowMap_.is_null () || Y_rowMap_->getNumVectors () != Y.getNumVectors ()) {
839  Y_rowMap_ = rcp (new MV (exporter->getSourceMap (), Y.getNumVectors ()));
840  }
841  else {
842  Y_rowMap_->putScalar (STS::zero ());
843  }
844  Y_rowMap_->doExport (Y, *importer, Tpetra::ADD);
845  }
846  Y_cur = exporter.is_null () ? rcpFromRef (Y) : Y_rowMap_;
847  }
848  else {
849  // Currently assume X and Y are local vectors (same sizes as A_crs_).
850  // Should do a better job here!!!
851  X_cur = rcpFromRef (X);
852  Y_cur = rcpFromRef (Y);
853  }
854 
855  localApply (*X_cur, *Y_cur, mode, alpha, beta);
856 
857  if (!isKokkosKernelsStream_) {
858  auto G = A_crs_->getGraph ();
859  auto exporter = G->getExporter ();
860  if (! exporter.is_null ()) {
861  Y.putScalar (STS::zero ());
862  Y.doExport (*Y_cur, *exporter, Tpetra::ADD);
863  }
864  }
865  ++numApply_;
866 }
867 
868 template<class MatrixType>
869 void
871 localTriangularSolve (const MV& Y,
872  MV& X,
873  const Teuchos::ETransp mode) const
874 {
875  using Teuchos::CONJ_TRANS;
876  using Teuchos::NO_TRANS;
877  using Teuchos::TRANS;
878  const char tfecfFuncName[] = "localTriangularSolve: ";
879 
880  if (!isKokkosKernelsStream_)
881  {
883  (! A_crs_->isFillComplete (), std::runtime_error,
884  "The matrix is not fill complete.");
886  ( A_crs_->getLocalNumRows() > 0 && this->uplo_ == "N", std::runtime_error,
887  "The matrix is neither upper triangular or lower triangular. "
888  "You may only call this method if the matrix is triangular. "
889  "Remember that this is a local (per MPI process) property, and that "
890  "Tpetra only knows how to do a local (per process) triangular solve.");
891  }
892  else
893  {
894  for (int i = 0; i < num_streams_; i++) {
896  (! A_crs_v_[i]->isFillComplete (), std::runtime_error,
897  "The matrix is not fill complete.");
899  ( A_crs_v_[i]->getLocalNumRows() > 0 && this->uplo_ == "N", std::runtime_error,
900  "The matrix is neither upper triangular or lower triangular. "
901  "You may only call this method if the matrix is triangular. "
902  "Remember that this is a local (per MPI process) property, and that "
903  "Tpetra only knows how to do a local (per process) triangular solve.");
904  }
905  }
907  (! X.isConstantStride () || ! Y.isConstantStride (), std::invalid_argument,
908  "X and Y must be constant stride.");
911  (STS::isComplex && mode == TRANS, std::logic_error, "This method does "
912  "not currently support non-conjugated transposed solve (mode == "
913  "Teuchos::TRANS) for complex scalar types.");
914 
915  const std::string uplo = this->uplo_;
916  const std::string trans = (mode == Teuchos::CONJ_TRANS) ? "C" :
917  (mode == Teuchos::TRANS ? "T" : "N");
918  const size_t numVecs = std::min (X.getNumVectors (), Y.getNumVectors ());
919 
920  if (Teuchos::nonnull(kh_) && this->isKokkosKernelsSptrsv_ && trans == "N")
921  {
922  auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type> (this->A_);
923  auto A_lclk = A_crs->getLocalMatrixDevice ();
924  auto ptr = A_lclk.graph.row_map;
925  auto ind = A_lclk.graph.entries;
926  auto val = A_lclk.values;
927 
928  for (size_t j = 0; j < numVecs; ++j) {
929  auto X_j = X.getVectorNonConst (j);
930  auto Y_j = Y.getVector (j);
931  auto X_lcl = X_j->getLocalViewDevice (Tpetra::Access::ReadWrite);
932  auto Y_lcl = Y_j->getLocalViewDevice (Tpetra::Access::ReadOnly);
933  auto X_lcl_1d = Kokkos::subview (X_lcl, Kokkos::ALL (), 0);
934  auto Y_lcl_1d = Kokkos::subview (Y_lcl, Kokkos::ALL (), 0);
935  KokkosSparse::Experimental::sptrsv_solve(kh_.getRawPtr(), ptr, ind, val, Y_lcl_1d, X_lcl_1d);
936  // TODO is this fence needed...
937  typename k_handle::HandleExecSpace().fence();
938  }
939  } // End using regular interface of Kokkos Kernels Sptrsv
940  else if (kh_v_nonnull_ && this->isKokkosKernelsSptrsv_ && trans == "N")
941  {
942  std::vector<lno_row_view_t> ptr_v(num_streams_);
943  std::vector<lno_nonzero_view_t> ind_v(num_streams_);
944  std::vector<scalar_nonzero_view_t> val_v(num_streams_);
945  std::vector<k_handle *> KernelHandle_rawptr_v_(num_streams_);
946  for (size_t j = 0; j < numVecs; ++j) {
947  auto X_j = X.getVectorNonConst (j);
948  auto Y_j = Y.getVector (j);
949  auto X_lcl = X_j->getLocalViewDevice (Tpetra::Access::ReadWrite);
950  auto Y_lcl = Y_j->getLocalViewDevice (Tpetra::Access::ReadOnly);
951  auto X_lcl_1d = Kokkos::subview (X_lcl, Kokkos::ALL (), 0);
952  auto Y_lcl_1d = Kokkos::subview (Y_lcl, Kokkos::ALL (), 0);
953  std::vector<decltype(X_lcl_1d)> x_v(num_streams_);
954  std::vector<decltype(Y_lcl_1d)> y_v(num_streams_);
955  local_ordinal_type stream_begin = 0;
956  local_ordinal_type stream_end;
957  for (int i = 0; i < num_streams_; i++) {
958  auto A_crs_i = Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_crs_v_[i]);
959  auto Alocal_i = A_crs_i->getLocalMatrixDevice();
960  ptr_v[i] = Alocal_i.graph.row_map;
961  ind_v[i] = Alocal_i.graph.entries;
962  val_v[i] = Alocal_i.values;
963  stream_end = stream_begin + Alocal_i.numRows();
964  x_v[i] = Kokkos::subview (X_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
965  y_v[i] = Kokkos::subview (Y_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);;
966  KernelHandle_rawptr_v_[i] = kh_v_[i].getRawPtr();
967  stream_begin = stream_end;
968  }
969  KokkosSparse::Experimental::sptrsv_solve_streams( exec_space_instances_, KernelHandle_rawptr_v_,
970  ptr_v, ind_v, val_v, y_v, x_v );
971  for (int i = 0; i < num_streams_; i++) {
972  exec_space_instances_[i].fence();
973  }
974  }
975  } // End using stream interface of Kokkos Kernels Sptrsv
976  else
977  {
978  const std::string diag = this->diag_;
979  // NOTE (mfh 20 Aug 2017): KokkosSparse::trsv currently is a
980  // sequential, host-only code. See
981  // https://github.com/kokkos/kokkos-kernels/issues/48.
982 
983  auto A_lcl = this->A_crs_->getLocalMatrixHost ();
984 
985  if (X.isConstantStride () && Y.isConstantStride ()) {
986  auto X_lcl = X.getLocalViewHost (Tpetra::Access::ReadWrite);
987  auto Y_lcl = Y.getLocalViewHost (Tpetra::Access::ReadOnly);
988  KokkosSparse::trsv (uplo.c_str (), trans.c_str (), diag.c_str (),
989  A_lcl, Y_lcl, X_lcl);
990  }
991  else {
992  for (size_t j = 0; j < numVecs; ++j) {
993  auto X_j = X.getVectorNonConst (j);
994  auto Y_j = Y.getVector (j);
995  auto X_lcl = X_j->getLocalViewHost (Tpetra::Access::ReadWrite);
996  auto Y_lcl = Y_j->getLocalViewHost (Tpetra::Access::ReadOnly);
997  KokkosSparse::trsv (uplo.c_str (), trans.c_str (),
998  diag.c_str (), A_lcl, Y_lcl, X_lcl);
999  }
1000  }
1001  }
1002 }
1003 
1004 template<class MatrixType>
1005 void
1006 LocalSparseTriangularSolver<MatrixType>::
1007 localApply (const MV& X,
1008  MV& Y,
1009  const Teuchos::ETransp mode,
1010  const scalar_type& alpha,
1011  const scalar_type& beta) const
1012 {
1013  if (mode == Teuchos::NO_TRANS && Teuchos::nonnull (htsImpl_) &&
1014  htsImpl_->isComputed ()) {
1015  htsImpl_->localApply (X, Y, mode, alpha, beta);
1016  return;
1017  }
1018 
1019  using Teuchos::RCP;
1020  typedef scalar_type ST;
1021  typedef Teuchos::ScalarTraits<ST> STS;
1022 
1023  if (beta == STS::zero ()) {
1024  if (alpha == STS::zero ()) {
1025  Y.putScalar (STS::zero ()); // Y := 0 * Y (ignore contents of Y)
1026  }
1027  else { // alpha != 0
1028  this->localTriangularSolve (X, Y, mode);
1029  if (alpha != STS::one ()) {
1030  Y.scale (alpha);
1031  }
1032  }
1033  }
1034  else { // beta != 0
1035  if (alpha == STS::zero ()) {
1036  Y.scale (beta); // Y := beta * Y
1037  }
1038  else { // alpha != 0
1039  MV Y_tmp (Y, Teuchos::Copy);
1040  this->localTriangularSolve (X, Y_tmp, mode); // Y_tmp := M * X
1041  Y.update (alpha, Y_tmp, beta); // Y := beta * Y + alpha * Y_tmp
1042  }
1043  }
1044 }
1045 
1046 
1047 template <class MatrixType>
1048 int
1051  return numInitialize_;
1052 }
1053 
1054 template <class MatrixType>
1055 int
1057 getNumCompute () const {
1058  return numCompute_;
1059 }
1060 
1061 template <class MatrixType>
1062 int
1064 getNumApply () const {
1065  return numApply_;
1066 }
1067 
1068 template <class MatrixType>
1069 double
1072  return initializeTime_;
1073 }
1074 
1075 template<class MatrixType>
1076 double
1078 getComputeTime () const {
1079  return computeTime_;
1080 }
1081 
1082 template<class MatrixType>
1083 double
1085 getApplyTime() const {
1086  return applyTime_;
1087 }
1088 
1089 template <class MatrixType>
1090 std::string
1092 description () const
1093 {
1094  std::ostringstream os;
1095 
1096  // Output is a valid YAML dictionary in flow style. If you don't
1097  // like everything on a single line, you should call describe()
1098  // instead.
1099  os << "\"Ifpack2::LocalSparseTriangularSolver\": {";
1100  if (this->getObjectLabel () != "") {
1101  os << "Label: \"" << this->getObjectLabel () << "\", ";
1102  }
1103  os << "Initialized: " << (isInitialized () ? "true" : "false") << ", "
1104  << "Computed: " << (isComputed () ? "true" : "false") << ", ";
1105 
1106  if(isKokkosKernelsSptrsv_) os << "KK-SPTRSV, ";
1107  if(isKokkosKernelsStream_) os << "KK-SolveStream, ";
1108 
1109  if (A_.is_null ()) {
1110  os << "Matrix: null";
1111  }
1112  else {
1113  os << "Matrix dimensions: ["
1114  << A_->getGlobalNumRows () << ", "
1115  << A_->getGlobalNumCols () << "]"
1116  << ", Number of nonzeros: " << A_->getGlobalNumEntries();
1117  }
1118 
1119  if (Teuchos::nonnull (htsImpl_))
1120  os << ", HTS computed: " << (htsImpl_->isComputed () ? "true" : "false");
1121  os << "}";
1122  return os.str ();
1123 }
1124 
1125 template <class MatrixType>
1128  const Teuchos::EVerbosityLevel verbLevel) const
1129 {
1130  using std::endl;
1131  // Default verbosity level is VERB_LOW, which prints only on Process
1132  // 0 of the matrix's communicator.
1133  const Teuchos::EVerbosityLevel vl
1134  = (verbLevel == Teuchos::VERB_DEFAULT) ? Teuchos::VERB_LOW : verbLevel;
1135 
1136  if (vl != Teuchos::VERB_NONE) {
1137  // Print only on Process 0 in the matrix's communicator. If the
1138  // matrix is null, though, we have to get the communicator from
1139  // somewhere, so we ask Tpetra for its default communicator. If
1140  // MPI is enabled, this wraps MPI_COMM_WORLD or a clone thereof.
1141  auto comm = A_.is_null () ?
1142  Tpetra::getDefaultComm () :
1143  A_->getComm ();
1144 
1145  // Users aren't supposed to do anything with the matrix on
1146  // processes where its communicator is null.
1147  if (! comm.is_null () && comm->getRank () == 0) {
1148  // By convention, describe() should always begin with a tab.
1149  Teuchos::OSTab tab0 (out);
1150  // Output is in YAML format. We have to escape the class name,
1151  // because it has a colon.
1152  out << "\"Ifpack2::LocalSparseTriangularSolver\":" << endl;
1153  Teuchos::OSTab tab1 (out);
1154  out << "Scalar: " << Teuchos::TypeNameTraits<scalar_type>::name () << endl
1155  << "LocalOrdinal: " << Teuchos::TypeNameTraits<local_ordinal_type>::name () << endl
1156  << "GlobalOrdinal: " << Teuchos::TypeNameTraits<global_ordinal_type>::name () << endl
1157  << "Node: " << Teuchos::TypeNameTraits<node_type>::name () << endl;
1158  }
1159  }
1160 }
1161 
1162 template <class MatrixType>
1166 {
1168  (A_.is_null (), std::runtime_error,
1169  "Ifpack2::LocalSparseTriangularSolver::getDomainMap: "
1170  "The matrix is null. Please call setMatrix() with a nonnull input "
1171  "before calling this method.");
1172  return A_->getDomainMap ();
1173 }
1174 
1175 template <class MatrixType>
1178 getRangeMap () const
1179 {
1181  (A_.is_null (), std::runtime_error,
1182  "Ifpack2::LocalSparseTriangularSolver::getRangeMap: "
1183  "The matrix is null. Please call setMatrix() with a nonnull input "
1184  "before calling this method.");
1185  return A_->getRangeMap ();
1186 }
1187 
1188 template<class MatrixType>
1191 {
1192  const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::setMatrix: ";
1193 
1194  // If the pointer didn't change, do nothing. This is reasonable
1195  // because users are supposed to call this method with the same
1196  // object over all participating processes, and pointer identity
1197  // implies object identity.
1198  if (A.getRawPtr () != A_.getRawPtr () || isInternallyChanged_) {
1199  // Check in serial or one-process mode if the matrix is square.
1201  (! A.is_null () && A->getComm ()->getSize () == 1 &&
1202  A->getLocalNumRows () != A->getLocalNumCols (),
1203  std::runtime_error, prefix << "If A's communicator only contains one "
1204  "process, then A must be square. Instead, you provided a matrix A with "
1205  << A->getLocalNumRows () << " rows and " << A->getLocalNumCols ()
1206  << " columns.");
1207 
1208  // It's legal for A to be null; in that case, you may not call
1209  // initialize() until calling setMatrix() with a nonnull input.
1210  // Regardless, setting the matrix invalidates the preconditioner.
1211  isInitialized_ = false;
1212  isComputed_ = false;
1213 
1214  if (A.is_null ()) {
1215  A_crs_ = Teuchos::null;
1216  A_ = Teuchos::null;
1217  }
1218  else { // A is not null
1220  Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A);
1222  (A_crs.is_null (), std::invalid_argument, prefix <<
1223  "The input matrix A is not a Tpetra::CrsMatrix.");
1224  A_crs_ = A_crs;
1225  A_ = A;
1226  }
1227 
1228  if (Teuchos::nonnull (htsImpl_))
1229  htsImpl_->reset ();
1230  } // pointers are not the same
1231 
1232  //NOTE (Nov-09-2022):
1233  //For Cuda >= 11.3 (using cusparseSpSV), always call compute before apply,
1234  //even when matrix values are changed with the same sparsity pattern.
1235  //So, force isComputed_ to FALSE here
1236 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030)
1237  isComputed_ = false;
1238 #endif
1239 }
1240 
1241 template<class MatrixType>
1242 void
1244 setStreamInfo (const bool& isKokkosKernelsStream, const int& num_streams,
1245  const std::vector<HandleExecSpace>& exec_space_instances)
1246 {
1247  isKokkosKernelsStream_ = isKokkosKernelsStream;
1248  num_streams_ = num_streams;
1249  exec_space_instances_ = exec_space_instances;
1250  A_crs_v_ = std::vector< Teuchos::RCP<crs_matrix_type> >(num_streams_);
1251 }
1252 
1253 template<class MatrixType>
1255 setMatrices (const std::vector< Teuchos::RCP<crs_matrix_type> >& A_crs_v)
1256 {
1257  const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::setMatrixWithStreams: ";
1258 
1259  for(int i = 0; i < num_streams_; i++) {
1260  // If the pointer didn't change, do nothing. This is reasonable
1261  // because users are supposed to call this method with the same
1262  // object over all participating processes, and pointer identity
1263  // implies object identity.
1264  if (A_crs_v[i].getRawPtr () != A_crs_v_[i].getRawPtr () || isInternallyChanged_) {
1265  // Check in serial or one-process mode if the matrix is square.
1267  (! A_crs_v[i].is_null () && A_crs_v[i]->getComm ()->getSize () == 1 &&
1268  A_crs_v[i]->getLocalNumRows () != A_crs_v[i]->getLocalNumCols (),
1269  std::runtime_error, prefix << "If A's communicator only contains one "
1270  "process, then A must be square. Instead, you provided a matrix A with "
1271  << A_crs_v[i]->getLocalNumRows () << " rows and " << A_crs_v[i]->getLocalNumCols ()
1272  << " columns.");
1273 
1274  // It's legal for A to be null; in that case, you may not call
1275  // initialize() until calling setMatrix() with a nonnull input.
1276  // Regardless, setting the matrix invalidates the preconditioner.
1277  isInitialized_ = false;
1278  isComputed_ = false;
1279 
1280  if (A_crs_v[i].is_null ()) {
1281  A_crs_v_[i] = Teuchos::null;
1282  }
1283  else { // A is not null
1285  Teuchos::rcp_dynamic_cast<crs_matrix_type> (A_crs_v[i]);
1287  (A_crs.is_null (), std::invalid_argument, prefix <<
1288  "The input matrix A is not a Tpetra::CrsMatrix.");
1289  A_crs_v_[i] = A_crs;
1290  }
1291  } // pointers are not the same
1292  }
1293 
1294  //NOTE (Nov-09-2022):
1295  //For Cuda >= 11.3 (using cusparseSpSV), always call compute before apply,
1296  //even when matrix values are changed with the same sparsity pattern.
1297  //So, force isComputed_ to FALSE here
1298 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030)
1299  isComputed_ = false;
1300 #endif
1301 }
1302 
1303 } // namespace Ifpack2
1304 
1305 #define IFPACK2_LOCALSPARSETRIANGULARSOLVER_INSTANT(S,LO,GO,N) \
1306  template class Ifpack2::LocalSparseTriangularSolver< Tpetra::RowMatrix<S, LO, GO, N> >;
1307 
1308 #endif // IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
bool is_null(const boost::shared_ptr< T > &p)
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the preconditioner to X, and put the result in Y.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:712
int getNumCompute() const
Return the number of calls to compute().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1057
std::string description() const
A one-line description of this object.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1092
T & get(const std::string &name, T def_value)
bool nonnull(const std::shared_ptr< T > &p)
#define TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(throw_exception_test, Exception, msg)
void compute()
&quot;Numeric&quot; phase of setup
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:593
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
MatrixType::global_ordinal_type global_ordinal_type
Type of the global indices of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:95
bool isComputed() const
Return true if compute() has been called.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:222
T * get() const
double getApplyTime() const
Return the time spent in apply().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1085
double getComputeTime() const
Return the time spent in compute().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1078
double getInitializeTime() const
Return the time spent in initialize().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1071
MatrixType::node_type node_type
Node type of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:97
bool isParameter(const std::string &name) const
T * getRawPtr() const
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
void setMatrices(const std::vector< Teuchos::RCP< crs_matrix_type > > &A_crs_v)
Set this preconditioner&#39;s matrices (used by stream interface of triangular solve).
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1255
#define TEUCHOS_TEST_FOR_EXCEPT_MSG(throw_exception_test, msg)
Teuchos::RCP< const map_type > getDomainMap() const
The domain of this operator.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1165
&quot;Preconditioner&quot; that solves local sparse triangular systems.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:79
void initialize()
&quot;Symbolic&quot; phase of setup
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:387
int getNumInitialize() const
Return the number of calls to initialize().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1050
void setStreamInfo(const bool &isKokkosKernelsStream, const int &num_streams, const std::vector< HandleExecSpace > &exec_space_instances)
Set this triangular solver&#39;s stream information.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1244
Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > map_type
Specialization of Tpetra::Map used by this class.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:102
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Set this preconditioner&#39;s matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1190
void resize(size_type new_size, const value_type &x=value_type())
IntegralType getIntegralValue(const std::string &str, const std::string &paramName="", const std::string &sublistName="") const
int getNumApply() const
Return the number of calls to apply().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1064
MatrixType::local_ordinal_type local_ordinal_type
Type of the local indices of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:93
Teuchos::RCP< const map_type > getRangeMap() const
The range of this operator.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1178
bool isType(const std::string &name) const
Tpetra::CrsMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > crs_matrix_type
Specialization of Tpetra::CrsMatrix used by this class.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:108
void setParameters(const Teuchos::ParameterList &params)
Set this object&#39;s parameters.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:340
MatrixType::scalar_type scalar_type
Type of the entries of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:91
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print this object with given verbosity to the given output stream.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1127
virtual ~LocalSparseTriangularSolver()
Destructor (virtual for memory safety).
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:321
LocalSparseTriangularSolver()
Constructor that takes no input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:282
static std::string name()
std::string typeName(const T &t)
bool is_null() const