10 #ifndef IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
11 #define IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
13 #include "Ifpack2_LocalSparseTriangularSolver_decl.hpp"
14 #include "Tpetra_CrsMatrix.hpp"
15 #include "Tpetra_Core.hpp"
16 #include "Teuchos_StandardParameterEntryValidators.hpp"
17 #include "Tpetra_Details_determineLocalTriangularStructure.hpp"
18 #include "KokkosSparse_trsv.hpp"
20 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
21 # include "shylu_hts.hpp"
27 struct TrisolverType {
36 type_strs[0] =
"Internal";
38 type_strs[2] =
"KSPTRSV";
40 type_enums[0] = Internal;
42 type_enums[2] = KSPTRSV;
47 template<
class MatrixType>
48 class LocalSparseTriangularSolver<MatrixType>::HtsImpl {
50 typedef Tpetra::CrsMatrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type>
crs_matrix_type;
53 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
54 Timpl_ = Teuchos::null;
55 levelset_block_size_ = 1;
61 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
62 const char* block_size_s =
"trisolver: block size";
65 "The parameter \"" << block_size_s <<
"\" must be of type int.");
66 levelset_block_size_ = pl.
get<
int>(block_size_s);
68 if (levelset_block_size_ < 1)
69 levelset_block_size_ = 1;
78 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
80 transpose_ = conjugate_ =
false;
87 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
90 auto rowptr = T_in.getLocalRowPtrsHost();
91 auto colidx = T_in.getLocalIndicesHost();
92 auto val = T_in.getLocalValuesHost(Tpetra::Access::ReadOnly);
96 HTST::make_CrsMatrix(rowptr.size() - 1,
97 rowptr.data(), colidx.data(),
100 transpose_, conjugate_),
101 HtsCrsMatrixDeleter());
105 HTST::reprocess_numeric(Timpl_.get(), T_hts.
get());
108 if (T_in.getCrsGraph().is_null()) {
110 *out <<
"HTS compute failed because T_in.getCrsGraph().is_null().\n";
113 if ( ! T_in.getCrsGraph()->isSorted()) {
115 *out <<
"HTS compute failed because ! T_in.getCrsGraph().isSorted().\n";
118 if ( ! T_in.isStorageOptimized()) {
120 *out <<
"HTS compute failed because ! T_in.isStorageOptimized().\n";
124 typename HTST::PreprocessArgs args;
125 args.T = T_hts.
get();
128 args.nthreads = omp_get_max_threads();
132 args.save_for_reprocess =
true;
133 typename HTST::Options opts;
134 opts.levelset_block_size = levelset_block_size_;
135 args.options = &opts;
138 Timpl_ = Teuchos::rcpWithDealloc(HTST::preprocess(args), TImplDeleter());
139 }
catch (
const std::exception& e) {
141 *out <<
"HTS preprocess threw: " << e.what() <<
"\n";
150 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
158 void localApply (
const MV& X, MV& Y,
165 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
166 const auto& X_view = X.getLocalViewHost (Tpetra::Access::ReadOnly);
167 const auto& Y_view = Y.getLocalViewHost (Tpetra::Access::ReadWrite);
170 HTST::reset_max_nrhs(Timpl_.get(), X_view.extent(1));
172 HTST::solve_omp(Timpl_.get(),
174 reinterpret_cast<const scalar_type*
>(X_view.data()),
183 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
184 typedef ::Experimental::HTS<local_ordinal_type, size_t, scalar_type> HTST;
185 typedef typename HTST::Impl TImpl;
186 typedef typename HTST::CrsMatrix HtsCrsMatrix;
188 struct TImplDeleter {
189 void free (TImpl* impl) {
190 HTST::delete_Impl(impl);
194 struct HtsCrsMatrixDeleter {
195 void free (HtsCrsMatrix* T) {
196 HTST::delete_CrsMatrix(T);
201 bool transpose_, conjugate_;
202 int levelset_block_size_;
206 template<
class MatrixType>
217 (A_crs.
is_null (), std::invalid_argument,
218 "Ifpack2::LocalSparseTriangularSolver constructor: "
219 "The input matrix A is not a Tpetra::CrsMatrix.");
224 template<
class MatrixType>
233 *out_ <<
">>> DEBUG Ifpack2::LocalSparseTriangularSolver constructor"
241 (A_crs.
is_null (), std::invalid_argument,
242 "Ifpack2::LocalSparseTriangularSolver constructor: "
243 "The input matrix A is not a Tpetra::CrsMatrix.");
248 template<
class MatrixType>
255 template<
class MatrixType>
262 *out_ <<
">>> DEBUG Ifpack2::LocalSparseTriangularSolver constructor"
267 template<
class MatrixType>
270 isInitialized_ =
false;
272 reverseStorage_ =
false;
273 isInternallyChanged_ =
false;
277 initializeTime_ = 0.0;
280 isKokkosKernelsSptrsv_ =
false;
281 isKokkosKernelsStream_ =
false;
287 template<
class MatrixType>
291 if (!isKokkosKernelsStream_) {
293 kh_->destroy_sptrsv_handle();
297 for (
int i = 0; i < num_streams_; i++) {
299 kh_v_[i]->destroy_sptrsv_handle();
305 template<
class MatrixType>
314 Details::TrisolverType::Enum trisolverType = Details::TrisolverType::Internal;
316 static const char typeName[] =
"trisolver: type";
318 if ( ! pl.
isType<std::string>(typeName))
break;
321 Array<std::string> trisolverTypeStrs;
322 Array<Details::TrisolverType::Enum> trisolverTypeEnums;
323 Details::TrisolverType::loadPLTypeOption (trisolverTypeStrs, trisolverTypeEnums);
325 s2i(trisolverTypeStrs (), trisolverTypeEnums (), typeName,
false);
330 if (trisolverType == Details::TrisolverType::HTS) {
332 htsImpl_->setParameters (pl);
335 if (trisolverType == Details::TrisolverType::KSPTRSV) {
336 this->isKokkosKernelsSptrsv_ =
true;
339 this->isKokkosKernelsSptrsv_ =
false;
343 reverseStorage_ = pl.
get<
bool>(
"trisolver: reverse U");
346 (reverseStorage_ && (trisolverType == Details::TrisolverType::HTS || trisolverType == Details::TrisolverType::KSPTRSV),
347 std::logic_error,
"Ifpack2::LocalSparseTriangularSolver::setParameters: "
348 "You are not allowed to enable both HTS or KSPTRSV and the \"trisolver: reverse U\" "
349 "options. See GitHub issue #2647.");
352 template<
class MatrixType>
357 using Tpetra::Details::determineLocalTriangularStructure;
359 using local_matrix_type =
typename crs_matrix_type::local_matrix_device_type;
362 const char prefix[] =
"Ifpack2::LocalSparseTriangularSolver::initialize: ";
363 if (! out_.is_null ()) {
364 *out_ <<
">>> DEBUG " << prefix << std::endl;
367 if (!isKokkosKernelsStream_) {
369 (A_.
is_null (), std::runtime_error, prefix <<
"You must call "
370 "setMatrix() with a nonnull input matrix before you may call "
371 "initialize() or compute().");
372 if (A_crs_.is_null ()) {
375 (A_crs.get () ==
nullptr, std::invalid_argument,
376 prefix <<
"The input matrix A is not a Tpetra::CrsMatrix.");
379 auto G = A_crs_->getGraph ();
381 (G.is_null (), std::logic_error, prefix <<
"A_ and A_crs_ are nonnull, "
382 "but A_crs_'s RowGraph G is null. "
383 "Please report this bug to the Ifpack2 developers.");
388 (! G->isFillComplete (), std::runtime_error,
"If you call this method, "
389 "the matrix's graph must be fill complete. It is not.");
392 constexpr
bool ignoreMapsForTriStructure =
true;
393 auto lclTriStructure = [&] {
394 auto lclMatrix = A_crs_->getLocalMatrixDevice ();
395 auto lclRowMap = A_crs_->getRowMap ()->getLocalMap ();
396 auto lclColMap = A_crs_->getColMap ()->getLocalMap ();
398 determineLocalTriangularStructure (lclMatrix.graph,
401 ignoreMapsForTriStructure);
402 const LO lclNumRows = lclRowMap.getLocalNumElements ();
403 this->diag_ = (lclTriStruct.diagCount < lclNumRows) ?
"U" :
"N";
404 this->uplo_ = lclTriStruct.couldBeLowerTriangular ?
"L" :
405 (lclTriStruct.couldBeUpperTriangular ?
"U" :
"N");
409 if (reverseStorage_ && lclTriStructure.couldBeUpperTriangular &&
410 htsImpl_.is_null ()) {
412 auto Alocal = A_crs_->getLocalMatrixDevice();
413 auto ptr = Alocal.graph.row_map;
414 auto ind = Alocal.graph.entries;
415 auto val = Alocal.values;
417 auto numRows = Alocal.numRows();
418 auto numCols = Alocal.numCols();
419 auto numNnz = Alocal.nnz();
421 typename decltype(ptr)::non_const_type newptr (
"ptr", ptr.extent (0));
422 typename decltype(ind)::non_const_type newind (
"ind", ind.extent (0));
423 decltype(val) newval (
"val", val.extent (0));
426 typename crs_matrix_type::execution_space().fence();
429 auto A_r = Alocal.row(numRows-1 - row);
431 auto numEnt = A_r.length;
433 newind(rowStart + k) = numCols-1 - A_r.colidx(numEnt-1 - k);
434 newval(rowStart + k) = A_r.value (numEnt-1 - k);
437 newptr(row+1) = rowStart;
439 typename crs_matrix_type::execution_space().fence();
445 auto rowMap = A_->getRowMap();
446 auto numElems = rowMap->getLocalNumElements();
447 auto rowElems = rowMap->getLocalElementList();
450 for (
size_t i = 0; i < numElems; i++)
451 newRowElems[i] = rowElems[numElems-1 - i];
453 newRowMap =
Teuchos::rcp(
new map_type(rowMap->getGlobalNumElements(), newRowElems, rowMap->getIndexBase(), rowMap->getComm()));
457 auto colMap = A_->getColMap();
458 auto numElems = colMap->getLocalNumElements();
459 auto colElems = colMap->getLocalElementList();
462 for (
size_t i = 0; i < numElems; i++)
463 newColElems[i] = colElems[numElems-1 - i];
465 newColMap =
Teuchos::rcp(
new map_type(colMap->getGlobalNumElements(), newColElems, colMap->getIndexBase(), colMap->getComm()));
469 local_matrix_type newLocalMatrix(
"Upermuted", numRows, numCols, numNnz, newval, newptr, newind);
473 isInternallyChanged_ =
true;
478 auto newLclTriStructure =
479 determineLocalTriangularStructure (newLocalMatrix.graph,
480 newRowMap->getLocalMap (),
481 newColMap->getLocalMap (),
482 ignoreMapsForTriStructure);
483 const LO newLclNumRows = newRowMap->getLocalNumElements ();
484 this->diag_ = (newLclTriStructure.diagCount < newLclNumRows) ?
"U" :
"N";
485 this->uplo_ = newLclTriStructure.couldBeLowerTriangular ?
"L" :
486 (newLclTriStructure.couldBeUpperTriangular ?
"U" :
"N");
490 for (
int i = 0; i < num_streams_; i++) {
492 (A_crs_v_[i].
is_null (), std::runtime_error, prefix <<
"You must call "
493 "setMatrix() with a nonnull input matrix before you may call "
494 "initialize() or compute().");
495 auto G = A_crs_v_[i]->getGraph ();
497 (G.is_null (), std::logic_error, prefix <<
"A_crs_ are nonnull, "
498 "but A_crs_'s RowGraph G is null. "
499 "Please report this bug to the Ifpack2 developers.");
504 (! G->isFillComplete (), std::runtime_error,
"If you call this method, "
505 "the matrix's graph must be fill complete. It is not.");
508 constexpr
bool ignoreMapsForTriStructure =
true;
509 std::string prev_uplo_ = this->uplo_;
510 std::string prev_diag_ = this->diag_;
511 auto lclMatrix = A_crs_v_[i]->getLocalMatrixDevice ();
512 auto lclRowMap = A_crs_v_[i]->getRowMap ()->getLocalMap ();
513 auto lclColMap = A_crs_v_[i]->getColMap ()->getLocalMap ();
515 determineLocalTriangularStructure (lclMatrix.graph,
518 ignoreMapsForTriStructure);
519 const LO lclNumRows = lclRowMap.getLocalNumElements ();
520 this->diag_ = (lclTriStruct.diagCount < lclNumRows) ?
"U" :
"N";
521 this->uplo_ = lclTriStruct.couldBeLowerTriangular ?
"L" :
522 (lclTriStruct.couldBeUpperTriangular ?
"U" :
"N");
525 ((this->diag_ != prev_diag_) || (this->uplo_ != prev_uplo_),
526 std::logic_error, prefix <<
"A_crs_'s structures in streams "
527 "are different. Please report this bug to the Ifpack2 developers.");
534 htsImpl_->initialize (*A_crs_);
535 isInternallyChanged_ =
true;
538 const bool ksptrsv_valid_uplo = (this->uplo_ !=
"N");
539 kh_v_nonnull_ =
false;
540 if (this->isKokkosKernelsSptrsv_ && ksptrsv_valid_uplo && this->diag_ !=
"U")
542 if (!isKokkosKernelsStream_) {
544 const bool is_lower_tri = (this->uplo_ ==
"L") ?
true :
false;
546 auto A_crs = Teuchos::rcp_dynamic_cast<
const crs_matrix_type> (A_,
true);
547 auto Alocal = A_crs->getLocalMatrixDevice();
548 auto ptr = Alocal.graph.row_map;
549 auto ind = Alocal.graph.entries;
550 auto val = Alocal.values;
552 auto numRows = Alocal.numRows();
553 kh_->create_sptrsv_handle(kokkosKernelsAlgorithm(), numRows, is_lower_tri);
554 KokkosSparse::Experimental::sptrsv_symbolic(kh_.getRawPtr(), ptr, ind, val);
556 kh_v_ = std::vector< Teuchos::RCP<k_handle> >(num_streams_);
557 for (
int i = 0; i < num_streams_; i++) {
559 auto A_crs_i = Teuchos::rcp_dynamic_cast<
const crs_matrix_type> (A_crs_v_[i],
true);
560 auto Alocal_i = A_crs_i->getLocalMatrixDevice();
561 auto ptr_i = Alocal_i.graph.row_map;
562 auto ind_i = Alocal_i.graph.entries;
563 auto val_i = Alocal_i.values;
565 auto numRows_i = Alocal_i.numRows();
567 const bool is_lower_tri = (this->uplo_ ==
"L") ?
true :
false;
568 kh_v_[i]->create_sptrsv_handle(kokkosKernelsAlgorithm(), numRows_i, is_lower_tri);
569 KokkosSparse::Experimental::sptrsv_symbolic(kh_v_[i].getRawPtr(), ptr_i, ind_i, val_i);
571 kh_v_nonnull_ =
true;
575 isInitialized_ =
true;
579 template<
class MatrixType>
580 KokkosSparse::Experimental::SPTRSVAlgorithm
583 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA)
586 if constexpr (std::is_same<Kokkos::Cuda, HandleExecSpace>::value &&
587 std::is_same<int, local_ordinal_type>::value &&
588 (std::is_same<scalar_type, float>::value ||
589 std::is_same<scalar_type, double>::value ||
590 std::is_same<scalar_type, Kokkos::complex<float>>::value ||
591 std::is_same<scalar_type, Kokkos::complex<double>>::value))
593 return KokkosSparse::Experimental::SPTRSVAlgorithm::SPTRSV_CUSPARSE;
596 return KokkosSparse::Experimental::SPTRSVAlgorithm::SEQLVLSCHD_TP1;
599 template<
class MatrixType>
604 const char prefix[] =
"Ifpack2::LocalSparseTriangularSolver::compute: ";
605 if (! out_.is_null ()) {
606 *out_ <<
">>> DEBUG " << prefix << std::endl;
609 if (!isKokkosKernelsStream_) {
611 (A_.
is_null (), std::runtime_error, prefix <<
"You must call "
612 "setMatrix() with a nonnull input matrix before you may call "
613 "initialize() or compute().");
615 (A_crs_.is_null (), std::logic_error, prefix <<
"A_ is nonnull, but "
616 "A_crs_ is null. Please report this bug to the Ifpack2 developers.");
619 (! A_crs_->isFillComplete (), std::runtime_error,
"If you call this "
620 "method, the matrix must be fill complete. It is not.");
623 for(
int i = 0; i < num_streams_; i++) {
625 (A_crs_v_[i].
is_null (), std::runtime_error, prefix <<
"You must call "
626 "setMatrices() with a nonnull input matrix before you may call "
627 "initialize() or compute().");
630 (! A_crs_v_[i]->isFillComplete (), std::runtime_error,
"If you call this "
631 "method, the matrix must be fill complete. It is not.");
635 if (! isInitialized_) {
639 (! isInitialized_, std::logic_error, prefix <<
"initialize() should have "
640 "been called by this point, but isInitialized_ is false. "
641 "Please report this bug to the Ifpack2 developers.");
650 #if defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && (CUDA_VERSION >= 11030)
651 if constexpr ( std::is_same_v<typename crs_matrix_type::execution_space, Kokkos::Cuda> )
653 if (this->isKokkosKernelsSptrsv_) {
655 auto A_crs = Teuchos::rcp_dynamic_cast<
const crs_matrix_type> (A_crs_,
true);
656 auto Alocal = A_crs->getLocalMatrixDevice();
657 auto val = Alocal.values;
658 #if (CUSPARSE_VERSION >= 12100)
659 auto *sptrsv_handle = kh_->get_sptrsv_handle();
660 auto cusparse_handle = sptrsv_handle->get_cuSparseHandle();
661 cusparseSpSV_updateMatrix(cusparse_handle->handle,
662 cusparse_handle->spsvDescr,
664 CUSPARSE_SPSV_UPDATE_GENERAL);
666 auto ptr = Alocal.graph.row_map;
667 auto ind = Alocal.graph.entries;
668 KokkosSparse::Experimental::sptrsv_symbolic(kh_.getRawPtr(), ptr, ind, val);
670 }
else if (kh_v_nonnull_) {
671 for (
int i = 0; i < num_streams_; i++) {
672 auto A_crs_i = Teuchos::rcp_dynamic_cast<
const crs_matrix_type> (A_crs_v_[i],
true);
673 auto Alocal_i = A_crs_i->getLocalMatrixDevice();
674 auto val_i = Alocal_i.values;
675 #if (CUSPARSE_VERSION >= 12100)
676 auto *sptrsv_handle = kh_v_[i]->get_sptrsv_handle();
677 auto cusparse_handle = sptrsv_handle->get_cuSparseHandle();
678 KOKKOS_CUSPARSE_SAFE_CALL(
679 cusparseSetStream(cusparse_handle->handle, exec_space_instances_[i].cuda_stream()));
680 cusparseSpSV_updateMatrix(cusparse_handle->handle,
681 cusparse_handle->spsvDescr,
683 CUSPARSE_SPSV_UPDATE_GENERAL);
685 auto ptr_i = Alocal_i.graph.row_map;
686 auto ind_i = Alocal_i.graph.entries;
687 KokkosSparse::Experimental::sptrsv_symbolic(exec_space_instances_[i], kh_v_[i].getRawPtr(), ptr_i, ind_i, val_i);
697 htsImpl_->compute (*A_crs_, out_);
704 template<
class MatrixType>
716 using Teuchos::rcpFromRef;
719 const char prefix[] =
"Ifpack2::LocalSparseTriangularSolver::apply: ";
721 if (! out_.is_null ()) {
722 *out_ <<
">>> DEBUG " << prefix;
723 if (!isKokkosKernelsStream_) {
724 if (A_crs_.is_null ()) {
725 *out_ <<
"A_crs_ is null!" << std::endl;
730 const std::string uplo = this->uplo_;
733 const std::string diag = this->diag_;
734 *out_ <<
"uplo=\"" << uplo
735 <<
"\", trans=\"" << trans
736 <<
"\", diag=\"" << diag <<
"\"" << std::endl;
740 for (
int i = 0; i < num_streams_; i++) {
742 *out_ <<
"A_crs_v_[" << i <<
"]" <<
" is null!" << std::endl;
745 const std::string uplo = this->uplo_;
748 const std::string diag = this->diag_;
749 *out_ <<
"A_crs_v_[" << i <<
"]: "
751 <<
"\", trans=\"" << trans
752 <<
"\", diag=\"" << diag <<
"\"" << std::endl;
759 (! isComputed (), std::runtime_error, prefix <<
"If compute() has not yet "
760 "been called, or if you have changed the matrix via setMatrix(), you must "
761 "call compute() before you may call this method.");
764 if (!isKokkosKernelsStream_) {
766 (A_.
is_null (), std::logic_error, prefix <<
"A_ is null. "
767 "Please report this bug to the Ifpack2 developers.");
769 (A_crs_.is_null (), std::logic_error, prefix <<
"A_crs_ is null. "
770 "Please report this bug to the Ifpack2 developers.");
774 (! A_crs_->isFillComplete (), std::runtime_error,
"If you call this "
775 "method, the matrix must be fill complete. It is not. This means that "
776 " you must have called resumeFill() on the matrix before calling apply(). "
777 "This is NOT allowed. Note that this class may use the matrix's data in "
778 "place without copying it. Thus, you cannot change the matrix and expect "
779 "the solver to stay the same. If you have changed the matrix, first call "
780 "fillComplete() on it, then call compute() on this object, before you call"
781 " apply(). You do NOT need to call setMatrix, as long as the matrix "
782 "itself (that is, its address in memory) is the same.");
785 for (
int i = 0; i < num_streams_; i++) {
787 (A_crs_v_[i].
is_null (), std::logic_error, prefix <<
"A_crs_ is null. "
788 "Please report this bug to the Ifpack2 developers.");
792 (! A_crs_v_[i]->isFillComplete (), std::runtime_error,
"If you call this "
793 "method, the matrix must be fill complete. It is not. This means that "
794 " you must have called resumeFill() on the matrix before calling apply(). "
795 "This is NOT allowed. Note that this class may use the matrix's data in "
796 "place without copying it. Thus, you cannot change the matrix and expect "
797 "the solver to stay the same. If you have changed the matrix, first call "
798 "fillComplete() on it, then call compute() on this object, before you call"
799 " apply(). You do NOT need to call setMatrix, as long as the matrix "
800 "itself (that is, its address in memory) is the same.");
807 if (!isKokkosKernelsStream_) {
808 auto G = A_crs_->getGraph ();
810 (G.is_null (), std::logic_error, prefix <<
"A_ and A_crs_ are nonnull, "
811 "but A_crs_'s RowGraph G is null. "
812 "Please report this bug to the Ifpack2 developers.");
813 auto importer = G->getImporter ();
814 auto exporter = G->getExporter ();
816 if (! importer.is_null ()) {
817 if (X_colMap_.is_null () || X_colMap_->getNumVectors () != X.getNumVectors ()) {
818 X_colMap_ =
rcp (
new MV (importer->getTargetMap (), X.getNumVectors ()));
821 X_colMap_->putScalar (STS::zero ());
826 X_colMap_->doImport (X, *importer, Tpetra::ZERO);
828 X_cur = importer.is_null () ? rcpFromRef (X) :
829 Teuchos::rcp_const_cast<
const MV> (X_colMap_);
831 if (! exporter.is_null ()) {
832 if (Y_rowMap_.is_null () || Y_rowMap_->getNumVectors () != Y.getNumVectors ()) {
833 Y_rowMap_ =
rcp (
new MV (exporter->getSourceMap (), Y.getNumVectors ()));
836 Y_rowMap_->putScalar (STS::zero ());
838 Y_rowMap_->doExport (Y, *importer, Tpetra::ADD);
840 Y_cur = exporter.is_null () ? rcpFromRef (Y) : Y_rowMap_;
845 X_cur = rcpFromRef (X);
846 Y_cur = rcpFromRef (Y);
849 localApply (*X_cur, *Y_cur, mode, alpha, beta);
851 if (!isKokkosKernelsStream_) {
852 auto G = A_crs_->getGraph ();
853 auto exporter = G->getExporter ();
854 if (! exporter.is_null ()) {
855 Y.putScalar (STS::zero ());
856 Y.doExport (*Y_cur, *exporter, Tpetra::ADD);
862 template<
class MatrixType>
872 const char tfecfFuncName[] =
"localTriangularSolve: ";
874 if (!isKokkosKernelsStream_)
877 (! A_crs_->isFillComplete (), std::runtime_error,
878 "The matrix is not fill complete.");
880 ( A_crs_->getLocalNumRows() > 0 && this->uplo_ ==
"N", std::runtime_error,
881 "The matrix is neither upper triangular or lower triangular. "
882 "You may only call this method if the matrix is triangular. "
883 "Remember that this is a local (per MPI process) property, and that "
884 "Tpetra only knows how to do a local (per process) triangular solve.");
888 for (
int i = 0; i < num_streams_; i++) {
890 (! A_crs_v_[i]->isFillComplete (), std::runtime_error,
891 "The matrix is not fill complete.");
893 ( A_crs_v_[i]->getLocalNumRows() > 0 && this->uplo_ ==
"N", std::runtime_error,
894 "The matrix is neither upper triangular or lower triangular. "
895 "You may only call this method if the matrix is triangular. "
896 "Remember that this is a local (per MPI process) property, and that "
897 "Tpetra only knows how to do a local (per process) triangular solve.");
901 (! X.isConstantStride () || ! Y.isConstantStride (), std::invalid_argument,
902 "X and Y must be constant stride.");
905 (STS::isComplex && mode == TRANS, std::logic_error,
"This method does "
906 "not currently support non-conjugated transposed solve (mode == "
907 "Teuchos::TRANS) for complex scalar types.");
909 const std::string uplo = this->uplo_;
912 const size_t numVecs = std::min (X.getNumVectors (), Y.getNumVectors ());
916 auto A_crs = Teuchos::rcp_dynamic_cast<
const crs_matrix_type> (this->A_);
917 auto A_lclk = A_crs->getLocalMatrixDevice ();
918 auto ptr = A_lclk.graph.row_map;
919 auto ind = A_lclk.graph.entries;
920 auto val = A_lclk.values;
922 for (
size_t j = 0; j < numVecs; ++j) {
923 auto X_j = X.getVectorNonConst (j);
924 auto Y_j = Y.getVector (j);
925 auto X_lcl = X_j->getLocalViewDevice (Tpetra::Access::ReadWrite);
926 auto Y_lcl = Y_j->getLocalViewDevice (Tpetra::Access::ReadOnly);
927 auto X_lcl_1d = Kokkos::subview (X_lcl, Kokkos::ALL (), 0);
928 auto Y_lcl_1d = Kokkos::subview (Y_lcl, Kokkos::ALL (), 0);
929 KokkosSparse::Experimental::sptrsv_solve(kh_.getRawPtr(), ptr, ind, val, Y_lcl_1d, X_lcl_1d);
931 typename k_handle::HandleExecSpace().fence();
934 else if (kh_v_nonnull_ && this->isKokkosKernelsSptrsv_ && trans ==
"N")
936 std::vector<lno_row_view_t> ptr_v(num_streams_);
937 std::vector<lno_nonzero_view_t> ind_v(num_streams_);
938 std::vector<scalar_nonzero_view_t> val_v(num_streams_);
939 std::vector<k_handle *> KernelHandle_rawptr_v_(num_streams_);
940 for (
size_t j = 0; j < numVecs; ++j) {
941 auto X_j = X.getVectorNonConst (j);
942 auto Y_j = Y.getVector (j);
943 auto X_lcl = X_j->getLocalViewDevice (Tpetra::Access::ReadWrite);
944 auto Y_lcl = Y_j->getLocalViewDevice (Tpetra::Access::ReadOnly);
945 auto X_lcl_1d = Kokkos::subview (X_lcl, Kokkos::ALL (), 0);
946 auto Y_lcl_1d = Kokkos::subview (Y_lcl, Kokkos::ALL (), 0);
947 std::vector<decltype(X_lcl_1d)> x_v(num_streams_);
948 std::vector<decltype(Y_lcl_1d)> y_v(num_streams_);
949 local_ordinal_type stream_begin = 0;
950 local_ordinal_type stream_end;
951 for (
int i = 0; i < num_streams_; i++) {
952 auto A_crs_i = Teuchos::rcp_dynamic_cast<
const crs_matrix_type> (A_crs_v_[i]);
953 auto Alocal_i = A_crs_i->getLocalMatrixDevice();
954 ptr_v[i] = Alocal_i.graph.row_map;
955 ind_v[i] = Alocal_i.graph.entries;
956 val_v[i] = Alocal_i.values;
957 stream_end = stream_begin + Alocal_i.numRows();
958 x_v[i] = Kokkos::subview (X_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
959 y_v[i] = Kokkos::subview (Y_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);;
960 KernelHandle_rawptr_v_[i] = kh_v_[i].getRawPtr();
961 stream_begin = stream_end;
964 KokkosSparse::Experimental::sptrsv_solve_streams( exec_space_instances_, KernelHandle_rawptr_v_,
965 ptr_v, ind_v, val_v, y_v, x_v );
971 const std::string diag = this->diag_;
976 auto A_lcl = this->A_crs_->getLocalMatrixHost ();
978 if (X.isConstantStride () && Y.isConstantStride ()) {
979 auto X_lcl = X.getLocalViewHost (Tpetra::Access::ReadWrite);
980 auto Y_lcl = Y.getLocalViewHost (Tpetra::Access::ReadOnly);
981 KokkosSparse::trsv (uplo.c_str (), trans.c_str (), diag.c_str (),
982 A_lcl, Y_lcl, X_lcl);
985 for (
size_t j = 0; j < numVecs; ++j) {
986 auto X_j = X.getVectorNonConst (j);
987 auto Y_j = Y.getVector (j);
988 auto X_lcl = X_j->getLocalViewHost (Tpetra::Access::ReadWrite);
989 auto Y_lcl = Y_j->getLocalViewHost (Tpetra::Access::ReadOnly);
990 KokkosSparse::trsv (uplo.c_str (), trans.c_str (),
991 diag.c_str (), A_lcl, Y_lcl, X_lcl);
997 template<
class MatrixType>
999 LocalSparseTriangularSolver<MatrixType>::
1000 localApply (
const MV& X,
1003 const scalar_type& alpha,
1004 const scalar_type& beta)
const
1007 htsImpl_->isComputed ()) {
1008 htsImpl_->localApply (X, Y, mode, alpha, beta);
1013 typedef scalar_type ST;
1016 if (beta == STS::zero ()) {
1017 if (alpha == STS::zero ()) {
1018 Y.putScalar (STS::zero ());
1021 this->localTriangularSolve (X, Y, mode);
1022 if (alpha != STS::one ()) {
1028 if (alpha == STS::zero ()) {
1033 this->localTriangularSolve (X, Y_tmp, mode);
1034 Y.update (alpha, Y_tmp, beta);
1040 template <
class MatrixType>
1044 return numInitialize_;
1047 template <
class MatrixType>
1054 template <
class MatrixType>
1061 template <
class MatrixType>
1065 return initializeTime_;
1068 template<
class MatrixType>
1072 return computeTime_;
1075 template<
class MatrixType>
1082 template <
class MatrixType>
1087 std::ostringstream os;
1092 os <<
"\"Ifpack2::LocalSparseTriangularSolver\": {";
1093 if (this->getObjectLabel () !=
"") {
1094 os <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
1096 os <<
"Initialized: " << (isInitialized () ?
"true" :
"false") <<
", "
1097 <<
"Computed: " << (isComputed () ?
"true" :
"false") <<
", ";
1099 if(isKokkosKernelsSptrsv_) os <<
"KK-SPTRSV, ";
1100 if(isKokkosKernelsStream_) os <<
"KK-SolveStream, ";
1103 os <<
"Matrix: null";
1106 os <<
"Matrix dimensions: ["
1107 << A_->getGlobalNumRows () <<
", "
1108 << A_->getGlobalNumCols () <<
"]"
1109 <<
", Number of nonzeros: " << A_->getGlobalNumEntries();
1113 os <<
", HTS computed: " << (htsImpl_->isComputed () ?
"true" :
"false");
1118 template <
class MatrixType>
1135 Tpetra::getDefaultComm () :
1140 if (! comm.is_null () && comm->getRank () == 0) {
1145 out <<
"\"Ifpack2::LocalSparseTriangularSolver\":" << endl;
1155 template <
class MatrixType>
1161 (A_.
is_null (), std::runtime_error,
1162 "Ifpack2::LocalSparseTriangularSolver::getDomainMap: "
1163 "The matrix is null. Please call setMatrix() with a nonnull input "
1164 "before calling this method.");
1165 return A_->getDomainMap ();
1168 template <
class MatrixType>
1174 (A_.
is_null (), std::runtime_error,
1175 "Ifpack2::LocalSparseTriangularSolver::getRangeMap: "
1176 "The matrix is null. Please call setMatrix() with a nonnull input "
1177 "before calling this method.");
1178 return A_->getRangeMap ();
1181 template<
class MatrixType>
1185 const char prefix[] =
"Ifpack2::LocalSparseTriangularSolver::setMatrix: ";
1194 (! A.
is_null () && A->getComm ()->getSize () == 1 &&
1195 A->getLocalNumRows () != A->getLocalNumCols (),
1196 std::runtime_error, prefix <<
"If A's communicator only contains one "
1197 "process, then A must be square. Instead, you provided a matrix A with "
1198 << A->getLocalNumRows () <<
" rows and " << A->getLocalNumCols ()
1204 isInitialized_ =
false;
1205 isComputed_ =
false;
1208 A_crs_ = Teuchos::null;
1215 (A_crs.
is_null (), std::invalid_argument, prefix <<
1216 "The input matrix A is not a Tpetra::CrsMatrix.");
1226 template<
class MatrixType>
1230 const std::vector<HandleExecSpace>& exec_space_instances)
1232 isKokkosKernelsStream_ = isKokkosKernelsStream;
1233 num_streams_ = num_streams;
1234 exec_space_instances_ = exec_space_instances;
1235 A_crs_v_ = std::vector< Teuchos::RCP<crs_matrix_type> >(num_streams_);
1238 template<
class MatrixType>
1242 const char prefix[] =
"Ifpack2::LocalSparseTriangularSolver::setMatrixWithStreams: ";
1244 for(
int i = 0; i < num_streams_; i++) {
1249 if (A_crs_v[i].getRawPtr () != A_crs_v_[i].getRawPtr () || isInternallyChanged_) {
1252 (! A_crs_v[i].
is_null () && A_crs_v[i]->getComm ()->getSize () == 1 &&
1253 A_crs_v[i]->getLocalNumRows () != A_crs_v[i]->getLocalNumCols (),
1254 std::runtime_error, prefix <<
"If A's communicator only contains one "
1255 "process, then A must be square. Instead, you provided a matrix A with "
1256 << A_crs_v[i]->getLocalNumRows () <<
" rows and " << A_crs_v[i]->getLocalNumCols ()
1262 isInitialized_ =
false;
1263 isComputed_ =
false;
1266 A_crs_v_[i] = Teuchos::null;
1272 (A_crs.
is_null (), std::invalid_argument, prefix <<
1273 "The input matrix A is not a Tpetra::CrsMatrix.");
1274 A_crs_v_[i] = A_crs;
1282 #define IFPACK2_LOCALSPARSETRIANGULARSOLVER_INSTANT(S,LO,GO,N) \
1283 template class Ifpack2::LocalSparseTriangularSolver< Tpetra::RowMatrix<S, LO, GO, N> >;
1285 #endif // IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
bool is_null(const boost::shared_ptr< T > &p)
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the preconditioner to X, and put the result in Y.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:706
int getNumCompute() const
Return the number of calls to compute().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1050
std::string description() const
A one-line description of this object.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1085
T & get(const std::string &name, T def_value)
bool nonnull(const std::shared_ptr< T > &p)
#define TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(throw_exception_test, Exception, msg)
void compute()
"Numeric" phase of setup
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:602
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
MatrixType::global_ordinal_type global_ordinal_type
Type of the global indices of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:62
bool isComputed() const
Return true if compute() has been called.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:189
double getApplyTime() const
Return the time spent in apply().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1078
double getComputeTime() const
Return the time spent in compute().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1071
double getInitializeTime() const
Return the time spent in initialize().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1064
MatrixType::node_type node_type
Node type of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:64
bool isParameter(const std::string &name) const
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
void setMatrices(const std::vector< Teuchos::RCP< crs_matrix_type > > &A_crs_v)
Set this preconditioner's matrices (used by stream interface of triangular solve).
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1240
#define TEUCHOS_TEST_FOR_EXCEPT_MSG(throw_exception_test, msg)
Teuchos::RCP< const map_type > getDomainMap() const
The domain of this operator.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1158
"Preconditioner" that solves local sparse triangular systems.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:46
void initialize()
"Symbolic" phase of setup
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:355
int getNumInitialize() const
Return the number of calls to initialize().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1043
void setStreamInfo(const bool &isKokkosKernelsStream, const int &num_streams, const std::vector< HandleExecSpace > &exec_space_instances)
Set this triangular solver's stream information.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1229
Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > map_type
Specialization of Tpetra::Map used by this class.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:69
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Set this preconditioner's matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1183
void resize(size_type new_size, const value_type &x=value_type())
IntegralType getIntegralValue(const std::string &str, const std::string ¶mName="", const std::string &sublistName="") const
int getNumApply() const
Return the number of calls to apply().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1057
MatrixType::local_ordinal_type local_ordinal_type
Type of the local indices of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:60
Teuchos::RCP< const map_type > getRangeMap() const
The range of this operator.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1171
bool isType(const std::string &name) const
Tpetra::CrsMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > crs_matrix_type
Specialization of Tpetra::CrsMatrix used by this class.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:75
void setParameters(const Teuchos::ParameterList ¶ms)
Set this object's parameters.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:308
MatrixType::scalar_type scalar_type
Type of the entries of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:58
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print this object with given verbosity to the given output stream.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1120
virtual ~LocalSparseTriangularSolver()
Destructor (virtual for memory safety).
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:289
LocalSparseTriangularSolver()
Constructor that takes no input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:250
static std::string name()
std::string typeName(const T &t)