43 #ifndef IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
44 #define IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
46 #include "Tpetra_CrsMatrix.hpp"
47 #include "Tpetra_Core.hpp"
48 #include "Teuchos_StandardParameterEntryValidators.hpp"
49 #include "Tpetra_Details_determineLocalTriangularStructure.hpp"
50 #include "KokkosSparse_trsv.hpp"
52 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
53 # include "shylu_hts.hpp"
59 struct TrisolverType {
68 type_strs[0] =
"Internal";
70 type_strs[2] =
"KSPTRSV";
72 type_enums[0] = Internal;
74 type_enums[2] = KSPTRSV;
79 template<
class MatrixType>
80 class LocalSparseTriangularSolver<MatrixType>::HtsImpl {
82 typedef Tpetra::CrsMatrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type>
crs_matrix_type;
85 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
86 Timpl_ = Teuchos::null;
87 levelset_block_size_ = 1;
93 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
94 const char* block_size_s =
"trisolver: block size";
97 "The parameter \"" << block_size_s <<
"\" must be of type int.");
98 levelset_block_size_ = pl.
get<
int>(block_size_s);
100 if (levelset_block_size_ < 1)
101 levelset_block_size_ = 1;
110 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
112 transpose_ = conjugate_ =
false;
119 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
122 auto rowptr = T_in.getLocalRowPtrsHost();
123 auto colidx = T_in.getLocalIndicesHost();
124 auto val = T_in.getLocalValuesHost(Tpetra::Access::ReadOnly);
128 HTST::make_CrsMatrix(rowptr.size() - 1,
129 rowptr.data(), colidx.data(),
132 transpose_, conjugate_),
133 HtsCrsMatrixDeleter());
137 HTST::reprocess_numeric(Timpl_.get(), T_hts.
get());
140 if (T_in.getCrsGraph().is_null()) {
142 *out <<
"HTS compute failed because T_in.getCrsGraph().is_null().\n";
145 if ( ! T_in.getCrsGraph()->isSorted()) {
147 *out <<
"HTS compute failed because ! T_in.getCrsGraph().isSorted().\n";
150 if ( ! T_in.isStorageOptimized()) {
152 *out <<
"HTS compute failed because ! T_in.isStorageOptimized().\n";
156 typename HTST::PreprocessArgs args;
157 args.T = T_hts.
get();
160 args.nthreads = omp_get_max_threads();
164 args.save_for_reprocess =
true;
165 typename HTST::Options opts;
166 opts.levelset_block_size = levelset_block_size_;
167 args.options = &opts;
170 Timpl_ = Teuchos::rcpWithDealloc(HTST::preprocess(args), TImplDeleter());
171 }
catch (
const std::exception& e) {
173 *out <<
"HTS preprocess threw: " << e.what() <<
"\n";
182 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
190 void localApply (
const MV& X, MV& Y,
197 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
198 const auto& X_view = X.getLocalViewHost (Tpetra::Access::ReadOnly);
199 const auto& Y_view = Y.getLocalViewHost (Tpetra::Access::ReadWrite);
202 HTST::reset_max_nrhs(Timpl_.get(), X_view.extent(1));
204 HTST::solve_omp(Timpl_.get(),
206 reinterpret_cast<const scalar_type*
>(X_view.data()),
215 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
216 typedef ::Experimental::HTS<local_ordinal_type, size_t, scalar_type> HTST;
217 typedef typename HTST::Impl TImpl;
218 typedef typename HTST::CrsMatrix HtsCrsMatrix;
220 struct TImplDeleter {
221 void free (TImpl* impl) {
222 HTST::delete_Impl(impl);
226 struct HtsCrsMatrixDeleter {
227 void free (HtsCrsMatrix* T) {
228 HTST::delete_CrsMatrix(T);
233 bool transpose_, conjugate_;
234 int levelset_block_size_;
238 template<
class MatrixType>
249 (A_crs.
is_null (), std::invalid_argument,
250 "Ifpack2::LocalSparseTriangularSolver constructor: "
251 "The input matrix A is not a Tpetra::CrsMatrix.");
256 template<
class MatrixType>
265 *out_ <<
">>> DEBUG Ifpack2::LocalSparseTriangularSolver constructor"
273 (A_crs.
is_null (), std::invalid_argument,
274 "Ifpack2::LocalSparseTriangularSolver constructor: "
275 "The input matrix A is not a Tpetra::CrsMatrix.");
280 template<
class MatrixType>
287 template<
class MatrixType>
294 *out_ <<
">>> DEBUG Ifpack2::LocalSparseTriangularSolver constructor"
299 template<
class MatrixType>
302 isInitialized_ =
false;
304 reverseStorage_ =
false;
305 isInternallyChanged_ =
false;
309 initializeTime_ = 0.0;
312 isKokkosKernelsSptrsv_ =
false;
313 isKokkosKernelsStream_ =
false;
319 template<
class MatrixType>
323 if (!isKokkosKernelsStream_) {
325 kh_->destroy_sptrsv_handle();
329 for (
int i = 0; i < num_streams_; i++) {
331 kh_v_[i]->destroy_sptrsv_handle();
337 template<
class MatrixType>
346 Details::TrisolverType::Enum trisolverType = Details::TrisolverType::Internal;
348 static const char typeName[] =
"trisolver: type";
350 if ( ! pl.
isType<std::string>(typeName))
break;
353 Array<std::string> trisolverTypeStrs;
354 Array<Details::TrisolverType::Enum> trisolverTypeEnums;
355 Details::TrisolverType::loadPLTypeOption (trisolverTypeStrs, trisolverTypeEnums);
357 s2i(trisolverTypeStrs (), trisolverTypeEnums (), typeName,
false);
362 if (trisolverType == Details::TrisolverType::HTS) {
364 htsImpl_->setParameters (pl);
367 if (trisolverType == Details::TrisolverType::KSPTRSV) {
368 this->isKokkosKernelsSptrsv_ =
true;
371 this->isKokkosKernelsSptrsv_ =
false;
375 reverseStorage_ = pl.
get<
bool>(
"trisolver: reverse U");
378 (reverseStorage_ && (trisolverType == Details::TrisolverType::HTS || trisolverType == Details::TrisolverType::KSPTRSV),
379 std::logic_error,
"Ifpack2::LocalSparseTriangularSolver::setParameters: "
380 "You are not allowed to enable both HTS or KSPTRSV and the \"trisolver: reverse U\" "
381 "options. See GitHub issue #2647.");
384 template<
class MatrixType>
389 using Tpetra::Details::determineLocalTriangularStructure;
391 using local_matrix_type =
typename crs_matrix_type::local_matrix_device_type;
394 const char prefix[] =
"Ifpack2::LocalSparseTriangularSolver::initialize: ";
395 if (! out_.is_null ()) {
396 *out_ <<
">>> DEBUG " << prefix << std::endl;
399 if (!isKokkosKernelsStream_) {
401 (A_.
is_null (), std::runtime_error, prefix <<
"You must call "
402 "setMatrix() with a nonnull input matrix before you may call "
403 "initialize() or compute().");
404 if (A_crs_.is_null ()) {
407 (A_crs.get () ==
nullptr, std::invalid_argument,
408 prefix <<
"The input matrix A is not a Tpetra::CrsMatrix.");
411 auto G = A_crs_->getGraph ();
413 (G.is_null (), std::logic_error, prefix <<
"A_ and A_crs_ are nonnull, "
414 "but A_crs_'s RowGraph G is null. "
415 "Please report this bug to the Ifpack2 developers.");
420 (! G->isFillComplete (), std::runtime_error,
"If you call this method, "
421 "the matrix's graph must be fill complete. It is not.");
424 constexpr
bool ignoreMapsForTriStructure =
true;
425 auto lclTriStructure = [&] {
426 auto lclMatrix = A_crs_->getLocalMatrixDevice ();
427 auto lclRowMap = A_crs_->getRowMap ()->getLocalMap ();
428 auto lclColMap = A_crs_->getColMap ()->getLocalMap ();
430 determineLocalTriangularStructure (lclMatrix.graph,
433 ignoreMapsForTriStructure);
434 const LO lclNumRows = lclRowMap.getLocalNumElements ();
435 this->diag_ = (lclTriStruct.diagCount < lclNumRows) ?
"U" :
"N";
436 this->uplo_ = lclTriStruct.couldBeLowerTriangular ?
"L" :
437 (lclTriStruct.couldBeUpperTriangular ?
"U" :
"N");
441 if (reverseStorage_ && lclTriStructure.couldBeUpperTriangular &&
442 htsImpl_.is_null ()) {
444 auto Alocal = A_crs_->getLocalMatrixDevice();
445 auto ptr = Alocal.graph.row_map;
446 auto ind = Alocal.graph.entries;
447 auto val = Alocal.values;
449 auto numRows = Alocal.numRows();
450 auto numCols = Alocal.numCols();
451 auto numNnz = Alocal.nnz();
453 typename decltype(ptr)::non_const_type newptr (
"ptr", ptr.extent (0));
454 typename decltype(ind)::non_const_type newind (
"ind", ind.extent (0));
455 decltype(val) newval (
"val", val.extent (0));
458 typename crs_matrix_type::execution_space().fence();
461 auto A_r = Alocal.row(numRows-1 - row);
463 auto numEnt = A_r.length;
465 newind(rowStart + k) = numCols-1 - A_r.colidx(numEnt-1 - k);
466 newval(rowStart + k) = A_r.value (numEnt-1 - k);
469 newptr(row+1) = rowStart;
471 typename crs_matrix_type::execution_space().fence();
477 auto rowMap = A_->getRowMap();
478 auto numElems = rowMap->getLocalNumElements();
479 auto rowElems = rowMap->getLocalElementList();
482 for (
size_t i = 0; i < numElems; i++)
483 newRowElems[i] = rowElems[numElems-1 - i];
485 newRowMap =
Teuchos::rcp(
new map_type(rowMap->getGlobalNumElements(), newRowElems, rowMap->getIndexBase(), rowMap->getComm()));
489 auto colMap = A_->getColMap();
490 auto numElems = colMap->getLocalNumElements();
491 auto colElems = colMap->getLocalElementList();
494 for (
size_t i = 0; i < numElems; i++)
495 newColElems[i] = colElems[numElems-1 - i];
497 newColMap =
Teuchos::rcp(
new map_type(colMap->getGlobalNumElements(), newColElems, colMap->getIndexBase(), colMap->getComm()));
501 local_matrix_type newLocalMatrix(
"Upermuted", numRows, numCols, numNnz, newval, newptr, newind);
505 isInternallyChanged_ =
true;
510 auto newLclTriStructure =
511 determineLocalTriangularStructure (newLocalMatrix.graph,
512 newRowMap->getLocalMap (),
513 newColMap->getLocalMap (),
514 ignoreMapsForTriStructure);
515 const LO newLclNumRows = newRowMap->getLocalNumElements ();
516 this->diag_ = (newLclTriStructure.diagCount < newLclNumRows) ?
"U" :
"N";
517 this->uplo_ = newLclTriStructure.couldBeLowerTriangular ?
"L" :
518 (newLclTriStructure.couldBeUpperTriangular ?
"U" :
"N");
522 for (
int i = 0; i < num_streams_; i++) {
524 (A_crs_v_[i].
is_null (), std::runtime_error, prefix <<
"You must call "
525 "setMatrix() with a nonnull input matrix before you may call "
526 "initialize() or compute().");
527 auto G = A_crs_v_[i]->getGraph ();
529 (G.is_null (), std::logic_error, prefix <<
"A_crs_ are nonnull, "
530 "but A_crs_'s RowGraph G is null. "
531 "Please report this bug to the Ifpack2 developers.");
536 (! G->isFillComplete (), std::runtime_error,
"If you call this method, "
537 "the matrix's graph must be fill complete. It is not.");
540 constexpr
bool ignoreMapsForTriStructure =
true;
541 std::string prev_uplo_ = this->uplo_;
542 std::string prev_diag_ = this->diag_;
543 auto lclMatrix = A_crs_v_[i]->getLocalMatrixDevice ();
544 auto lclRowMap = A_crs_v_[i]->getRowMap ()->getLocalMap ();
545 auto lclColMap = A_crs_v_[i]->getColMap ()->getLocalMap ();
547 determineLocalTriangularStructure (lclMatrix.graph,
550 ignoreMapsForTriStructure);
551 const LO lclNumRows = lclRowMap.getLocalNumElements ();
552 this->diag_ = (lclTriStruct.diagCount < lclNumRows) ?
"U" :
"N";
553 this->uplo_ = lclTriStruct.couldBeLowerTriangular ?
"L" :
554 (lclTriStruct.couldBeUpperTriangular ?
"U" :
"N");
557 ((this->diag_ != prev_diag_) || (this->uplo_ != prev_uplo_),
558 std::logic_error, prefix <<
"A_crs_'s structures in streams "
559 "are different. Please report this bug to the Ifpack2 developers.");
566 htsImpl_->initialize (*A_crs_);
567 isInternallyChanged_ =
true;
570 const bool ksptrsv_valid_uplo = (this->uplo_ !=
"N");
571 kh_v_nonnull_ =
false;
572 if (this->isKokkosKernelsSptrsv_ && ksptrsv_valid_uplo && this->diag_ !=
"U")
574 if (!isKokkosKernelsStream_) {
578 kh_v_ = std::vector< Teuchos::RCP<k_handle> >(num_streams_);
579 for (
int i = 0; i < num_streams_; i++) {
582 kh_v_nonnull_ =
true;
586 isInitialized_ =
true;
590 template<
class MatrixType>
595 const char prefix[] =
"Ifpack2::LocalSparseTriangularSolver::compute: ";
596 if (! out_.is_null ()) {
597 *out_ <<
">>> DEBUG " << prefix << std::endl;
600 if (!isKokkosKernelsStream_) {
602 (A_.
is_null (), std::runtime_error, prefix <<
"You must call "
603 "setMatrix() with a nonnull input matrix before you may call "
604 "initialize() or compute().");
606 (A_crs_.is_null (), std::logic_error, prefix <<
"A_ is nonnull, but "
607 "A_crs_ is null. Please report this bug to the Ifpack2 developers.");
610 (! A_crs_->isFillComplete (), std::runtime_error,
"If you call this "
611 "method, the matrix must be fill complete. It is not.");
614 for(
int i = 0; i < num_streams_; i++) {
616 (A_crs_v_[i].
is_null (), std::runtime_error, prefix <<
"You must call "
617 "setMatrices() with a nonnull input matrix before you may call "
618 "initialize() or compute().");
621 (! A_crs_v_[i]->isFillComplete (), std::runtime_error,
"If you call this "
622 "method, the matrix must be fill complete. It is not.");
626 if (! isInitialized_) {
630 (! isInitialized_, std::logic_error, prefix <<
"initialize() should have "
631 "been called by this point, but isInitialized_ is false. "
632 "Please report this bug to the Ifpack2 developers.");
636 htsImpl_->compute (*A_crs_, out_);
639 const bool is_lower_tri = (this->uplo_ ==
"L") ?
true :
false;
642 auto Alocal = A_crs->getLocalMatrixDevice();
643 auto ptr = Alocal.graph.row_map;
644 auto ind = Alocal.graph.entries;
645 auto val = Alocal.values;
647 auto numRows = Alocal.numRows();
650 kh_->destroy_sptrsv_handle();
651 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA)
654 if (std::is_same<Kokkos::Cuda, HandleExecSpace>::value &&
655 std::is_same<int, local_ordinal_type>::value &&
656 (std::is_same<scalar_type, float>::value ||
657 std::is_same<scalar_type, double>::value ||
658 std::is_same<
scalar_type, Kokkos::complex<float>>::value ||
659 std::is_same<
scalar_type, Kokkos::complex<double>>::value))
661 kh_->create_sptrsv_handle(KokkosSparse::Experimental::SPTRSVAlgorithm::SPTRSV_CUSPARSE, numRows, is_lower_tri);
666 kh_->create_sptrsv_handle(KokkosSparse::Experimental::SPTRSVAlgorithm::SEQLVLSCHD_TP1, numRows, is_lower_tri);
668 KokkosSparse::Experimental::sptrsv_symbolic(kh_.getRawPtr(), ptr, ind, val);
670 else if (kh_v_nonnull_ && this->isKokkosKernelsSptrsv_) {
671 const bool is_lower_tri = (this->uplo_ ==
"L") ?
true :
false;
673 for (
int i = 0; i < num_streams_; i++) {
674 auto A_crs_i = Teuchos::rcp_dynamic_cast<
const crs_matrix_type> (A_crs_v_[i]);
675 auto Alocal_i = A_crs_i->getLocalMatrixDevice();
676 auto ptr_i = Alocal_i.graph.row_map;
677 auto ind_i = Alocal_i.graph.entries;
678 auto val_i = Alocal_i.values;
680 auto numRows_i = Alocal_i.numRows();
683 kh_v_[i]->destroy_sptrsv_handle();
684 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA)
687 if (std::is_same<Kokkos::Cuda, HandleExecSpace>::value &&
688 std::is_same<int, local_ordinal_type>::value &&
689 (std::is_same<scalar_type, float>::value ||
690 std::is_same<scalar_type, double>::value ||
691 std::is_same<
scalar_type, Kokkos::complex<float>>::value ||
692 std::is_same<
scalar_type, Kokkos::complex<double>>::value))
694 kh_v_[i]->create_sptrsv_handle(KokkosSparse::Experimental::SPTRSVAlgorithm::SPTRSV_CUSPARSE, numRows_i, is_lower_tri);
699 kh_v_[i]->create_sptrsv_handle(KokkosSparse::Experimental::SPTRSVAlgorithm::SEQLVLSCHD_TP1, numRows_i, is_lower_tri);
701 KokkosSparse::Experimental::sptrsv_symbolic(kh_v_[i].getRawPtr(), ptr_i, ind_i, val_i);
710 template<
class MatrixType>
722 using Teuchos::rcpFromRef;
725 const char prefix[] =
"Ifpack2::LocalSparseTriangularSolver::apply: ";
727 if (! out_.is_null ()) {
728 *out_ <<
">>> DEBUG " << prefix;
729 if (!isKokkosKernelsStream_) {
730 if (A_crs_.is_null ()) {
731 *out_ <<
"A_crs_ is null!" << std::endl;
736 const std::string uplo = this->uplo_;
739 const std::string diag = this->diag_;
740 *out_ <<
"uplo=\"" << uplo
741 <<
"\", trans=\"" << trans
742 <<
"\", diag=\"" << diag <<
"\"" << std::endl;
746 for (
int i = 0; i < num_streams_; i++) {
748 *out_ <<
"A_crs_v_[" << i <<
"]" <<
" is null!" << std::endl;
751 const std::string uplo = this->uplo_;
754 const std::string diag = this->diag_;
755 *out_ <<
"A_crs_v_[" << i <<
"]: "
757 <<
"\", trans=\"" << trans
758 <<
"\", diag=\"" << diag <<
"\"" << std::endl;
765 (! isComputed (), std::runtime_error, prefix <<
"If compute() has not yet "
766 "been called, or if you have changed the matrix via setMatrix(), you must "
767 "call compute() before you may call this method.");
770 if (!isKokkosKernelsStream_) {
772 (A_.
is_null (), std::logic_error, prefix <<
"A_ is null. "
773 "Please report this bug to the Ifpack2 developers.");
775 (A_crs_.is_null (), std::logic_error, prefix <<
"A_crs_ is null. "
776 "Please report this bug to the Ifpack2 developers.");
780 (! A_crs_->isFillComplete (), std::runtime_error,
"If you call this "
781 "method, the matrix must be fill complete. It is not. This means that "
782 " you must have called resumeFill() on the matrix before calling apply(). "
783 "This is NOT allowed. Note that this class may use the matrix's data in "
784 "place without copying it. Thus, you cannot change the matrix and expect "
785 "the solver to stay the same. If you have changed the matrix, first call "
786 "fillComplete() on it, then call compute() on this object, before you call"
787 " apply(). You do NOT need to call setMatrix, as long as the matrix "
788 "itself (that is, its address in memory) is the same.");
791 for (
int i = 0; i < num_streams_; i++) {
793 (A_crs_v_[i].
is_null (), std::logic_error, prefix <<
"A_crs_ is null. "
794 "Please report this bug to the Ifpack2 developers.");
798 (! A_crs_v_[i]->isFillComplete (), std::runtime_error,
"If you call this "
799 "method, the matrix must be fill complete. It is not. This means that "
800 " you must have called resumeFill() on the matrix before calling apply(). "
801 "This is NOT allowed. Note that this class may use the matrix's data in "
802 "place without copying it. Thus, you cannot change the matrix and expect "
803 "the solver to stay the same. If you have changed the matrix, first call "
804 "fillComplete() on it, then call compute() on this object, before you call"
805 " apply(). You do NOT need to call setMatrix, as long as the matrix "
806 "itself (that is, its address in memory) is the same.");
813 if (!isKokkosKernelsStream_) {
814 auto G = A_crs_->getGraph ();
816 (G.is_null (), std::logic_error, prefix <<
"A_ and A_crs_ are nonnull, "
817 "but A_crs_'s RowGraph G is null. "
818 "Please report this bug to the Ifpack2 developers.");
819 auto importer = G->getImporter ();
820 auto exporter = G->getExporter ();
822 if (! importer.is_null ()) {
823 if (X_colMap_.is_null () || X_colMap_->getNumVectors () != X.getNumVectors ()) {
824 X_colMap_ =
rcp (
new MV (importer->getTargetMap (), X.getNumVectors ()));
827 X_colMap_->putScalar (STS::zero ());
832 X_colMap_->doImport (X, *importer, Tpetra::ZERO);
834 X_cur = importer.is_null () ? rcpFromRef (X) :
835 Teuchos::rcp_const_cast<
const MV> (X_colMap_);
837 if (! exporter.is_null ()) {
838 if (Y_rowMap_.is_null () || Y_rowMap_->getNumVectors () != Y.getNumVectors ()) {
839 Y_rowMap_ =
rcp (
new MV (exporter->getSourceMap (), Y.getNumVectors ()));
842 Y_rowMap_->putScalar (STS::zero ());
844 Y_rowMap_->doExport (Y, *importer, Tpetra::ADD);
846 Y_cur = exporter.is_null () ? rcpFromRef (Y) : Y_rowMap_;
851 X_cur = rcpFromRef (X);
852 Y_cur = rcpFromRef (Y);
855 localApply (*X_cur, *Y_cur, mode, alpha, beta);
857 if (!isKokkosKernelsStream_) {
858 auto G = A_crs_->getGraph ();
859 auto exporter = G->getExporter ();
860 if (! exporter.is_null ()) {
861 Y.putScalar (STS::zero ());
862 Y.doExport (*Y_cur, *exporter, Tpetra::ADD);
868 template<
class MatrixType>
878 const char tfecfFuncName[] =
"localTriangularSolve: ";
880 if (!isKokkosKernelsStream_)
883 (! A_crs_->isFillComplete (), std::runtime_error,
884 "The matrix is not fill complete.");
886 ( A_crs_->getLocalNumRows() > 0 && this->uplo_ ==
"N", std::runtime_error,
887 "The matrix is neither upper triangular or lower triangular. "
888 "You may only call this method if the matrix is triangular. "
889 "Remember that this is a local (per MPI process) property, and that "
890 "Tpetra only knows how to do a local (per process) triangular solve.");
894 for (
int i = 0; i < num_streams_; i++) {
896 (! A_crs_v_[i]->isFillComplete (), std::runtime_error,
897 "The matrix is not fill complete.");
899 ( A_crs_v_[i]->getLocalNumRows() > 0 && this->uplo_ ==
"N", std::runtime_error,
900 "The matrix is neither upper triangular or lower triangular. "
901 "You may only call this method if the matrix is triangular. "
902 "Remember that this is a local (per MPI process) property, and that "
903 "Tpetra only knows how to do a local (per process) triangular solve.");
907 (! X.isConstantStride () || ! Y.isConstantStride (), std::invalid_argument,
908 "X and Y must be constant stride.");
911 (STS::isComplex && mode == TRANS, std::logic_error,
"This method does "
912 "not currently support non-conjugated transposed solve (mode == "
913 "Teuchos::TRANS) for complex scalar types.");
915 const std::string uplo = this->uplo_;
918 const size_t numVecs = std::min (X.getNumVectors (), Y.getNumVectors ());
922 auto A_crs = Teuchos::rcp_dynamic_cast<
const crs_matrix_type> (this->A_);
923 auto A_lclk = A_crs->getLocalMatrixDevice ();
924 auto ptr = A_lclk.graph.row_map;
925 auto ind = A_lclk.graph.entries;
926 auto val = A_lclk.values;
928 for (
size_t j = 0; j < numVecs; ++j) {
929 auto X_j = X.getVectorNonConst (j);
930 auto Y_j = Y.getVector (j);
931 auto X_lcl = X_j->getLocalViewDevice (Tpetra::Access::ReadWrite);
932 auto Y_lcl = Y_j->getLocalViewDevice (Tpetra::Access::ReadOnly);
933 auto X_lcl_1d = Kokkos::subview (X_lcl, Kokkos::ALL (), 0);
934 auto Y_lcl_1d = Kokkos::subview (Y_lcl, Kokkos::ALL (), 0);
935 KokkosSparse::Experimental::sptrsv_solve(kh_.getRawPtr(), ptr, ind, val, Y_lcl_1d, X_lcl_1d);
937 typename k_handle::HandleExecSpace().fence();
940 else if (kh_v_nonnull_ && this->isKokkosKernelsSptrsv_ && trans ==
"N")
942 std::vector<lno_row_view_t> ptr_v(num_streams_);
943 std::vector<lno_nonzero_view_t> ind_v(num_streams_);
944 std::vector<scalar_nonzero_view_t> val_v(num_streams_);
945 std::vector<k_handle *> KernelHandle_rawptr_v_(num_streams_);
946 for (
size_t j = 0; j < numVecs; ++j) {
947 auto X_j = X.getVectorNonConst (j);
948 auto Y_j = Y.getVector (j);
949 auto X_lcl = X_j->getLocalViewDevice (Tpetra::Access::ReadWrite);
950 auto Y_lcl = Y_j->getLocalViewDevice (Tpetra::Access::ReadOnly);
951 auto X_lcl_1d = Kokkos::subview (X_lcl, Kokkos::ALL (), 0);
952 auto Y_lcl_1d = Kokkos::subview (Y_lcl, Kokkos::ALL (), 0);
953 std::vector<decltype(X_lcl_1d)> x_v(num_streams_);
954 std::vector<decltype(Y_lcl_1d)> y_v(num_streams_);
955 local_ordinal_type stream_begin = 0;
956 local_ordinal_type stream_end;
957 for (
int i = 0; i < num_streams_; i++) {
958 auto A_crs_i = Teuchos::rcp_dynamic_cast<
const crs_matrix_type> (A_crs_v_[i]);
959 auto Alocal_i = A_crs_i->getLocalMatrixDevice();
960 ptr_v[i] = Alocal_i.graph.row_map;
961 ind_v[i] = Alocal_i.graph.entries;
962 val_v[i] = Alocal_i.values;
963 stream_end = stream_begin + Alocal_i.numRows();
964 x_v[i] = Kokkos::subview (X_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
965 y_v[i] = Kokkos::subview (Y_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);;
966 KernelHandle_rawptr_v_[i] = kh_v_[i].getRawPtr();
967 stream_begin = stream_end;
969 KokkosSparse::Experimental::sptrsv_solve_streams( exec_space_instances_, KernelHandle_rawptr_v_,
970 ptr_v, ind_v, val_v, y_v, x_v );
971 for (
int i = 0; i < num_streams_; i++) {
972 exec_space_instances_[i].fence();
978 const std::string diag = this->diag_;
983 auto A_lcl = this->A_crs_->getLocalMatrixHost ();
985 if (X.isConstantStride () && Y.isConstantStride ()) {
986 auto X_lcl = X.getLocalViewHost (Tpetra::Access::ReadWrite);
987 auto Y_lcl = Y.getLocalViewHost (Tpetra::Access::ReadOnly);
988 KokkosSparse::trsv (uplo.c_str (), trans.c_str (), diag.c_str (),
989 A_lcl, Y_lcl, X_lcl);
992 for (
size_t j = 0; j < numVecs; ++j) {
993 auto X_j = X.getVectorNonConst (j);
994 auto Y_j = Y.getVector (j);
995 auto X_lcl = X_j->getLocalViewHost (Tpetra::Access::ReadWrite);
996 auto Y_lcl = Y_j->getLocalViewHost (Tpetra::Access::ReadOnly);
997 KokkosSparse::trsv (uplo.c_str (), trans.c_str (),
998 diag.c_str (), A_lcl, Y_lcl, X_lcl);
1004 template<
class MatrixType>
1006 LocalSparseTriangularSolver<MatrixType>::
1007 localApply (
const MV& X,
1010 const scalar_type& alpha,
1011 const scalar_type& beta)
const
1014 htsImpl_->isComputed ()) {
1015 htsImpl_->localApply (X, Y, mode, alpha, beta);
1020 typedef scalar_type ST;
1023 if (beta == STS::zero ()) {
1024 if (alpha == STS::zero ()) {
1025 Y.putScalar (STS::zero ());
1028 this->localTriangularSolve (X, Y, mode);
1029 if (alpha != STS::one ()) {
1035 if (alpha == STS::zero ()) {
1040 this->localTriangularSolve (X, Y_tmp, mode);
1041 Y.update (alpha, Y_tmp, beta);
1047 template <
class MatrixType>
1051 return numInitialize_;
1054 template <
class MatrixType>
1061 template <
class MatrixType>
1068 template <
class MatrixType>
1072 return initializeTime_;
1075 template<
class MatrixType>
1079 return computeTime_;
1082 template<
class MatrixType>
1089 template <
class MatrixType>
1094 std::ostringstream os;
1099 os <<
"\"Ifpack2::LocalSparseTriangularSolver\": {";
1100 if (this->getObjectLabel () !=
"") {
1101 os <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
1103 os <<
"Initialized: " << (isInitialized () ?
"true" :
"false") <<
", "
1104 <<
"Computed: " << (isComputed () ?
"true" :
"false") <<
", ";
1106 if(isKokkosKernelsSptrsv_) os <<
"KK-SPTRSV, ";
1107 if(isKokkosKernelsStream_) os <<
"KK-SolveStream, ";
1110 os <<
"Matrix: null";
1113 os <<
"Matrix dimensions: ["
1114 << A_->getGlobalNumRows () <<
", "
1115 << A_->getGlobalNumCols () <<
"]"
1116 <<
", Number of nonzeros: " << A_->getGlobalNumEntries();
1120 os <<
", HTS computed: " << (htsImpl_->isComputed () ?
"true" :
"false");
1125 template <
class MatrixType>
1142 Tpetra::getDefaultComm () :
1147 if (! comm.is_null () && comm->getRank () == 0) {
1152 out <<
"\"Ifpack2::LocalSparseTriangularSolver\":" << endl;
1162 template <
class MatrixType>
1168 (A_.
is_null (), std::runtime_error,
1169 "Ifpack2::LocalSparseTriangularSolver::getDomainMap: "
1170 "The matrix is null. Please call setMatrix() with a nonnull input "
1171 "before calling this method.");
1172 return A_->getDomainMap ();
1175 template <
class MatrixType>
1181 (A_.
is_null (), std::runtime_error,
1182 "Ifpack2::LocalSparseTriangularSolver::getRangeMap: "
1183 "The matrix is null. Please call setMatrix() with a nonnull input "
1184 "before calling this method.");
1185 return A_->getRangeMap ();
1188 template<
class MatrixType>
1192 const char prefix[] =
"Ifpack2::LocalSparseTriangularSolver::setMatrix: ";
1201 (! A.
is_null () && A->getComm ()->getSize () == 1 &&
1202 A->getLocalNumRows () != A->getLocalNumCols (),
1203 std::runtime_error, prefix <<
"If A's communicator only contains one "
1204 "process, then A must be square. Instead, you provided a matrix A with "
1205 << A->getLocalNumRows () <<
" rows and " << A->getLocalNumCols ()
1211 isInitialized_ =
false;
1212 isComputed_ =
false;
1215 A_crs_ = Teuchos::null;
1222 (A_crs.
is_null (), std::invalid_argument, prefix <<
1223 "The input matrix A is not a Tpetra::CrsMatrix.");
1236 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030)
1237 isComputed_ =
false;
1241 template<
class MatrixType>
1245 const std::vector<HandleExecSpace>& exec_space_instances)
1247 isKokkosKernelsStream_ = isKokkosKernelsStream;
1248 num_streams_ = num_streams;
1249 exec_space_instances_ = exec_space_instances;
1250 A_crs_v_ = std::vector< Teuchos::RCP<crs_matrix_type> >(num_streams_);
1253 template<
class MatrixType>
1257 const char prefix[] =
"Ifpack2::LocalSparseTriangularSolver::setMatrixWithStreams: ";
1259 for(
int i = 0; i < num_streams_; i++) {
1264 if (A_crs_v[i].getRawPtr () != A_crs_v_[i].getRawPtr () || isInternallyChanged_) {
1267 (! A_crs_v[i].
is_null () && A_crs_v[i]->getComm ()->getSize () == 1 &&
1268 A_crs_v[i]->getLocalNumRows () != A_crs_v[i]->getLocalNumCols (),
1269 std::runtime_error, prefix <<
"If A's communicator only contains one "
1270 "process, then A must be square. Instead, you provided a matrix A with "
1271 << A_crs_v[i]->getLocalNumRows () <<
" rows and " << A_crs_v[i]->getLocalNumCols ()
1277 isInitialized_ =
false;
1278 isComputed_ =
false;
1281 A_crs_v_[i] = Teuchos::null;
1287 (A_crs.
is_null (), std::invalid_argument, prefix <<
1288 "The input matrix A is not a Tpetra::CrsMatrix.");
1289 A_crs_v_[i] = A_crs;
1298 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030)
1299 isComputed_ =
false;
1305 #define IFPACK2_LOCALSPARSETRIANGULARSOLVER_INSTANT(S,LO,GO,N) \
1306 template class Ifpack2::LocalSparseTriangularSolver< Tpetra::RowMatrix<S, LO, GO, N> >;
1308 #endif // IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
bool is_null(const boost::shared_ptr< T > &p)
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the preconditioner to X, and put the result in Y.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:712
int getNumCompute() const
Return the number of calls to compute().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1057
std::string description() const
A one-line description of this object.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1092
T & get(const std::string &name, T def_value)
bool nonnull(const std::shared_ptr< T > &p)
#define TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(throw_exception_test, Exception, msg)
void compute()
"Numeric" phase of setup
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:593
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
MatrixType::global_ordinal_type global_ordinal_type
Type of the global indices of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:95
bool isComputed() const
Return true if compute() has been called.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:222
double getApplyTime() const
Return the time spent in apply().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1085
double getComputeTime() const
Return the time spent in compute().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1078
double getInitializeTime() const
Return the time spent in initialize().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1071
MatrixType::node_type node_type
Node type of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:97
bool isParameter(const std::string &name) const
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
void setMatrices(const std::vector< Teuchos::RCP< crs_matrix_type > > &A_crs_v)
Set this preconditioner's matrices (used by stream interface of triangular solve).
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1255
#define TEUCHOS_TEST_FOR_EXCEPT_MSG(throw_exception_test, msg)
Teuchos::RCP< const map_type > getDomainMap() const
The domain of this operator.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1165
"Preconditioner" that solves local sparse triangular systems.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:79
void initialize()
"Symbolic" phase of setup
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:387
int getNumInitialize() const
Return the number of calls to initialize().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1050
void setStreamInfo(const bool &isKokkosKernelsStream, const int &num_streams, const std::vector< HandleExecSpace > &exec_space_instances)
Set this triangular solver's stream information.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1244
Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > map_type
Specialization of Tpetra::Map used by this class.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:102
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Set this preconditioner's matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1190
void resize(size_type new_size, const value_type &x=value_type())
IntegralType getIntegralValue(const std::string &str, const std::string ¶mName="", const std::string &sublistName="") const
int getNumApply() const
Return the number of calls to apply().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1064
MatrixType::local_ordinal_type local_ordinal_type
Type of the local indices of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:93
Teuchos::RCP< const map_type > getRangeMap() const
The range of this operator.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1178
bool isType(const std::string &name) const
Tpetra::CrsMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > crs_matrix_type
Specialization of Tpetra::CrsMatrix used by this class.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:108
void setParameters(const Teuchos::ParameterList ¶ms)
Set this object's parameters.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:340
MatrixType::scalar_type scalar_type
Type of the entries of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:91
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print this object with given verbosity to the given output stream.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1127
virtual ~LocalSparseTriangularSolver()
Destructor (virtual for memory safety).
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:321
LocalSparseTriangularSolver()
Constructor that takes no input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:282
static std::string name()
std::string typeName(const T &t)