10 #ifndef IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
11 #define IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
16 #include "Ifpack2_LocalSparseTriangularSolver_decl.hpp"
17 #include "Tpetra_CrsMatrix.hpp"
18 #include "Tpetra_Core.hpp"
19 #include "Teuchos_StandardParameterEntryValidators.hpp"
20 #include "Tpetra_Details_determineLocalTriangularStructure.hpp"
21 #include "KokkosSparse_trsv.hpp"
23 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
24 #include "shylu_hts.hpp"
31 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA)
33 inline void cusparse_error_throw(cusparseStatus_t cusparseStatus,
const char* name,
34 const char* file,
const int line) {
35 std::ostringstream out;
36 #if defined(CUSPARSE_VERSION) && (10300 <= CUSPARSE_VERSION)
37 out << name <<
" error( " << cusparseGetErrorName(cusparseStatus) <<
"): " << cusparseGetErrorString(cusparseStatus);
39 out << name <<
" error( ";
40 switch (cusparseStatus) {
41 case CUSPARSE_STATUS_NOT_INITIALIZED:
42 out <<
"CUSPARSE_STATUS_NOT_INITIALIZED): cusparse handle was not "
45 case CUSPARSE_STATUS_ALLOC_FAILED:
46 out <<
"CUSPARSE_STATUS_ALLOC_FAILED): you might tried to allocate too "
49 case CUSPARSE_STATUS_INVALID_VALUE: out <<
"CUSPARSE_STATUS_INVALID_VALUE)";
break;
50 case CUSPARSE_STATUS_ARCH_MISMATCH: out <<
"CUSPARSE_STATUS_ARCH_MISMATCH)";
break;
51 case CUSPARSE_STATUS_MAPPING_ERROR: out <<
"CUSPARSE_STATUS_MAPPING_ERROR)";
break;
52 case CUSPARSE_STATUS_EXECUTION_FAILED: out <<
"CUSPARSE_STATUS_EXECUTION_FAILED)";
break;
53 case CUSPARSE_STATUS_INTERNAL_ERROR: out <<
"CUSPARSE_STATUS_INTERNAL_ERROR)";
break;
54 case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED: out <<
"CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED)";
break;
55 case CUSPARSE_STATUS_ZERO_PIVOT: out <<
"CUSPARSE_STATUS_ZERO_PIVOT)";
break;
56 default: out <<
"unrecognized error code): this is bad!";
break;
58 #endif // CUSPARSE_VERSION
60 out <<
" " << file <<
":" << line;
62 throw std::runtime_error(out.str());
65 inline void cusparse_safe_call(cusparseStatus_t cusparseStatus,
const char* name,
const char* file =
nullptr,
67 if (CUSPARSE_STATUS_SUCCESS != cusparseStatus) {
68 cusparse_error_throw(cusparseStatus, name, file, line);
72 #define IFPACK2_DETAILS_CUSPARSE_SAFE_CALL(call) \
73 Ifpack2::Details::cusparse_safe_call(call, #call, __FILE__, __LINE__)
75 #endif // defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA)
77 struct TrisolverType {
86 type_strs[0] =
"Internal";
88 type_strs[2] =
"KSPTRSV";
90 type_enums[0] = Internal;
92 type_enums[2] = KSPTRSV;
97 template <
class MatrixType>
98 class LocalSparseTriangularSolver<MatrixType>::HtsImpl {
100 typedef Tpetra::CrsMatrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type>
crs_matrix_type;
103 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
104 Timpl_ = Teuchos::null;
105 levelset_block_size_ = 1;
111 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
112 const char* block_size_s =
"trisolver: block size";
115 "The parameter \"" << block_size_s <<
"\" must be of type int.");
116 levelset_block_size_ = pl.
get<
int>(block_size_s);
118 if (levelset_block_size_ < 1)
119 levelset_block_size_ = 1;
128 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
130 transpose_ = conjugate_ =
false;
137 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
140 auto rowptr = T_in.getLocalRowPtrsHost();
141 auto colidx = T_in.getLocalIndicesHost();
142 auto val = T_in.getLocalValuesHost(Tpetra::Access::ReadOnly);
146 HTST::make_CrsMatrix(rowptr.size() - 1,
147 rowptr.data(), colidx.data(),
150 transpose_, conjugate_),
151 HtsCrsMatrixDeleter());
155 HTST::reprocess_numeric(Timpl_.get(), T_hts.
get());
158 if (T_in.getCrsGraph().is_null()) {
160 *out <<
"HTS compute failed because T_in.getCrsGraph().is_null().\n";
163 if (!T_in.getCrsGraph()->isSorted()) {
165 *out <<
"HTS compute failed because ! T_in.getCrsGraph().isSorted().\n";
168 if (!T_in.isStorageOptimized()) {
170 *out <<
"HTS compute failed because ! T_in.isStorageOptimized().\n";
174 typename HTST::PreprocessArgs args;
175 args.T = T_hts.
get();
178 args.nthreads = omp_get_max_threads();
182 args.save_for_reprocess =
true;
183 typename HTST::Options opts;
184 opts.levelset_block_size = levelset_block_size_;
185 args.options = &opts;
188 Timpl_ = Teuchos::rcpWithDealloc(HTST::preprocess(args), TImplDeleter());
189 }
catch (
const std::exception& e) {
191 *out <<
"HTS preprocess threw: " << e.what() <<
"\n";
200 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
208 void localApply(
const MV& X, MV& Y,
215 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
216 const auto& X_view = X.getLocalViewHost(Tpetra::Access::ReadOnly);
217 const auto& Y_view = Y.getLocalViewHost(Tpetra::Access::ReadWrite);
220 HTST::reset_max_nrhs(Timpl_.get(), X_view.extent(1));
222 HTST::solve_omp(Timpl_.get(),
224 reinterpret_cast<const scalar_type*
>(X_view.data()),
233 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
234 typedef ::Experimental::HTS<local_ordinal_type, size_t, scalar_type> HTST;
235 typedef typename HTST::Impl TImpl;
236 typedef typename HTST::CrsMatrix HtsCrsMatrix;
238 struct TImplDeleter {
239 void free(TImpl* impl) {
240 HTST::delete_Impl(impl);
244 struct HtsCrsMatrixDeleter {
245 void free(HtsCrsMatrix* T) {
246 HTST::delete_CrsMatrix(T);
251 bool transpose_, conjugate_;
252 int levelset_block_size_;
256 template <
class MatrixType>
266 "Ifpack2::LocalSparseTriangularSolver constructor: "
267 "The input matrix A is not a Tpetra::CrsMatrix.");
272 template <
class MatrixType>
280 *out_ <<
">>> DEBUG Ifpack2::LocalSparseTriangularSolver constructor"
288 "Ifpack2::LocalSparseTriangularSolver constructor: "
289 "The input matrix A is not a Tpetra::CrsMatrix.");
294 template <
class MatrixType>
300 template <
class MatrixType>
306 *out_ <<
">>> DEBUG Ifpack2::LocalSparseTriangularSolver constructor"
311 template <
class MatrixType>
313 isInitialized_ =
false;
315 reverseStorage_ =
false;
316 isInternallyChanged_ =
false;
320 initializeTime_ = 0.0;
323 isKokkosKernelsSptrsv_ =
false;
324 isKokkosKernelsStream_ =
false;
330 template <
class MatrixType>
333 if (!isKokkosKernelsStream_) {
335 kh_->destroy_sptrsv_handle();
338 for (
size_t i = 0; i < kh_v_.size(); i++) {
340 kh_v_[i]->destroy_sptrsv_handle();
346 template <
class MatrixType>
353 Details::TrisolverType::Enum trisolverType = Details::TrisolverType::Internal;
355 static const char typeName[] =
"trisolver: type";
357 if (!pl.
isType<std::string>(typeName))
break;
360 Array<std::string> trisolverTypeStrs;
361 Array<Details::TrisolverType::Enum> trisolverTypeEnums;
362 Details::TrisolverType::loadPLTypeOption(trisolverTypeStrs, trisolverTypeEnums);
364 s2i(trisolverTypeStrs(), trisolverTypeEnums(), typeName,
false);
369 if (trisolverType == Details::TrisolverType::HTS) {
371 htsImpl_->setParameters(pl);
374 if (trisolverType == Details::TrisolverType::KSPTRSV) {
375 this->isKokkosKernelsSptrsv_ =
true;
377 this->isKokkosKernelsSptrsv_ =
false;
381 reverseStorage_ = pl.
get<
bool>(
"trisolver: reverse U");
383 TEUCHOS_TEST_FOR_EXCEPTION(reverseStorage_ && (trisolverType == Details::TrisolverType::HTS || trisolverType == Details::TrisolverType::KSPTRSV),
385 "Ifpack2::LocalSparseTriangularSolver::setParameters: "
386 "You are not allowed to enable both HTS or KSPTRSV and the \"trisolver: reverse U\" "
387 "options. See GitHub issue #2647.");
390 template <
class MatrixType>
393 using Tpetra::Details::determineLocalTriangularStructure;
395 using local_matrix_type =
typename crs_matrix_type::local_matrix_device_type;
398 const char prefix[] =
"Ifpack2::LocalSparseTriangularSolver::initialize: ";
399 if (!out_.is_null()) {
400 *out_ <<
">>> DEBUG " << prefix << std::endl;
403 if (!isKokkosKernelsStream_) {
405 "setMatrix() with a nonnull input matrix before you may call "
406 "initialize() or compute().");
407 if (A_crs_.is_null()) {
410 prefix <<
"The input matrix A is not a Tpetra::CrsMatrix.");
413 auto G = A_crs_->getGraph();
415 "but A_crs_'s RowGraph G is null. "
416 "Please report this bug to the Ifpack2 developers.");
421 "If you call this method, "
422 "the matrix's graph must be fill complete. It is not.");
425 constexpr
bool ignoreMapsForTriStructure =
true;
426 auto lclTriStructure = [&] {
427 auto lclMatrix = A_crs_->getLocalMatrixDevice();
428 auto lclRowMap = A_crs_->getRowMap()->getLocalMap();
429 auto lclColMap = A_crs_->getColMap()->getLocalMap();
431 determineLocalTriangularStructure(lclMatrix.graph,
434 ignoreMapsForTriStructure);
435 const LO lclNumRows = lclRowMap.getLocalNumElements();
436 this->diag_ = (lclTriStruct.diagCount < lclNumRows) ?
"U" :
"N";
437 this->uplo_ = lclTriStruct.couldBeLowerTriangular ?
"L" : (lclTriStruct.couldBeUpperTriangular ?
"U" :
"N");
441 if (reverseStorage_ && lclTriStructure.couldBeUpperTriangular &&
442 htsImpl_.is_null()) {
444 auto Alocal = A_crs_->getLocalMatrixDevice();
445 auto ptr = Alocal.graph.row_map;
446 auto ind = Alocal.graph.entries;
447 auto val = Alocal.values;
449 auto numRows = Alocal.numRows();
450 auto numCols = Alocal.numCols();
451 auto numNnz = Alocal.nnz();
453 typename decltype(ptr)::non_const_type newptr(
"ptr", ptr.extent(0));
454 typename decltype(ind)::non_const_type newind(
"ind", ind.extent(0));
455 decltype(val) newval(
"val", val.extent(0));
458 typename crs_matrix_type::execution_space().fence();
461 auto A_r = Alocal.row(numRows - 1 - row);
463 auto numEnt = A_r.length;
465 newind(rowStart + k) = numCols - 1 - A_r.colidx(numEnt - 1 - k);
466 newval(rowStart + k) = A_r.value(numEnt - 1 - k);
469 newptr(row + 1) = rowStart;
471 typename crs_matrix_type::execution_space().fence();
477 auto rowMap = A_->getRowMap();
478 auto numElems = rowMap->getLocalNumElements();
479 auto rowElems = rowMap->getLocalElementList();
482 for (
size_t i = 0; i < numElems; i++)
483 newRowElems[i] = rowElems[numElems - 1 - i];
485 newRowMap =
Teuchos::rcp(
new map_type(rowMap->getGlobalNumElements(), newRowElems, rowMap->getIndexBase(), rowMap->getComm()));
489 auto colMap = A_->getColMap();
490 auto numElems = colMap->getLocalNumElements();
491 auto colElems = colMap->getLocalElementList();
494 for (
size_t i = 0; i < numElems; i++)
495 newColElems[i] = colElems[numElems - 1 - i];
497 newColMap =
Teuchos::rcp(
new map_type(colMap->getGlobalNumElements(), newColElems, colMap->getIndexBase(), colMap->getComm()));
501 local_matrix_type newLocalMatrix(
"Upermuted", numRows, numCols, numNnz, newval, newptr, newind);
505 isInternallyChanged_ =
true;
510 auto newLclTriStructure =
511 determineLocalTriangularStructure(newLocalMatrix.graph,
512 newRowMap->getLocalMap(),
513 newColMap->getLocalMap(),
514 ignoreMapsForTriStructure);
515 const LO newLclNumRows = newRowMap->getLocalNumElements();
516 this->diag_ = (newLclTriStructure.diagCount < newLclNumRows) ?
"U" :
"N";
517 this->uplo_ = newLclTriStructure.couldBeLowerTriangular ?
"L" : (newLclTriStructure.couldBeUpperTriangular ?
"U" :
"N");
520 bool prev_ambiguous =
false;
521 bool all_ambiguous =
true;
522 for (
int i = 0; i < num_streams_; i++) {
524 "setMatrix() with a nonnull input matrix before you may call "
525 "initialize() or compute().");
526 auto G = A_crs_v_[i]->getGraph();
528 "but A_crs_'s RowGraph G is null. "
529 "Please report this bug to the Ifpack2 developers.");
534 "If you call this method, "
535 "the matrix's graph must be fill complete. It is not.");
538 constexpr
bool ignoreMapsForTriStructure =
true;
539 std::string prev_uplo = this->uplo_;
540 std::string prev_diag = this->diag_;
541 auto lclMatrix = A_crs_v_[i]->getLocalMatrixDevice();
542 auto lclRowMap = A_crs_v_[i]->getRowMap()->getLocalMap();
543 auto lclColMap = A_crs_v_[i]->getColMap()->getLocalMap();
545 determineLocalTriangularStructure(lclMatrix.graph,
548 ignoreMapsForTriStructure);
549 const LO lclNumRows = lclRowMap.getLocalNumElements();
550 this->diag_ = (lclTriStruct.diagCount < lclNumRows) ?
"U" :
"N";
551 const bool could_be_lower = lclTriStruct.couldBeLowerTriangular;
552 const bool could_be_upper = lclTriStruct.couldBeUpperTriangular;
553 if (could_be_lower && could_be_upper) {
555 this->uplo_ = prev_uplo;
556 prev_ambiguous =
true;
558 this->uplo_ = could_be_lower ?
"L" : (could_be_upper ?
"U" :
"N");
559 if (this->uplo_ !=
"N" && prev_uplo ==
"N" && prev_ambiguous) {
560 prev_uplo = this->uplo_;
562 prev_ambiguous =
false;
564 all_ambiguous &= prev_ambiguous;
567 std::logic_error, prefix <<
"A_crs_'s structures in streams "
568 "are different. Please report this bug to the Ifpack2 developers.");
578 htsImpl_->initialize(*A_crs_);
579 isInternallyChanged_ =
true;
582 const bool ksptrsv_valid_uplo = (this->uplo_ !=
"N");
583 kh_v_nonnull_ =
false;
584 if (this->isKokkosKernelsSptrsv_ && ksptrsv_valid_uplo && this->diag_ !=
"U") {
585 if (!isKokkosKernelsStream_) {
587 const bool is_lower_tri = (this->uplo_ ==
"L") ?
true :
false;
589 auto A_crs = Teuchos::rcp_dynamic_cast<
const crs_matrix_type>(A_,
true);
590 auto Alocal = A_crs->getLocalMatrixDevice();
591 auto ptr = Alocal.graph.row_map;
592 auto ind = Alocal.graph.entries;
593 auto val = Alocal.values;
595 auto numRows = Alocal.numRows();
596 kh_->create_sptrsv_handle(kokkosKernelsAlgorithm(), numRows, is_lower_tri);
597 KokkosSparse::sptrsv_symbolic(kh_.getRawPtr(), ptr, ind, val);
599 kh_v_ = std::vector<Teuchos::RCP<k_handle>>(num_streams_);
600 for (
int i = 0; i < num_streams_; i++) {
602 auto A_crs_i = Teuchos::rcp_dynamic_cast<
const crs_matrix_type>(A_crs_v_[i],
true);
603 auto Alocal_i = A_crs_i->getLocalMatrixDevice();
604 auto ptr_i = Alocal_i.graph.row_map;
605 auto ind_i = Alocal_i.graph.entries;
606 auto val_i = Alocal_i.values;
608 auto numRows_i = Alocal_i.numRows();
610 const bool is_lower_tri = (this->uplo_ ==
"L") ?
true :
false;
611 kh_v_[i]->create_sptrsv_handle(kokkosKernelsAlgorithm(), numRows_i, is_lower_tri);
612 KokkosSparse::sptrsv_symbolic(kh_v_[i].getRawPtr(), ptr_i, ind_i, val_i);
614 kh_v_nonnull_ =
true;
618 isInitialized_ =
true;
622 template <
class MatrixType>
623 KokkosSparse::Experimental::SPTRSVAlgorithm
625 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA)
628 if constexpr (std::is_same<Kokkos::Cuda, HandleExecSpace>::value &&
629 std::is_same<int, local_ordinal_type>::value &&
630 (std::is_same<scalar_type, float>::value ||
631 std::is_same<scalar_type, double>::value ||
632 std::is_same<impl_scalar_type, Kokkos::complex<float>>::value ||
633 std::is_same<impl_scalar_type, Kokkos::complex<double>>::value)) {
634 return KokkosSparse::Experimental::SPTRSVAlgorithm::SPTRSV_CUSPARSE;
637 return KokkosSparse::Experimental::SPTRSVAlgorithm::SEQLVLSCHD_TP1;
640 template <
class MatrixType>
643 const char prefix[] =
"Ifpack2::LocalSparseTriangularSolver::compute: ";
644 if (!out_.is_null()) {
645 *out_ <<
">>> DEBUG " << prefix << std::endl;
648 if (!isKokkosKernelsStream_) {
650 "setMatrix() with a nonnull input matrix before you may call "
651 "initialize() or compute().");
653 "A_crs_ is null. Please report this bug to the Ifpack2 developers.");
657 "method, the matrix must be fill complete. It is not.");
659 for (
int i = 0; i < num_streams_; i++) {
661 "setMatrices() with a nonnull input matrix before you may call "
662 "initialize() or compute().");
666 "method, the matrix must be fill complete. It is not.");
670 if (!isInitialized_) {
674 "been called by this point, but isInitialized_ is false. "
675 "Please report this bug to the Ifpack2 developers.");
684 #if defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && (CUDA_VERSION >= 11030)
685 if constexpr (std::is_same_v<typename crs_matrix_type::execution_space, Kokkos::Cuda>) {
686 if (this->isKokkosKernelsSptrsv_) {
688 auto A_crs = Teuchos::rcp_dynamic_cast<
const crs_matrix_type>(A_crs_,
true);
689 auto Alocal = A_crs->getLocalMatrixDevice();
690 auto val = Alocal.values;
691 #if (CUSPARSE_VERSION >= 12100)
692 auto* sptrsv_handle = kh_->get_sptrsv_handle();
693 auto cusparse_handle = sptrsv_handle->get_cuSparseHandle();
694 cusparseSpSV_updateMatrix(cusparse_handle->handle,
695 cusparse_handle->spsvDescr,
697 CUSPARSE_SPSV_UPDATE_GENERAL);
699 auto ptr = Alocal.graph.row_map;
700 auto ind = Alocal.graph.entries;
701 KokkosSparse::sptrsv_symbolic(kh_.getRawPtr(), ptr, ind, val);
703 }
else if (kh_v_nonnull_) {
704 for (
int i = 0; i < num_streams_; i++) {
705 auto A_crs_i = Teuchos::rcp_dynamic_cast<
const crs_matrix_type>(A_crs_v_[i],
true);
706 auto Alocal_i = A_crs_i->getLocalMatrixDevice();
707 auto val_i = Alocal_i.values;
708 #if (CUSPARSE_VERSION >= 12100)
709 auto* sptrsv_handle = kh_v_[i]->get_sptrsv_handle();
710 auto cusparse_handle = sptrsv_handle->get_cuSparseHandle();
711 IFPACK2_DETAILS_CUSPARSE_SAFE_CALL(
712 cusparseSetStream(cusparse_handle->handle, exec_space_instances_[i].cuda_stream()));
713 cusparseSpSV_updateMatrix(cusparse_handle->handle,
714 cusparse_handle->spsvDescr,
716 CUSPARSE_SPSV_UPDATE_GENERAL);
718 auto ptr_i = Alocal_i.graph.row_map;
719 auto ind_i = Alocal_i.graph.entries;
720 KokkosSparse::sptrsv_symbolic(exec_space_instances_[i], kh_v_[i].getRawPtr(), ptr_i, ind_i, val_i);
730 htsImpl_->compute(*A_crs_, out_);
737 template <
class MatrixType>
748 using Teuchos::rcpFromRef;
751 const char prefix[] =
"Ifpack2::LocalSparseTriangularSolver::apply: ";
753 if (!out_.is_null()) {
754 *out_ <<
">>> DEBUG " << prefix;
755 if (!isKokkosKernelsStream_) {
756 if (A_crs_.is_null()) {
757 *out_ <<
"A_crs_ is null!" << std::endl;
761 const std::string uplo = this->uplo_;
763 const std::string diag = this->diag_;
764 *out_ <<
"uplo=\"" << uplo
765 <<
"\", trans=\"" << trans
766 <<
"\", diag=\"" << diag <<
"\"" << std::endl;
769 for (
int i = 0; i < num_streams_; i++) {
771 *out_ <<
"A_crs_v_[" << i <<
"]"
772 <<
" is null!" << std::endl;
774 const std::string uplo = this->uplo_;
776 const std::string diag = this->diag_;
777 *out_ <<
"A_crs_v_[" << i <<
"]: "
779 <<
"\", trans=\"" << trans
780 <<
"\", diag=\"" << diag <<
"\"" << std::endl;
787 "been called, or if you have changed the matrix via setMatrix(), you must "
788 "call compute() before you may call this method.");
791 if (!isKokkosKernelsStream_) {
793 "Please report this bug to the Ifpack2 developers.");
795 "Please report this bug to the Ifpack2 developers.");
800 "method, the matrix must be fill complete. It is not. This means that "
801 " you must have called resumeFill() on the matrix before calling apply(). "
802 "This is NOT allowed. Note that this class may use the matrix's data in "
803 "place without copying it. Thus, you cannot change the matrix and expect "
804 "the solver to stay the same. If you have changed the matrix, first call "
805 "fillComplete() on it, then call compute() on this object, before you call"
806 " apply(). You do NOT need to call setMatrix, as long as the matrix "
807 "itself (that is, its address in memory) is the same.");
809 for (
int i = 0; i < num_streams_; i++) {
811 "Please report this bug to the Ifpack2 developers.");
816 "method, the matrix must be fill complete. It is not. This means that "
817 " you must have called resumeFill() on the matrix before calling apply(). "
818 "This is NOT allowed. Note that this class may use the matrix's data in "
819 "place without copying it. Thus, you cannot change the matrix and expect "
820 "the solver to stay the same. If you have changed the matrix, first call "
821 "fillComplete() on it, then call compute() on this object, before you call"
822 " apply(). You do NOT need to call setMatrix, as long as the matrix "
823 "itself (that is, its address in memory) is the same.");
830 if (!isKokkosKernelsStream_) {
831 auto G = A_crs_->getGraph();
833 "but A_crs_'s RowGraph G is null. "
834 "Please report this bug to the Ifpack2 developers.");
835 auto importer = G->getImporter();
836 auto exporter = G->getExporter();
838 if (!importer.is_null()) {
839 if (X_colMap_.is_null() || X_colMap_->getNumVectors() != X.getNumVectors()) {
840 X_colMap_ =
rcp(
new MV(importer->getTargetMap(), X.getNumVectors()));
842 X_colMap_->putScalar(STS::zero());
847 X_colMap_->doImport(X, *importer, Tpetra::ZERO);
849 X_cur = importer.is_null() ? rcpFromRef(X) : Teuchos::rcp_const_cast<
const MV>(X_colMap_);
851 if (!exporter.is_null()) {
852 if (Y_rowMap_.is_null() || Y_rowMap_->getNumVectors() != Y.getNumVectors()) {
853 Y_rowMap_ =
rcp(
new MV(exporter->getSourceMap(), Y.getNumVectors()));
855 Y_rowMap_->putScalar(STS::zero());
857 Y_rowMap_->doExport(Y, *importer, Tpetra::ADD);
859 Y_cur = exporter.is_null() ? rcpFromRef(Y) : Y_rowMap_;
863 X_cur = rcpFromRef(X);
864 Y_cur = rcpFromRef(Y);
867 localApply(*X_cur, *Y_cur, mode, alpha, beta);
869 if (!isKokkosKernelsStream_) {
870 auto G = A_crs_->getGraph();
871 auto exporter = G->getExporter();
872 if (!exporter.is_null()) {
873 Y.putScalar(STS::zero());
874 Y.doExport(*Y_cur, *exporter, Tpetra::ADD);
880 template <
class MatrixType>
888 const char tfecfFuncName[] =
"localTriangularSolve: ";
890 if (!isKokkosKernelsStream_) {
892 "The matrix is not fill complete.");
894 "The matrix is neither upper triangular or lower triangular. "
895 "You may only call this method if the matrix is triangular. "
896 "Remember that this is a local (per MPI process) property, and that "
897 "Tpetra only knows how to do a local (per process) triangular solve.");
899 for (
int i = 0; i < num_streams_; i++) {
901 "The matrix is not fill complete.");
903 "The matrix is neither upper triangular or lower triangular. "
904 "You may only call this method if the matrix is triangular. "
905 "Remember that this is a local (per MPI process) property, and that "
906 "Tpetra only knows how to do a local (per process) triangular solve.");
910 "X and Y must be constant stride.");
914 "not currently support non-conjugated transposed solve (mode == "
915 "Teuchos::TRANS) for complex scalar types.");
917 const std::string uplo = this->uplo_;
919 const size_t numVecs = std::min(X.getNumVectors(), Y.getNumVectors());
921 if (
Teuchos::nonnull(kh_) && this->isKokkosKernelsSptrsv_ && trans ==
"N") {
922 auto A_crs = Teuchos::rcp_dynamic_cast<
const crs_matrix_type>(this->A_);
923 auto A_lclk = A_crs->getLocalMatrixDevice();
924 auto ptr = A_lclk.graph.row_map;
925 auto ind = A_lclk.graph.entries;
926 auto val = A_lclk.values;
928 for (
size_t j = 0; j < numVecs; ++j) {
929 auto X_j = X.getVectorNonConst(j);
930 auto Y_j = Y.getVector(j);
931 auto X_lcl = X_j->getLocalViewDevice(Tpetra::Access::ReadWrite);
932 auto Y_lcl = Y_j->getLocalViewDevice(Tpetra::Access::ReadOnly);
933 auto X_lcl_1d = Kokkos::subview(X_lcl, Kokkos::ALL(), 0);
934 auto Y_lcl_1d = Kokkos::subview(Y_lcl, Kokkos::ALL(), 0);
935 KokkosSparse::sptrsv_solve(kh_.getRawPtr(), ptr, ind, val, Y_lcl_1d, X_lcl_1d);
937 typename k_handle::HandleExecSpace().fence();
940 else if (kh_v_nonnull_ && this->isKokkosKernelsSptrsv_ && trans ==
"N") {
941 std::vector<lno_row_view_t> ptr_v(num_streams_);
942 std::vector<lno_nonzero_view_t> ind_v(num_streams_);
943 std::vector<scalar_nonzero_view_t> val_v(num_streams_);
944 std::vector<k_handle*> KernelHandle_rawptr_v_(num_streams_);
945 for (
size_t j = 0; j < numVecs; ++j) {
946 auto X_j = X.getVectorNonConst(j);
947 auto Y_j = Y.getVector(j);
948 auto X_lcl = X_j->getLocalViewDevice(Tpetra::Access::ReadWrite);
949 auto Y_lcl = Y_j->getLocalViewDevice(Tpetra::Access::ReadOnly);
950 auto X_lcl_1d = Kokkos::subview(X_lcl, Kokkos::ALL(), 0);
951 auto Y_lcl_1d = Kokkos::subview(Y_lcl, Kokkos::ALL(), 0);
952 std::vector<decltype(X_lcl_1d)> x_v(num_streams_);
953 std::vector<decltype(Y_lcl_1d)> y_v(num_streams_);
954 local_ordinal_type stream_begin = 0;
955 local_ordinal_type stream_end;
956 for (
int i = 0; i < num_streams_; i++) {
957 auto A_crs_i = Teuchos::rcp_dynamic_cast<
const crs_matrix_type>(A_crs_v_[i]);
958 auto Alocal_i = A_crs_i->getLocalMatrixDevice();
959 ptr_v[i] = Alocal_i.graph.row_map;
960 ind_v[i] = Alocal_i.graph.entries;
961 val_v[i] = Alocal_i.values;
962 stream_end = stream_begin + Alocal_i.numRows();
963 x_v[i] = Kokkos::subview(X_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
964 y_v[i] = Kokkos::subview(Y_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
965 KernelHandle_rawptr_v_[i] = kh_v_[i].getRawPtr();
966 stream_begin = stream_end;
969 KokkosSparse::Experimental::sptrsv_solve_streams(exec_space_instances_, KernelHandle_rawptr_v_,
970 ptr_v, ind_v, val_v, y_v, x_v);
975 const std::string diag = this->diag_;
980 auto A_lcl = this->A_crs_->getLocalMatrixHost();
982 if (X.isConstantStride() && Y.isConstantStride()) {
983 auto X_lcl = X.getLocalViewHost(Tpetra::Access::ReadWrite);
984 auto Y_lcl = Y.getLocalViewHost(Tpetra::Access::ReadOnly);
985 KokkosSparse::trsv(uplo.c_str(), trans.c_str(), diag.c_str(),
986 A_lcl, Y_lcl, X_lcl);
988 for (
size_t j = 0; j < numVecs; ++j) {
989 auto X_j = X.getVectorNonConst(j);
990 auto Y_j = Y.getVector(j);
991 auto X_lcl = X_j->getLocalViewHost(Tpetra::Access::ReadWrite);
992 auto Y_lcl = Y_j->getLocalViewHost(Tpetra::Access::ReadOnly);
993 KokkosSparse::trsv(uplo.c_str(), trans.c_str(),
994 diag.c_str(), A_lcl, Y_lcl, X_lcl);
1000 template <
class MatrixType>
1001 void LocalSparseTriangularSolver<MatrixType>::
1002 localApply(
const MV& X,
1005 const scalar_type& alpha,
1006 const scalar_type& beta)
const {
1008 htsImpl_->isComputed()) {
1009 htsImpl_->localApply(X, Y, mode, alpha, beta);
1014 typedef scalar_type ST;
1017 if (beta == STS::zero()) {
1018 if (alpha == STS::zero()) {
1019 Y.putScalar(STS::zero());
1021 this->localTriangularSolve(X, Y, mode);
1022 if (alpha != STS::one()) {
1027 if (alpha == STS::zero()) {
1031 this->localTriangularSolve(X, Y_tmp, mode);
1032 Y.update(alpha, Y_tmp, beta);
1037 template <
class MatrixType>
1040 return numInitialize_;
1043 template <
class MatrixType>
1049 template <
class MatrixType>
1055 template <
class MatrixType>
1059 return initializeTime_;
1062 template <
class MatrixType>
1066 return computeTime_;
1069 template <
class MatrixType>
1076 template <
class MatrixType>
1080 std::ostringstream os;
1085 os <<
"\"Ifpack2::LocalSparseTriangularSolver\": {";
1086 if (this->getObjectLabel() !=
"") {
1087 os <<
"Label: \"" << this->getObjectLabel() <<
"\", ";
1089 os <<
"Initialized: " << (isInitialized() ?
"true" :
"false") <<
", "
1090 <<
"Computed: " << (isComputed() ?
"true" :
"false") <<
", ";
1092 if (isKokkosKernelsSptrsv_) os <<
"KK-SPTRSV, ";
1093 if (isKokkosKernelsStream_) os <<
"KK-SolveStream, ";
1096 os <<
"Matrix: null";
1098 os <<
"Matrix dimensions: ["
1099 << A_->getGlobalNumRows() <<
", "
1100 << A_->getGlobalNumCols() <<
"]"
1101 <<
", Number of nonzeros: " << A_->getGlobalNumEntries();
1105 os <<
", HTS computed: " << (htsImpl_->isComputed() ?
"true" :
"false");
1110 template <
class MatrixType>
1124 auto comm = A_.
is_null() ? Tpetra::getDefaultComm() : A_->getComm();
1128 if (!comm.is_null() && comm->getRank() == 0) {
1133 out <<
"\"Ifpack2::LocalSparseTriangularSolver\":" << endl;
1143 template <
class MatrixType>
1148 "Ifpack2::LocalSparseTriangularSolver::getDomainMap: "
1149 "The matrix is null. Please call setMatrix() with a nonnull input "
1150 "before calling this method.");
1151 return A_->getDomainMap();
1154 template <
class MatrixType>
1159 "Ifpack2::LocalSparseTriangularSolver::getRangeMap: "
1160 "The matrix is null. Please call setMatrix() with a nonnull input "
1161 "before calling this method.");
1162 return A_->getRangeMap();
1165 template <
class MatrixType>
1168 const char prefix[] =
"Ifpack2::LocalSparseTriangularSolver::setMatrix: ";
1177 A->getLocalNumRows() != A->getLocalNumCols(),
1178 std::runtime_error, prefix <<
"If A's communicator only contains one "
1179 "process, then A must be square. Instead, you provided a matrix A with "
1180 << A->getLocalNumRows() <<
" rows and " << A->getLocalNumCols() <<
" columns.");
1185 isInitialized_ =
false;
1186 isComputed_ =
false;
1189 A_crs_ = Teuchos::null;
1204 template <
class MatrixType>
1207 const std::vector<HandleExecSpace>& exec_space_instances) {
1208 isKokkosKernelsStream_ = isKokkosKernelsStream;
1209 num_streams_ = num_streams;
1210 exec_space_instances_ = exec_space_instances;
1211 A_crs_v_ = std::vector<Teuchos::RCP<crs_matrix_type>>(num_streams_);
1214 template <
class MatrixType>
1217 const char prefix[] =
"Ifpack2::LocalSparseTriangularSolver::setMatrixWithStreams: ";
1219 for (
int i = 0; i < num_streams_; i++) {
1224 if (A_crs_v[i].getRawPtr() != A_crs_v_[i].getRawPtr() || isInternallyChanged_) {
1227 A_crs_v[i]->getLocalNumRows() != A_crs_v[i]->getLocalNumCols(),
1228 std::runtime_error, prefix <<
"If A's communicator only contains one "
1229 "process, then A must be square. Instead, you provided a matrix A with "
1230 << A_crs_v[i]->getLocalNumRows() <<
" rows and " << A_crs_v[i]->getLocalNumCols() <<
" columns.");
1235 isInitialized_ =
false;
1236 isComputed_ =
false;
1239 A_crs_v_[i] = Teuchos::null;
1244 A_crs_v_[i] = A_crs;
1252 #define IFPACK2_LOCALSPARSETRIANGULARSOLVER_INSTANT(S, LO, GO, N) \
1253 template class Ifpack2::LocalSparseTriangularSolver<Tpetra::RowMatrix<S, LO, GO, N>>;
1255 #endif // IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
bool is_null(const boost::shared_ptr< T > &p)
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the preconditioner to X, and put the result in Y.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:739
int getNumCompute() const
Return the number of calls to compute().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1045
std::string description() const
A one-line description of this object.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1079
T & get(const std::string &name, T def_value)
bool nonnull(const std::shared_ptr< T > &p)
#define TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(throw_exception_test, Exception, msg)
void compute()
"Numeric" phase of setup
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:642
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
MatrixType::global_ordinal_type global_ordinal_type
Type of the global indices of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:62
bool isComputed() const
Return true if compute() has been called.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:191
double getApplyTime() const
Return the time spent in apply().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1072
double getComputeTime() const
Return the time spent in compute().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1065
double getInitializeTime() const
Return the time spent in initialize().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1058
MatrixType::node_type node_type
Node type of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:64
bool isParameter(const std::string &name) const
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
void setMatrices(const std::vector< Teuchos::RCP< crs_matrix_type > > &A_crs_v)
Set this preconditioner's matrices (used by stream interface of triangular solve).
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1216
#define TEUCHOS_TEST_FOR_EXCEPT_MSG(throw_exception_test, msg)
Teuchos::RCP< const map_type > getDomainMap() const
The domain of this operator.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1146
"Preconditioner" that solves local sparse triangular systems.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:46
void initialize()
"Symbolic" phase of setup
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:392
int getNumInitialize() const
Return the number of calls to initialize().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1039
void setStreamInfo(const bool &isKokkosKernelsStream, const int &num_streams, const std::vector< HandleExecSpace > &exec_space_instances)
Set this triangular solver's stream information.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1206
Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > map_type
Specialization of Tpetra::Map used by this class.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:69
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Set this preconditioner's matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1167
void resize(size_type new_size, const value_type &x=value_type())
IntegralType getIntegralValue(const std::string &str, const std::string ¶mName="", const std::string &sublistName="") const
int getNumApply() const
Return the number of calls to apply().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1051
MatrixType::local_ordinal_type local_ordinal_type
Type of the local indices of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:60
Teuchos::RCP< const map_type > getRangeMap() const
The range of this operator.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1157
bool isType(const std::string &name) const
Tpetra::CrsMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > crs_matrix_type
Specialization of Tpetra::CrsMatrix used by this class.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:77
void setParameters(const Teuchos::ParameterList ¶ms)
Set this object's parameters.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:348
MatrixType::scalar_type scalar_type
Scalar type of the entries of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:56
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print this object with given verbosity to the given output stream.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1112
virtual ~LocalSparseTriangularSolver()
Destructor (virtual for memory safety).
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:332
LocalSparseTriangularSolver()
Constructor that takes no input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:296
static std::string name()
std::string typeName(const T &t)