10 #ifndef IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
11 #define IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
16 #include "Ifpack2_LocalSparseTriangularSolver_decl.hpp"
17 #include "Tpetra_CrsMatrix.hpp"
18 #include "Tpetra_Core.hpp"
19 #include "Teuchos_StandardParameterEntryValidators.hpp"
20 #include "Tpetra_Details_determineLocalTriangularStructure.hpp"
21 #include "KokkosSparse_trsv.hpp"
23 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
24 # include "shylu_hts.hpp"
31 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA)
33 inline void cusparse_error_throw(cusparseStatus_t cusparseStatus,
const char* name,
34 const char* file,
const int line) {
35 std::ostringstream out;
36 #if defined(CUSPARSE_VERSION) && (10300 <= CUSPARSE_VERSION)
37 out << name <<
" error( " << cusparseGetErrorName(cusparseStatus) <<
"): " << cusparseGetErrorString(cusparseStatus);
39 out << name <<
" error( ";
40 switch (cusparseStatus) {
41 case CUSPARSE_STATUS_NOT_INITIALIZED:
42 out <<
"CUSPARSE_STATUS_NOT_INITIALIZED): cusparse handle was not "
45 case CUSPARSE_STATUS_ALLOC_FAILED:
46 out <<
"CUSPARSE_STATUS_ALLOC_FAILED): you might tried to allocate too "
49 case CUSPARSE_STATUS_INVALID_VALUE: out <<
"CUSPARSE_STATUS_INVALID_VALUE)";
break;
50 case CUSPARSE_STATUS_ARCH_MISMATCH: out <<
"CUSPARSE_STATUS_ARCH_MISMATCH)";
break;
51 case CUSPARSE_STATUS_MAPPING_ERROR: out <<
"CUSPARSE_STATUS_MAPPING_ERROR)";
break;
52 case CUSPARSE_STATUS_EXECUTION_FAILED: out <<
"CUSPARSE_STATUS_EXECUTION_FAILED)";
break;
53 case CUSPARSE_STATUS_INTERNAL_ERROR: out <<
"CUSPARSE_STATUS_INTERNAL_ERROR)";
break;
54 case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED: out <<
"CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED)";
break;
55 case CUSPARSE_STATUS_ZERO_PIVOT: out <<
"CUSPARSE_STATUS_ZERO_PIVOT)";
break;
56 default: out <<
"unrecognized error code): this is bad!";
break;
58 #endif // CUSPARSE_VERSION
60 out <<
" " << file <<
":" << line;
62 throw std::runtime_error(out.str());
65 inline void cusparse_safe_call(cusparseStatus_t cusparseStatus,
const char* name,
const char* file =
nullptr,
67 if (CUSPARSE_STATUS_SUCCESS != cusparseStatus) {
68 cusparse_error_throw(cusparseStatus, name, file, line);
72 #define IFPACK2_DETAILS_CUSPARSE_SAFE_CALL(call) \
73 Ifpack2::Details::cusparse_safe_call(call, #call, __FILE__, __LINE__)
75 #endif // defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA)
77 struct TrisolverType {
86 type_strs[0] =
"Internal";
88 type_strs[2] =
"KSPTRSV";
90 type_enums[0] = Internal;
92 type_enums[2] = KSPTRSV;
97 template<
class MatrixType>
98 class LocalSparseTriangularSolver<MatrixType>::HtsImpl {
100 typedef Tpetra::CrsMatrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type>
crs_matrix_type;
103 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
104 Timpl_ = Teuchos::null;
105 levelset_block_size_ = 1;
111 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
112 const char* block_size_s =
"trisolver: block size";
115 "The parameter \"" << block_size_s <<
"\" must be of type int.");
116 levelset_block_size_ = pl.
get<
int>(block_size_s);
118 if (levelset_block_size_ < 1)
119 levelset_block_size_ = 1;
128 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
130 transpose_ = conjugate_ =
false;
137 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
140 auto rowptr = T_in.getLocalRowPtrsHost();
141 auto colidx = T_in.getLocalIndicesHost();
142 auto val = T_in.getLocalValuesHost(Tpetra::Access::ReadOnly);
146 HTST::make_CrsMatrix(rowptr.size() - 1,
147 rowptr.data(), colidx.data(),
150 transpose_, conjugate_),
151 HtsCrsMatrixDeleter());
155 HTST::reprocess_numeric(Timpl_.get(), T_hts.
get());
158 if (T_in.getCrsGraph().is_null()) {
160 *out <<
"HTS compute failed because T_in.getCrsGraph().is_null().\n";
163 if ( ! T_in.getCrsGraph()->isSorted()) {
165 *out <<
"HTS compute failed because ! T_in.getCrsGraph().isSorted().\n";
168 if ( ! T_in.isStorageOptimized()) {
170 *out <<
"HTS compute failed because ! T_in.isStorageOptimized().\n";
174 typename HTST::PreprocessArgs args;
175 args.T = T_hts.
get();
178 args.nthreads = omp_get_max_threads();
182 args.save_for_reprocess =
true;
183 typename HTST::Options opts;
184 opts.levelset_block_size = levelset_block_size_;
185 args.options = &opts;
188 Timpl_ = Teuchos::rcpWithDealloc(HTST::preprocess(args), TImplDeleter());
189 }
catch (
const std::exception& e) {
191 *out <<
"HTS preprocess threw: " << e.what() <<
"\n";
200 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
208 void localApply (
const MV& X, MV& Y,
215 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
216 const auto& X_view = X.getLocalViewHost (Tpetra::Access::ReadOnly);
217 const auto& Y_view = Y.getLocalViewHost (Tpetra::Access::ReadWrite);
220 HTST::reset_max_nrhs(Timpl_.get(), X_view.extent(1));
222 HTST::solve_omp(Timpl_.get(),
224 reinterpret_cast<const scalar_type*
>(X_view.data()),
233 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
234 typedef ::Experimental::HTS<local_ordinal_type, size_t, scalar_type> HTST;
235 typedef typename HTST::Impl TImpl;
236 typedef typename HTST::CrsMatrix HtsCrsMatrix;
238 struct TImplDeleter {
239 void free (TImpl* impl) {
240 HTST::delete_Impl(impl);
244 struct HtsCrsMatrixDeleter {
245 void free (HtsCrsMatrix* T) {
246 HTST::delete_CrsMatrix(T);
251 bool transpose_, conjugate_;
252 int levelset_block_size_;
256 template<
class MatrixType>
267 (A_crs.
is_null (), std::invalid_argument,
268 "Ifpack2::LocalSparseTriangularSolver constructor: "
269 "The input matrix A is not a Tpetra::CrsMatrix.");
274 template<
class MatrixType>
283 *out_ <<
">>> DEBUG Ifpack2::LocalSparseTriangularSolver constructor"
291 (A_crs.
is_null (), std::invalid_argument,
292 "Ifpack2::LocalSparseTriangularSolver constructor: "
293 "The input matrix A is not a Tpetra::CrsMatrix.");
298 template<
class MatrixType>
305 template<
class MatrixType>
312 *out_ <<
">>> DEBUG Ifpack2::LocalSparseTriangularSolver constructor"
317 template<
class MatrixType>
320 isInitialized_ =
false;
322 reverseStorage_ =
false;
323 isInternallyChanged_ =
false;
327 initializeTime_ = 0.0;
330 isKokkosKernelsSptrsv_ =
false;
331 isKokkosKernelsStream_ =
false;
337 template<
class MatrixType>
341 if (!isKokkosKernelsStream_) {
343 kh_->destroy_sptrsv_handle();
347 for (
int i = 0; i < num_streams_; i++) {
349 kh_v_[i]->destroy_sptrsv_handle();
355 template<
class MatrixType>
364 Details::TrisolverType::Enum trisolverType = Details::TrisolverType::Internal;
366 static const char typeName[] =
"trisolver: type";
368 if ( ! pl.
isType<std::string>(typeName))
break;
371 Array<std::string> trisolverTypeStrs;
372 Array<Details::TrisolverType::Enum> trisolverTypeEnums;
373 Details::TrisolverType::loadPLTypeOption (trisolverTypeStrs, trisolverTypeEnums);
375 s2i(trisolverTypeStrs (), trisolverTypeEnums (), typeName,
false);
380 if (trisolverType == Details::TrisolverType::HTS) {
382 htsImpl_->setParameters (pl);
385 if (trisolverType == Details::TrisolverType::KSPTRSV) {
386 this->isKokkosKernelsSptrsv_ =
true;
389 this->isKokkosKernelsSptrsv_ =
false;
393 reverseStorage_ = pl.
get<
bool>(
"trisolver: reverse U");
396 (reverseStorage_ && (trisolverType == Details::TrisolverType::HTS || trisolverType == Details::TrisolverType::KSPTRSV),
397 std::logic_error,
"Ifpack2::LocalSparseTriangularSolver::setParameters: "
398 "You are not allowed to enable both HTS or KSPTRSV and the \"trisolver: reverse U\" "
399 "options. See GitHub issue #2647.");
402 template<
class MatrixType>
407 using Tpetra::Details::determineLocalTriangularStructure;
409 using local_matrix_type =
typename crs_matrix_type::local_matrix_device_type;
412 const char prefix[] =
"Ifpack2::LocalSparseTriangularSolver::initialize: ";
413 if (! out_.is_null ()) {
414 *out_ <<
">>> DEBUG " << prefix << std::endl;
417 if (!isKokkosKernelsStream_) {
419 (A_.
is_null (), std::runtime_error, prefix <<
"You must call "
420 "setMatrix() with a nonnull input matrix before you may call "
421 "initialize() or compute().");
422 if (A_crs_.is_null ()) {
425 (A_crs.get () ==
nullptr, std::invalid_argument,
426 prefix <<
"The input matrix A is not a Tpetra::CrsMatrix.");
429 auto G = A_crs_->getGraph ();
431 (G.is_null (), std::logic_error, prefix <<
"A_ and A_crs_ are nonnull, "
432 "but A_crs_'s RowGraph G is null. "
433 "Please report this bug to the Ifpack2 developers.");
438 (! G->isFillComplete (), std::runtime_error,
"If you call this method, "
439 "the matrix's graph must be fill complete. It is not.");
442 constexpr
bool ignoreMapsForTriStructure =
true;
443 auto lclTriStructure = [&] {
444 auto lclMatrix = A_crs_->getLocalMatrixDevice ();
445 auto lclRowMap = A_crs_->getRowMap ()->getLocalMap ();
446 auto lclColMap = A_crs_->getColMap ()->getLocalMap ();
448 determineLocalTriangularStructure (lclMatrix.graph,
451 ignoreMapsForTriStructure);
452 const LO lclNumRows = lclRowMap.getLocalNumElements ();
453 this->diag_ = (lclTriStruct.diagCount < lclNumRows) ?
"U" :
"N";
454 this->uplo_ = lclTriStruct.couldBeLowerTriangular ?
"L" :
455 (lclTriStruct.couldBeUpperTriangular ?
"U" :
"N");
459 if (reverseStorage_ && lclTriStructure.couldBeUpperTriangular &&
460 htsImpl_.is_null ()) {
462 auto Alocal = A_crs_->getLocalMatrixDevice();
463 auto ptr = Alocal.graph.row_map;
464 auto ind = Alocal.graph.entries;
465 auto val = Alocal.values;
467 auto numRows = Alocal.numRows();
468 auto numCols = Alocal.numCols();
469 auto numNnz = Alocal.nnz();
471 typename decltype(ptr)::non_const_type newptr (
"ptr", ptr.extent (0));
472 typename decltype(ind)::non_const_type newind (
"ind", ind.extent (0));
473 decltype(val) newval (
"val", val.extent (0));
476 typename crs_matrix_type::execution_space().fence();
479 auto A_r = Alocal.row(numRows-1 - row);
481 auto numEnt = A_r.length;
483 newind(rowStart + k) = numCols-1 - A_r.colidx(numEnt-1 - k);
484 newval(rowStart + k) = A_r.value (numEnt-1 - k);
487 newptr(row+1) = rowStart;
489 typename crs_matrix_type::execution_space().fence();
495 auto rowMap = A_->getRowMap();
496 auto numElems = rowMap->getLocalNumElements();
497 auto rowElems = rowMap->getLocalElementList();
500 for (
size_t i = 0; i < numElems; i++)
501 newRowElems[i] = rowElems[numElems-1 - i];
503 newRowMap =
Teuchos::rcp(
new map_type(rowMap->getGlobalNumElements(), newRowElems, rowMap->getIndexBase(), rowMap->getComm()));
507 auto colMap = A_->getColMap();
508 auto numElems = colMap->getLocalNumElements();
509 auto colElems = colMap->getLocalElementList();
512 for (
size_t i = 0; i < numElems; i++)
513 newColElems[i] = colElems[numElems-1 - i];
515 newColMap =
Teuchos::rcp(
new map_type(colMap->getGlobalNumElements(), newColElems, colMap->getIndexBase(), colMap->getComm()));
519 local_matrix_type newLocalMatrix(
"Upermuted", numRows, numCols, numNnz, newval, newptr, newind);
523 isInternallyChanged_ =
true;
528 auto newLclTriStructure =
529 determineLocalTriangularStructure (newLocalMatrix.graph,
530 newRowMap->getLocalMap (),
531 newColMap->getLocalMap (),
532 ignoreMapsForTriStructure);
533 const LO newLclNumRows = newRowMap->getLocalNumElements ();
534 this->diag_ = (newLclTriStructure.diagCount < newLclNumRows) ?
"U" :
"N";
535 this->uplo_ = newLclTriStructure.couldBeLowerTriangular ?
"L" :
536 (newLclTriStructure.couldBeUpperTriangular ?
"U" :
"N");
540 for (
int i = 0; i < num_streams_; i++) {
542 (A_crs_v_[i].
is_null (), std::runtime_error, prefix <<
"You must call "
543 "setMatrix() with a nonnull input matrix before you may call "
544 "initialize() or compute().");
545 auto G = A_crs_v_[i]->getGraph ();
547 (G.is_null (), std::logic_error, prefix <<
"A_crs_ are nonnull, "
548 "but A_crs_'s RowGraph G is null. "
549 "Please report this bug to the Ifpack2 developers.");
554 (! G->isFillComplete (), std::runtime_error,
"If you call this method, "
555 "the matrix's graph must be fill complete. It is not.");
558 constexpr
bool ignoreMapsForTriStructure =
true;
559 std::string prev_uplo_ = this->uplo_;
560 std::string prev_diag_ = this->diag_;
561 auto lclMatrix = A_crs_v_[i]->getLocalMatrixDevice ();
562 auto lclRowMap = A_crs_v_[i]->getRowMap ()->getLocalMap ();
563 auto lclColMap = A_crs_v_[i]->getColMap ()->getLocalMap ();
565 determineLocalTriangularStructure (lclMatrix.graph,
568 ignoreMapsForTriStructure);
569 const LO lclNumRows = lclRowMap.getLocalNumElements ();
570 this->diag_ = (lclTriStruct.diagCount < lclNumRows) ?
"U" :
"N";
571 this->uplo_ = lclTriStruct.couldBeLowerTriangular ?
"L" :
572 (lclTriStruct.couldBeUpperTriangular ?
"U" :
"N");
575 ((this->diag_ != prev_diag_) || (this->uplo_ != prev_uplo_),
576 std::logic_error, prefix <<
"A_crs_'s structures in streams "
577 "are different. Please report this bug to the Ifpack2 developers.");
584 htsImpl_->initialize (*A_crs_);
585 isInternallyChanged_ =
true;
588 const bool ksptrsv_valid_uplo = (this->uplo_ !=
"N");
589 kh_v_nonnull_ =
false;
590 if (this->isKokkosKernelsSptrsv_ && ksptrsv_valid_uplo && this->diag_ !=
"U")
592 if (!isKokkosKernelsStream_) {
594 const bool is_lower_tri = (this->uplo_ ==
"L") ?
true :
false;
596 auto A_crs = Teuchos::rcp_dynamic_cast<
const crs_matrix_type> (A_,
true);
597 auto Alocal = A_crs->getLocalMatrixDevice();
598 auto ptr = Alocal.graph.row_map;
599 auto ind = Alocal.graph.entries;
600 auto val = Alocal.values;
602 auto numRows = Alocal.numRows();
603 kh_->create_sptrsv_handle(kokkosKernelsAlgorithm(), numRows, is_lower_tri);
604 KokkosSparse::Experimental::sptrsv_symbolic(kh_.getRawPtr(), ptr, ind, val);
606 kh_v_ = std::vector< Teuchos::RCP<k_handle> >(num_streams_);
607 for (
int i = 0; i < num_streams_; i++) {
609 auto A_crs_i = Teuchos::rcp_dynamic_cast<
const crs_matrix_type> (A_crs_v_[i],
true);
610 auto Alocal_i = A_crs_i->getLocalMatrixDevice();
611 auto ptr_i = Alocal_i.graph.row_map;
612 auto ind_i = Alocal_i.graph.entries;
613 auto val_i = Alocal_i.values;
615 auto numRows_i = Alocal_i.numRows();
617 const bool is_lower_tri = (this->uplo_ ==
"L") ?
true :
false;
618 kh_v_[i]->create_sptrsv_handle(kokkosKernelsAlgorithm(), numRows_i, is_lower_tri);
619 KokkosSparse::Experimental::sptrsv_symbolic(kh_v_[i].getRawPtr(), ptr_i, ind_i, val_i);
621 kh_v_nonnull_ =
true;
625 isInitialized_ =
true;
629 template<
class MatrixType>
630 KokkosSparse::Experimental::SPTRSVAlgorithm
633 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA)
636 if constexpr (std::is_same<Kokkos::Cuda, HandleExecSpace>::value &&
637 std::is_same<int, local_ordinal_type>::value &&
638 (std::is_same<scalar_type, float>::value ||
639 std::is_same<scalar_type, double>::value ||
640 std::is_same<scalar_type, Kokkos::complex<float>>::value ||
641 std::is_same<scalar_type, Kokkos::complex<double>>::value))
643 return KokkosSparse::Experimental::SPTRSVAlgorithm::SPTRSV_CUSPARSE;
646 return KokkosSparse::Experimental::SPTRSVAlgorithm::SEQLVLSCHD_TP1;
649 template<
class MatrixType>
654 const char prefix[] =
"Ifpack2::LocalSparseTriangularSolver::compute: ";
655 if (! out_.is_null ()) {
656 *out_ <<
">>> DEBUG " << prefix << std::endl;
659 if (!isKokkosKernelsStream_) {
661 (A_.
is_null (), std::runtime_error, prefix <<
"You must call "
662 "setMatrix() with a nonnull input matrix before you may call "
663 "initialize() or compute().");
665 (A_crs_.is_null (), std::logic_error, prefix <<
"A_ is nonnull, but "
666 "A_crs_ is null. Please report this bug to the Ifpack2 developers.");
669 (! A_crs_->isFillComplete (), std::runtime_error,
"If you call this "
670 "method, the matrix must be fill complete. It is not.");
673 for(
int i = 0; i < num_streams_; i++) {
675 (A_crs_v_[i].
is_null (), std::runtime_error, prefix <<
"You must call "
676 "setMatrices() with a nonnull input matrix before you may call "
677 "initialize() or compute().");
680 (! A_crs_v_[i]->isFillComplete (), std::runtime_error,
"If you call this "
681 "method, the matrix must be fill complete. It is not.");
685 if (! isInitialized_) {
689 (! isInitialized_, std::logic_error, prefix <<
"initialize() should have "
690 "been called by this point, but isInitialized_ is false. "
691 "Please report this bug to the Ifpack2 developers.");
700 #if defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && (CUDA_VERSION >= 11030)
701 if constexpr ( std::is_same_v<typename crs_matrix_type::execution_space, Kokkos::Cuda> )
703 if (this->isKokkosKernelsSptrsv_) {
705 auto A_crs = Teuchos::rcp_dynamic_cast<
const crs_matrix_type> (A_crs_,
true);
706 auto Alocal = A_crs->getLocalMatrixDevice();
707 auto val = Alocal.values;
708 #if (CUSPARSE_VERSION >= 12100)
709 auto *sptrsv_handle = kh_->get_sptrsv_handle();
710 auto cusparse_handle = sptrsv_handle->get_cuSparseHandle();
711 cusparseSpSV_updateMatrix(cusparse_handle->handle,
712 cusparse_handle->spsvDescr,
714 CUSPARSE_SPSV_UPDATE_GENERAL);
716 auto ptr = Alocal.graph.row_map;
717 auto ind = Alocal.graph.entries;
718 KokkosSparse::Experimental::sptrsv_symbolic(kh_.getRawPtr(), ptr, ind, val);
720 }
else if (kh_v_nonnull_) {
721 for (
int i = 0; i < num_streams_; i++) {
722 auto A_crs_i = Teuchos::rcp_dynamic_cast<
const crs_matrix_type> (A_crs_v_[i],
true);
723 auto Alocal_i = A_crs_i->getLocalMatrixDevice();
724 auto val_i = Alocal_i.values;
725 #if (CUSPARSE_VERSION >= 12100)
726 auto *sptrsv_handle = kh_v_[i]->get_sptrsv_handle();
727 auto cusparse_handle = sptrsv_handle->get_cuSparseHandle();
728 IFPACK2_DETAILS_CUSPARSE_SAFE_CALL(
729 cusparseSetStream(cusparse_handle->handle, exec_space_instances_[i].cuda_stream()));
730 cusparseSpSV_updateMatrix(cusparse_handle->handle,
731 cusparse_handle->spsvDescr,
733 CUSPARSE_SPSV_UPDATE_GENERAL);
735 auto ptr_i = Alocal_i.graph.row_map;
736 auto ind_i = Alocal_i.graph.entries;
737 KokkosSparse::Experimental::sptrsv_symbolic(exec_space_instances_[i], kh_v_[i].getRawPtr(), ptr_i, ind_i, val_i);
747 htsImpl_->compute (*A_crs_, out_);
754 template<
class MatrixType>
766 using Teuchos::rcpFromRef;
769 const char prefix[] =
"Ifpack2::LocalSparseTriangularSolver::apply: ";
771 if (! out_.is_null ()) {
772 *out_ <<
">>> DEBUG " << prefix;
773 if (!isKokkosKernelsStream_) {
774 if (A_crs_.is_null ()) {
775 *out_ <<
"A_crs_ is null!" << std::endl;
780 const std::string uplo = this->uplo_;
783 const std::string diag = this->diag_;
784 *out_ <<
"uplo=\"" << uplo
785 <<
"\", trans=\"" << trans
786 <<
"\", diag=\"" << diag <<
"\"" << std::endl;
790 for (
int i = 0; i < num_streams_; i++) {
792 *out_ <<
"A_crs_v_[" << i <<
"]" <<
" is null!" << std::endl;
795 const std::string uplo = this->uplo_;
798 const std::string diag = this->diag_;
799 *out_ <<
"A_crs_v_[" << i <<
"]: "
801 <<
"\", trans=\"" << trans
802 <<
"\", diag=\"" << diag <<
"\"" << std::endl;
809 (! isComputed (), std::runtime_error, prefix <<
"If compute() has not yet "
810 "been called, or if you have changed the matrix via setMatrix(), you must "
811 "call compute() before you may call this method.");
814 if (!isKokkosKernelsStream_) {
816 (A_.
is_null (), std::logic_error, prefix <<
"A_ is null. "
817 "Please report this bug to the Ifpack2 developers.");
819 (A_crs_.is_null (), std::logic_error, prefix <<
"A_crs_ is null. "
820 "Please report this bug to the Ifpack2 developers.");
824 (! A_crs_->isFillComplete (), std::runtime_error,
"If you call this "
825 "method, the matrix must be fill complete. It is not. This means that "
826 " you must have called resumeFill() on the matrix before calling apply(). "
827 "This is NOT allowed. Note that this class may use the matrix's data in "
828 "place without copying it. Thus, you cannot change the matrix and expect "
829 "the solver to stay the same. If you have changed the matrix, first call "
830 "fillComplete() on it, then call compute() on this object, before you call"
831 " apply(). You do NOT need to call setMatrix, as long as the matrix "
832 "itself (that is, its address in memory) is the same.");
835 for (
int i = 0; i < num_streams_; i++) {
837 (A_crs_v_[i].
is_null (), std::logic_error, prefix <<
"A_crs_ is null. "
838 "Please report this bug to the Ifpack2 developers.");
842 (! A_crs_v_[i]->isFillComplete (), std::runtime_error,
"If you call this "
843 "method, the matrix must be fill complete. It is not. This means that "
844 " you must have called resumeFill() on the matrix before calling apply(). "
845 "This is NOT allowed. Note that this class may use the matrix's data in "
846 "place without copying it. Thus, you cannot change the matrix and expect "
847 "the solver to stay the same. If you have changed the matrix, first call "
848 "fillComplete() on it, then call compute() on this object, before you call"
849 " apply(). You do NOT need to call setMatrix, as long as the matrix "
850 "itself (that is, its address in memory) is the same.");
857 if (!isKokkosKernelsStream_) {
858 auto G = A_crs_->getGraph ();
860 (G.is_null (), std::logic_error, prefix <<
"A_ and A_crs_ are nonnull, "
861 "but A_crs_'s RowGraph G is null. "
862 "Please report this bug to the Ifpack2 developers.");
863 auto importer = G->getImporter ();
864 auto exporter = G->getExporter ();
866 if (! importer.is_null ()) {
867 if (X_colMap_.is_null () || X_colMap_->getNumVectors () != X.getNumVectors ()) {
868 X_colMap_ =
rcp (
new MV (importer->getTargetMap (), X.getNumVectors ()));
871 X_colMap_->putScalar (STS::zero ());
876 X_colMap_->doImport (X, *importer, Tpetra::ZERO);
878 X_cur = importer.is_null () ? rcpFromRef (X) :
879 Teuchos::rcp_const_cast<
const MV> (X_colMap_);
881 if (! exporter.is_null ()) {
882 if (Y_rowMap_.is_null () || Y_rowMap_->getNumVectors () != Y.getNumVectors ()) {
883 Y_rowMap_ =
rcp (
new MV (exporter->getSourceMap (), Y.getNumVectors ()));
886 Y_rowMap_->putScalar (STS::zero ());
888 Y_rowMap_->doExport (Y, *importer, Tpetra::ADD);
890 Y_cur = exporter.is_null () ? rcpFromRef (Y) : Y_rowMap_;
895 X_cur = rcpFromRef (X);
896 Y_cur = rcpFromRef (Y);
899 localApply (*X_cur, *Y_cur, mode, alpha, beta);
901 if (!isKokkosKernelsStream_) {
902 auto G = A_crs_->getGraph ();
903 auto exporter = G->getExporter ();
904 if (! exporter.is_null ()) {
905 Y.putScalar (STS::zero ());
906 Y.doExport (*Y_cur, *exporter, Tpetra::ADD);
912 template<
class MatrixType>
922 const char tfecfFuncName[] =
"localTriangularSolve: ";
924 if (!isKokkosKernelsStream_)
927 (! A_crs_->isFillComplete (), std::runtime_error,
928 "The matrix is not fill complete.");
930 ( A_crs_->getLocalNumRows() > 0 && this->uplo_ ==
"N", std::runtime_error,
931 "The matrix is neither upper triangular or lower triangular. "
932 "You may only call this method if the matrix is triangular. "
933 "Remember that this is a local (per MPI process) property, and that "
934 "Tpetra only knows how to do a local (per process) triangular solve.");
938 for (
int i = 0; i < num_streams_; i++) {
940 (! A_crs_v_[i]->isFillComplete (), std::runtime_error,
941 "The matrix is not fill complete.");
943 ( A_crs_v_[i]->getLocalNumRows() > 0 && this->uplo_ ==
"N", std::runtime_error,
944 "The matrix is neither upper triangular or lower triangular. "
945 "You may only call this method if the matrix is triangular. "
946 "Remember that this is a local (per MPI process) property, and that "
947 "Tpetra only knows how to do a local (per process) triangular solve.");
951 (! X.isConstantStride () || ! Y.isConstantStride (), std::invalid_argument,
952 "X and Y must be constant stride.");
955 (STS::isComplex && mode == TRANS, std::logic_error,
"This method does "
956 "not currently support non-conjugated transposed solve (mode == "
957 "Teuchos::TRANS) for complex scalar types.");
959 const std::string uplo = this->uplo_;
962 const size_t numVecs = std::min (X.getNumVectors (), Y.getNumVectors ());
966 auto A_crs = Teuchos::rcp_dynamic_cast<
const crs_matrix_type> (this->A_);
967 auto A_lclk = A_crs->getLocalMatrixDevice ();
968 auto ptr = A_lclk.graph.row_map;
969 auto ind = A_lclk.graph.entries;
970 auto val = A_lclk.values;
972 for (
size_t j = 0; j < numVecs; ++j) {
973 auto X_j = X.getVectorNonConst (j);
974 auto Y_j = Y.getVector (j);
975 auto X_lcl = X_j->getLocalViewDevice (Tpetra::Access::ReadWrite);
976 auto Y_lcl = Y_j->getLocalViewDevice (Tpetra::Access::ReadOnly);
977 auto X_lcl_1d = Kokkos::subview (X_lcl, Kokkos::ALL (), 0);
978 auto Y_lcl_1d = Kokkos::subview (Y_lcl, Kokkos::ALL (), 0);
979 KokkosSparse::Experimental::sptrsv_solve(kh_.getRawPtr(), ptr, ind, val, Y_lcl_1d, X_lcl_1d);
981 typename k_handle::HandleExecSpace().fence();
984 else if (kh_v_nonnull_ && this->isKokkosKernelsSptrsv_ && trans ==
"N")
986 std::vector<lno_row_view_t> ptr_v(num_streams_);
987 std::vector<lno_nonzero_view_t> ind_v(num_streams_);
988 std::vector<scalar_nonzero_view_t> val_v(num_streams_);
989 std::vector<k_handle *> KernelHandle_rawptr_v_(num_streams_);
990 for (
size_t j = 0; j < numVecs; ++j) {
991 auto X_j = X.getVectorNonConst (j);
992 auto Y_j = Y.getVector (j);
993 auto X_lcl = X_j->getLocalViewDevice (Tpetra::Access::ReadWrite);
994 auto Y_lcl = Y_j->getLocalViewDevice (Tpetra::Access::ReadOnly);
995 auto X_lcl_1d = Kokkos::subview (X_lcl, Kokkos::ALL (), 0);
996 auto Y_lcl_1d = Kokkos::subview (Y_lcl, Kokkos::ALL (), 0);
997 std::vector<decltype(X_lcl_1d)> x_v(num_streams_);
998 std::vector<decltype(Y_lcl_1d)> y_v(num_streams_);
999 local_ordinal_type stream_begin = 0;
1000 local_ordinal_type stream_end;
1001 for (
int i = 0; i < num_streams_; i++) {
1002 auto A_crs_i = Teuchos::rcp_dynamic_cast<
const crs_matrix_type> (A_crs_v_[i]);
1003 auto Alocal_i = A_crs_i->getLocalMatrixDevice();
1004 ptr_v[i] = Alocal_i.graph.row_map;
1005 ind_v[i] = Alocal_i.graph.entries;
1006 val_v[i] = Alocal_i.values;
1007 stream_end = stream_begin + Alocal_i.numRows();
1008 x_v[i] = Kokkos::subview (X_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1009 y_v[i] = Kokkos::subview (Y_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);;
1010 KernelHandle_rawptr_v_[i] = kh_v_[i].getRawPtr();
1011 stream_begin = stream_end;
1014 KokkosSparse::Experimental::sptrsv_solve_streams( exec_space_instances_, KernelHandle_rawptr_v_,
1015 ptr_v, ind_v, val_v, y_v, x_v );
1021 const std::string diag = this->diag_;
1026 auto A_lcl = this->A_crs_->getLocalMatrixHost ();
1028 if (X.isConstantStride () && Y.isConstantStride ()) {
1029 auto X_lcl = X.getLocalViewHost (Tpetra::Access::ReadWrite);
1030 auto Y_lcl = Y.getLocalViewHost (Tpetra::Access::ReadOnly);
1031 KokkosSparse::trsv (uplo.c_str (), trans.c_str (), diag.c_str (),
1032 A_lcl, Y_lcl, X_lcl);
1035 for (
size_t j = 0; j < numVecs; ++j) {
1036 auto X_j = X.getVectorNonConst (j);
1037 auto Y_j = Y.getVector (j);
1038 auto X_lcl = X_j->getLocalViewHost (Tpetra::Access::ReadWrite);
1039 auto Y_lcl = Y_j->getLocalViewHost (Tpetra::Access::ReadOnly);
1040 KokkosSparse::trsv (uplo.c_str (), trans.c_str (),
1041 diag.c_str (), A_lcl, Y_lcl, X_lcl);
1047 template<
class MatrixType>
1049 LocalSparseTriangularSolver<MatrixType>::
1050 localApply (
const MV& X,
1053 const scalar_type& alpha,
1054 const scalar_type& beta)
const
1057 htsImpl_->isComputed ()) {
1058 htsImpl_->localApply (X, Y, mode, alpha, beta);
1063 typedef scalar_type ST;
1066 if (beta == STS::zero ()) {
1067 if (alpha == STS::zero ()) {
1068 Y.putScalar (STS::zero ());
1071 this->localTriangularSolve (X, Y, mode);
1072 if (alpha != STS::one ()) {
1078 if (alpha == STS::zero ()) {
1083 this->localTriangularSolve (X, Y_tmp, mode);
1084 Y.update (alpha, Y_tmp, beta);
1090 template <
class MatrixType>
1094 return numInitialize_;
1097 template <
class MatrixType>
1104 template <
class MatrixType>
1111 template <
class MatrixType>
1115 return initializeTime_;
1118 template<
class MatrixType>
1122 return computeTime_;
1125 template<
class MatrixType>
1132 template <
class MatrixType>
1137 std::ostringstream os;
1142 os <<
"\"Ifpack2::LocalSparseTriangularSolver\": {";
1143 if (this->getObjectLabel () !=
"") {
1144 os <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
1146 os <<
"Initialized: " << (isInitialized () ?
"true" :
"false") <<
", "
1147 <<
"Computed: " << (isComputed () ?
"true" :
"false") <<
", ";
1149 if(isKokkosKernelsSptrsv_) os <<
"KK-SPTRSV, ";
1150 if(isKokkosKernelsStream_) os <<
"KK-SolveStream, ";
1153 os <<
"Matrix: null";
1156 os <<
"Matrix dimensions: ["
1157 << A_->getGlobalNumRows () <<
", "
1158 << A_->getGlobalNumCols () <<
"]"
1159 <<
", Number of nonzeros: " << A_->getGlobalNumEntries();
1163 os <<
", HTS computed: " << (htsImpl_->isComputed () ?
"true" :
"false");
1168 template <
class MatrixType>
1185 Tpetra::getDefaultComm () :
1190 if (! comm.is_null () && comm->getRank () == 0) {
1195 out <<
"\"Ifpack2::LocalSparseTriangularSolver\":" << endl;
1205 template <
class MatrixType>
1211 (A_.
is_null (), std::runtime_error,
1212 "Ifpack2::LocalSparseTriangularSolver::getDomainMap: "
1213 "The matrix is null. Please call setMatrix() with a nonnull input "
1214 "before calling this method.");
1215 return A_->getDomainMap ();
1218 template <
class MatrixType>
1224 (A_.
is_null (), std::runtime_error,
1225 "Ifpack2::LocalSparseTriangularSolver::getRangeMap: "
1226 "The matrix is null. Please call setMatrix() with a nonnull input "
1227 "before calling this method.");
1228 return A_->getRangeMap ();
1231 template<
class MatrixType>
1235 const char prefix[] =
"Ifpack2::LocalSparseTriangularSolver::setMatrix: ";
1244 (! A.
is_null () && A->getComm ()->getSize () == 1 &&
1245 A->getLocalNumRows () != A->getLocalNumCols (),
1246 std::runtime_error, prefix <<
"If A's communicator only contains one "
1247 "process, then A must be square. Instead, you provided a matrix A with "
1248 << A->getLocalNumRows () <<
" rows and " << A->getLocalNumCols ()
1254 isInitialized_ =
false;
1255 isComputed_ =
false;
1258 A_crs_ = Teuchos::null;
1265 (A_crs.
is_null (), std::invalid_argument, prefix <<
1266 "The input matrix A is not a Tpetra::CrsMatrix.");
1276 template<
class MatrixType>
1280 const std::vector<HandleExecSpace>& exec_space_instances)
1282 isKokkosKernelsStream_ = isKokkosKernelsStream;
1283 num_streams_ = num_streams;
1284 exec_space_instances_ = exec_space_instances;
1285 A_crs_v_ = std::vector< Teuchos::RCP<crs_matrix_type> >(num_streams_);
1288 template<
class MatrixType>
1292 const char prefix[] =
"Ifpack2::LocalSparseTriangularSolver::setMatrixWithStreams: ";
1294 for(
int i = 0; i < num_streams_; i++) {
1299 if (A_crs_v[i].getRawPtr () != A_crs_v_[i].getRawPtr () || isInternallyChanged_) {
1302 (! A_crs_v[i].
is_null () && A_crs_v[i]->getComm ()->getSize () == 1 &&
1303 A_crs_v[i]->getLocalNumRows () != A_crs_v[i]->getLocalNumCols (),
1304 std::runtime_error, prefix <<
"If A's communicator only contains one "
1305 "process, then A must be square. Instead, you provided a matrix A with "
1306 << A_crs_v[i]->getLocalNumRows () <<
" rows and " << A_crs_v[i]->getLocalNumCols ()
1312 isInitialized_ =
false;
1313 isComputed_ =
false;
1316 A_crs_v_[i] = Teuchos::null;
1322 (A_crs.
is_null (), std::invalid_argument, prefix <<
1323 "The input matrix A is not a Tpetra::CrsMatrix.");
1324 A_crs_v_[i] = A_crs;
1332 #define IFPACK2_LOCALSPARSETRIANGULARSOLVER_INSTANT(S,LO,GO,N) \
1333 template class Ifpack2::LocalSparseTriangularSolver< Tpetra::RowMatrix<S, LO, GO, N> >;
1335 #endif // IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
bool is_null(const boost::shared_ptr< T > &p)
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the preconditioner to X, and put the result in Y.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:756
int getNumCompute() const
Return the number of calls to compute().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1100
std::string description() const
A one-line description of this object.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1135
T & get(const std::string &name, T def_value)
bool nonnull(const std::shared_ptr< T > &p)
#define TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(throw_exception_test, Exception, msg)
void compute()
"Numeric" phase of setup
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:652
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
MatrixType::global_ordinal_type global_ordinal_type
Type of the global indices of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:62
bool isComputed() const
Return true if compute() has been called.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:189
double getApplyTime() const
Return the time spent in apply().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1128
double getComputeTime() const
Return the time spent in compute().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1121
double getInitializeTime() const
Return the time spent in initialize().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1114
MatrixType::node_type node_type
Node type of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:64
bool isParameter(const std::string &name) const
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
void setMatrices(const std::vector< Teuchos::RCP< crs_matrix_type > > &A_crs_v)
Set this preconditioner's matrices (used by stream interface of triangular solve).
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1290
#define TEUCHOS_TEST_FOR_EXCEPT_MSG(throw_exception_test, msg)
Teuchos::RCP< const map_type > getDomainMap() const
The domain of this operator.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1208
"Preconditioner" that solves local sparse triangular systems.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:46
void initialize()
"Symbolic" phase of setup
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:405
int getNumInitialize() const
Return the number of calls to initialize().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1093
void setStreamInfo(const bool &isKokkosKernelsStream, const int &num_streams, const std::vector< HandleExecSpace > &exec_space_instances)
Set this triangular solver's stream information.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1279
Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > map_type
Specialization of Tpetra::Map used by this class.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:69
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Set this preconditioner's matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1233
void resize(size_type new_size, const value_type &x=value_type())
IntegralType getIntegralValue(const std::string &str, const std::string ¶mName="", const std::string &sublistName="") const
int getNumApply() const
Return the number of calls to apply().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1107
MatrixType::local_ordinal_type local_ordinal_type
Type of the local indices of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:60
Teuchos::RCP< const map_type > getRangeMap() const
The range of this operator.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1221
bool isType(const std::string &name) const
Tpetra::CrsMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > crs_matrix_type
Specialization of Tpetra::CrsMatrix used by this class.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:75
void setParameters(const Teuchos::ParameterList ¶ms)
Set this object's parameters.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:358
MatrixType::scalar_type scalar_type
Type of the entries of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:58
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print this object with given verbosity to the given output stream.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:1170
virtual ~LocalSparseTriangularSolver()
Destructor (virtual for memory safety).
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:339
LocalSparseTriangularSolver()
Constructor that takes no input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:300
static std::string name()
std::string typeName(const T &t)