41 #ifndef IFPACK2_CRSRILUK_DEF_HPP
42 #define IFPACK2_CRSRILUK_DEF_HPP
44 #include "Ifpack2_LocalFilter.hpp"
45 #include "Tpetra_CrsMatrix.hpp"
46 #include "Teuchos_StandardParameterEntryValidators.hpp"
47 #include "Ifpack2_LocalSparseTriangularSolver.hpp"
48 #include "Ifpack2_Details_getParamTryingTypes.hpp"
49 #include "Ifpack2_Details_getCrsMatrix.hpp"
50 #include "Kokkos_Sort.hpp"
51 #include "KokkosSparse_Utils.hpp"
52 #include "KokkosKernels_Sorting.hpp"
53 #include "KokkosSparse_IOUtils.hpp"
66 type_strs[0] =
"Serial";
67 type_strs[1] =
"KSPILUK";
69 type_enums[0] = Serial;
70 type_enums[1] = KSPILUK;
75 template<
class MatrixType>
81 isInitialized_ (false),
86 initializeTime_ (0.0),
92 isKokkosKernelsSpiluk_(false),
93 isKokkosKernelsStream_(false),
95 hasStreamReordered_(false)
101 template<
class MatrixType>
106 isAllocated_ (false),
107 isInitialized_ (false),
112 initializeTime_ (0.0),
118 isKokkosKernelsSpiluk_(false),
119 isKokkosKernelsStream_(false),
121 hasStreamReordered_(false)
127 template<
class MatrixType>
130 if (!isKokkosKernelsStream_) {
132 KernelHandle_->destroy_spiluk_handle();
136 for (
int i = 0; i < num_streams_; i++) {
138 KernelHandle_v_[i]->destroy_spiluk_handle();
144 template<
class MatrixType>
148 L_solver_->setObjectLabel(
"lower");
150 U_solver_->setObjectLabel(
"upper");
153 template<
class MatrixType>
162 isAllocated_ =
false;
163 isInitialized_ =
false;
165 A_local_ = Teuchos::null;
166 Graph_ = Teuchos::null;
173 if (! L_solver_.is_null ()) {
174 L_solver_->setMatrix (Teuchos::null);
176 if (! U_solver_.is_null ()) {
177 U_solver_->setMatrix (Teuchos::null);
189 template<
class MatrixType>
194 L_.is_null (), std::runtime_error,
"Ifpack2::RILUK::getL: The L factor "
195 "is null. Please call initialize() (and preferably also compute()) "
196 "before calling this method. If the input matrix has not yet been set, "
197 "you must first call setMatrix() with a nonnull input matrix before you "
198 "may call initialize() or compute().");
203 template<
class MatrixType>
204 const Tpetra::Vector<typename RILUK<MatrixType>::scalar_type,
211 D_.is_null (), std::runtime_error,
"Ifpack2::RILUK::getD: The D factor "
212 "(of diagonal entries) is null. Please call initialize() (and "
213 "preferably also compute()) before calling this method. If the input "
214 "matrix has not yet been set, you must first call setMatrix() with a "
215 "nonnull input matrix before you may call initialize() or compute().");
220 template<
class MatrixType>
225 U_.is_null (), std::runtime_error,
"Ifpack2::RILUK::getU: The U factor "
226 "is null. Please call initialize() (and preferably also compute()) "
227 "before calling this method. If the input matrix has not yet been set, "
228 "you must first call setMatrix() with a nonnull input matrix before you "
229 "may call initialize() or compute().");
234 template<
class MatrixType>
237 A_.
is_null (), std::runtime_error,
"Ifpack2::RILUK::getNodeSmootherComplexity: "
238 "The input matrix A is null. Please call setMatrix() with a nonnull "
239 "input matrix, then call compute(), before calling this method.");
241 if(!L_.is_null() && !U_.is_null())
242 return A_->getLocalNumEntries() + L_->getLocalNumEntries() + U_->getLocalNumEntries();
248 template<
class MatrixType>
254 A_.
is_null (), std::runtime_error,
"Ifpack2::RILUK::getDomainMap: "
255 "The matrix is null. Please call setMatrix() with a nonnull input "
256 "before calling this method.");
260 Graph_.is_null (), std::runtime_error,
"Ifpack2::RILUK::getDomainMap: "
261 "The computed graph is null. Please call initialize() before calling "
263 return Graph_->getL_Graph ()->getDomainMap ();
267 template<
class MatrixType>
273 A_.
is_null (), std::runtime_error,
"Ifpack2::RILUK::getRangeMap: "
274 "The matrix is null. Please call setMatrix() with a nonnull input "
275 "before calling this method.");
279 Graph_.is_null (), std::runtime_error,
"Ifpack2::RILUK::getRangeMap: "
280 "The computed graph is null. Please call initialize() before calling "
282 return Graph_->getL_Graph ()->getRangeMap ();
286 template<
class MatrixType>
292 if (! isAllocated_) {
293 if (!isKokkosKernelsStream_) {
302 L_ =
rcp (
new crs_matrix_type (Graph_->getL_Graph ()));
303 U_ =
rcp (
new crs_matrix_type (Graph_->getU_Graph ()));
304 L_->setAllToScalar (STS::zero ());
305 U_->setAllToScalar (STS::zero ());
310 D_ =
rcp (
new vec_type (Graph_->getL_Graph ()->getRowMap ()));
313 L_v_ = std::vector< Teuchos::RCP<crs_matrix_type> >(num_streams_);
314 U_v_ = std::vector< Teuchos::RCP<crs_matrix_type> >(num_streams_);
315 for (
int i = 0; i < num_streams_; i++) {
319 L_v_[i] =
rcp (
new crs_matrix_type (Graph_v_[i]->getL_Graph ()));
320 U_v_[i] =
rcp (
new crs_matrix_type (Graph_v_[i]->getU_Graph ()));
321 L_v_[i]->setAllToScalar (STS::zero ());
322 U_v_[i]->setAllToScalar (STS::zero ());
324 L_v_[i]->fillComplete ();
325 U_v_[i]->fillComplete ();
333 template<
class MatrixType>
341 using Details::getParamTryingTypes;
342 const char prefix[] =
"Ifpack2::RILUK: ";
349 double overalloc = 2.;
361 const std::string paramName (
"fact: iluk level-of-fill");
362 getParamTryingTypes<int, int, global_ordinal_type, double, float>
363 (fillLevel, params, paramName, prefix);
368 const std::string paramName (
"fact: absolute threshold");
369 getParamTryingTypes<magnitude_type, magnitude_type, double>
370 (absThresh, params, paramName, prefix);
373 const std::string paramName (
"fact: relative threshold");
374 getParamTryingTypes<magnitude_type, magnitude_type, double>
375 (relThresh, params, paramName, prefix);
378 const std::string paramName (
"fact: relax value");
379 getParamTryingTypes<magnitude_type, magnitude_type, double>
380 (relaxValue, params, paramName, prefix);
383 const std::string paramName (
"fact: iluk overalloc");
384 getParamTryingTypes<double, double>
385 (overalloc, params, paramName, prefix);
389 Details::IlukImplType::Enum ilukimplType = Details::IlukImplType::Serial;
391 static const char typeName[] =
"fact: type";
393 if ( ! params.
isType<std::string>(typeName))
break;
396 Array<std::string> ilukimplTypeStrs;
397 Array<Details::IlukImplType::Enum> ilukimplTypeEnums;
398 Details::IlukImplType::loadPLTypeOption (ilukimplTypeStrs, ilukimplTypeEnums);
400 s2i(ilukimplTypeStrs (), ilukimplTypeEnums (), typeName,
false);
405 if (ilukimplType == Details::IlukImplType::KSPILUK) {
406 this->isKokkosKernelsSpiluk_ =
true;
409 this->isKokkosKernelsSpiluk_ =
false;
413 const std::string paramName (
"fact: kspiluk number-of-streams");
414 getParamTryingTypes<int, int, global_ordinal_type>
415 (nstreams, params, paramName, prefix);
419 L_solver_->setParameters(params);
420 U_solver_->setParameters(params);
426 LevelOfFill_ = fillLevel;
427 Overalloc_ = overalloc;
428 num_streams_ = nstreams;
430 if (num_streams_ >= 1) {
431 this->isKokkosKernelsStream_ =
true;
433 if (params.
isParameter(
"fact: kspiluk reordering in streams"))
434 hasStreamReordered_ = params.
get<
bool> (
"fact: kspiluk reordering in streams");
437 this->isKokkosKernelsStream_ =
false;
445 if (absThresh != -STM::one ()) {
446 Athresh_ = absThresh;
448 if (relThresh != -STM::one ()) {
449 Rthresh_ = relThresh;
451 if (relaxValue != -STM::one ()) {
452 RelaxValue_ = relaxValue;
457 template<
class MatrixType>
464 template<
class MatrixType>
471 template<
class MatrixType>
477 using Teuchos::rcp_dynamic_cast;
478 using Teuchos::rcp_implicit_cast;
483 if (A->getRowMap ()->getComm ()->getSize () == 1 ||
484 A->getRowMap ()->isSameAs (* (A->getColMap ()))) {
491 RCP<const LocalFilter<row_matrix_type> > A_lf_r =
492 rcp_dynamic_cast<
const LocalFilter<row_matrix_type> > (A);
493 if (! A_lf_r.is_null ()) {
494 return rcp_implicit_cast<
const row_matrix_type> (A_lf_r);
500 return rcp (
new LocalFilter<row_matrix_type> (A));
505 template<
class MatrixType>
510 using Teuchos::rcp_const_cast;
511 using Teuchos::rcp_dynamic_cast;
512 using Teuchos::rcp_implicit_cast;
518 typedef Tpetra::Map<local_ordinal_type,
521 const char prefix[] =
"Ifpack2::RILUK::initialize: ";
524 (A_.
is_null (), std::runtime_error, prefix <<
"The matrix is null. Please "
525 "call setMatrix() with a nonnull input before calling this method.");
527 (! A_->isFillComplete (), std::runtime_error, prefix <<
"The matrix is not "
528 "fill complete. You may not invoke initialize() or compute() with this "
529 "matrix until the matrix is fill complete. If your matrix is a "
530 "Tpetra::CrsMatrix, please call fillComplete on it (with the domain and "
531 "range Maps, if appropriate) before calling this method.");
534 double startTime = timer.wallTime();
545 isInitialized_ =
false;
546 isAllocated_ =
false;
548 Graph_ = Teuchos::null;
550 if (isKokkosKernelsStream_) {
551 Graph_v_ = std::vector< Teuchos::RCP<iluk_graph_type> >(num_streams_);
552 A_local_diagblks = std::vector<local_matrix_device_type>(num_streams_);
553 for (
int i = 0; i < num_streams_; i++) {
554 Graph_v_[i] = Teuchos::null;
558 A_local_ = makeLocalFilter (A_);
561 A_local_.is_null (), std::logic_error,
"Ifpack2::RILUK::initialize: "
562 "makeLocalFilter returned null; it failed to compute A_local. "
563 "Please report this bug to the Ifpack2 developers.");
572 RCP<const crs_matrix_type> A_local_crs = Details::getCrsMatrix(A_local_);
573 if(A_local_crs.is_null()) {
574 local_ordinal_type numRows = A_local_->getLocalNumRows();
575 Array<size_t> entriesPerRow(numRows);
576 for(local_ordinal_type i = 0; i < numRows; i++) {
577 entriesPerRow[i] = A_local_->getNumEntriesInLocalRow(i);
579 RCP<crs_matrix_type> A_local_crs_nc =
581 A_local_->getColMap (),
584 nonconst_local_inds_host_view_type indices(
"indices",A_local_->getLocalMaxNumRowEntries());
585 nonconst_values_host_view_type values(
"values",A_local_->getLocalMaxNumRowEntries());
586 for(local_ordinal_type i = 0; i < numRows; i++) {
587 size_t numEntries = 0;
588 A_local_->getLocalRowCopy(i, indices, values, numEntries);
589 A_local_crs_nc->insertLocalValues(i, numEntries, reinterpret_cast<scalar_type*>(values.data()), indices.data());
591 A_local_crs_nc->fillComplete (A_local_->getDomainMap (), A_local_->getRangeMap ());
594 if (!isKokkosKernelsStream_) {
596 LevelOfFill_, 0, Overalloc_));
599 auto lclMtx = A_local_crs->getLocalMatrixDevice();
600 if (!hasStreamReordered_)
601 KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks);
603 perm_v_ = KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks,
true);
604 for(
int i = 0; i < num_streams_; i++) {
606 A_local_diagblks[i].numRows(),
607 A_local_crs->getRowMap()->getComm()));
609 A_local_diagblks[i].numCols(),
610 A_local_crs->getColMap()->getComm()));
612 A_local_diagblks_ColMap,
613 A_local_diagblks[i]));
615 LevelOfFill_, 0, Overalloc_));
620 if (this->isKokkosKernelsSpiluk_) {
621 if (!isKokkosKernelsStream_) {
622 this->KernelHandle_ =
Teuchos::rcp (
new kk_handle_type ());
623 KernelHandle_->create_spiluk_handle( KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
624 A_local_->getLocalNumRows(),
625 2*A_local_->getLocalNumEntries()*(LevelOfFill_+1),
626 2*A_local_->getLocalNumEntries()*(LevelOfFill_+1) );
627 Graph_->initialize (KernelHandle_);
630 KernelHandle_v_ = std::vector< Teuchos::RCP<kk_handle_type> >(num_streams_);
631 for (
int i = 0; i < num_streams_; i++) {
632 KernelHandle_v_[i] =
Teuchos::rcp (
new kk_handle_type ());
633 KernelHandle_v_[i]->create_spiluk_handle( KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
634 A_local_diagblks[i].numRows(),
635 2*A_local_diagblks[i].nnz()*(LevelOfFill_+1),
636 2*A_local_diagblks[i].nnz()*(LevelOfFill_+1) );
637 Graph_v_[i]->initialize (KernelHandle_v_[i]);
639 std::vector<int> weights(num_streams_);
640 std::fill(weights.begin(), weights.end(), 1);
641 exec_space_instances_ = Kokkos::Experimental::partition_space(
execution_space(), weights);
645 Graph_->initialize ();
649 checkOrderingConsistency (*A_local_);
650 if (!isKokkosKernelsStream_) {
651 L_solver_->setMatrix (L_);
654 L_solver_->setStreamInfo (isKokkosKernelsStream_, num_streams_, exec_space_instances_);
655 L_solver_->setMatrices (L_v_);
657 L_solver_->initialize ();
661 #if !defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) || !defined(KOKKOS_ENABLE_CUDA) || (CUDA_VERSION < 11030)
662 L_solver_->compute ();
665 if (!isKokkosKernelsStream_) {
666 U_solver_->setMatrix (U_);
669 U_solver_->setStreamInfo (isKokkosKernelsStream_, num_streams_, exec_space_instances_);
670 U_solver_->setMatrices (U_v_);
672 U_solver_->initialize ();
673 #if !defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) || !defined(KOKKOS_ENABLE_CUDA) || (CUDA_VERSION < 11030)
674 U_solver_->compute ();
682 isInitialized_ =
true;
684 initializeTime_ += (timer.wallTime() - startTime);
687 template<
class MatrixType>
697 bool gidsAreConsistentlyOrdered=
true;
698 global_ordinal_type indexOfInconsistentGID=0;
699 for (global_ordinal_type i=0; i<rowGIDs.
size(); ++i) {
700 if (rowGIDs[i] != colGIDs[i]) {
701 gidsAreConsistentlyOrdered=
false;
702 indexOfInconsistentGID=i;
707 "The ordering of the local GIDs in the row and column maps is not the same"
708 << std::endl <<
"at index " << indexOfInconsistentGID
709 <<
". Consistency is required, as all calculations are done with"
710 << std::endl <<
"local indexing.");
713 template<
class MatrixType>
716 initAllValues (
const row_matrix_type& A)
723 using Teuchos::reduceAll;
724 typedef Tpetra::Map<local_ordinal_type,global_ordinal_type,node_type> map_type;
726 size_t NumIn = 0, NumL = 0, NumU = 0;
727 bool DiagFound =
false;
728 size_t NumNonzeroDiags = 0;
729 size_t MaxNumEntries = A.getGlobalMaxNumRowEntries();
733 nonconst_local_inds_host_view_type InI(
"InI",MaxNumEntries);
736 nonconst_values_host_view_type InV(
"InV",MaxNumEntries);
741 const bool ReplaceValues = L_->isStaticGraph () || L_->isLocallyIndexed ();
746 L_->setAllToScalar (STS::zero ());
747 U_->setAllToScalar (STS::zero ());
750 D_->putScalar (STS::zero ());
751 auto DV = Kokkos::subview(D_->getLocalViewHost(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
753 RCP<const map_type> rowMap = L_->getRowMap ();
764 for (
size_t myRow=0; myRow<A.getLocalNumRows(); ++myRow) {
765 local_ordinal_type local_row = myRow;
769 A.getLocalRowCopy (local_row, InI, InV, NumIn);
777 for (
size_t j = 0; j < NumIn; ++j) {
778 const local_ordinal_type k = InI[j];
780 if (k == local_row) {
783 DV(local_row) += Rthresh_ * InV[j] + IFPACK2_SGN(InV[j]) * Athresh_;
787 true, std::runtime_error,
"Ifpack2::RILUK::initAllValues: current "
788 "GID k = " << k <<
" < 0. I'm not sure why this is an error; it is "
789 "probably an artifact of the undocumented assumptions of the "
790 "original implementation (likely copied and pasted from Ifpack). "
791 "Nevertheless, the code I found here insisted on this being an error "
792 "state, so I will throw an exception here.");
794 else if (k < local_row) {
799 else if (Teuchos::as<size_t>(k) <= rowMap->getLocalNumElements()) {
811 DV(local_row) = Athresh_;
816 L_->replaceLocalValues(local_row, LI(0, NumL), LV(0,NumL));
820 L_->insertLocalValues(local_row, LI(0,NumL), LV(0,NumL));
826 U_->replaceLocalValues(local_row, UI(0,NumU), UV(0,NumU));
830 U_->insertLocalValues(local_row, UI(0,NumU), UV(0,NumU));
838 isInitialized_ =
true;
842 template<
class MatrixType>
847 using Teuchos::rcp_const_cast;
848 using Teuchos::rcp_dynamic_cast;
851 const char prefix[] =
"Ifpack2::RILUK::compute: ";
857 (A_.
is_null (), std::runtime_error, prefix <<
"The matrix is null. Please "
858 "call setMatrix() with a nonnull input before calling this method.");
860 (! A_->isFillComplete (), std::runtime_error, prefix <<
"The matrix is not "
861 "fill complete. You may not invoke initialize() or compute() with this "
862 "matrix until the matrix is fill complete. If your matrix is a "
863 "Tpetra::CrsMatrix, please call fillComplete on it (with the domain and "
864 "range Maps, if appropriate) before calling this method.");
866 if (! isInitialized ()) {
874 double startTime = timer.
wallTime();
878 if (!this->isKokkosKernelsSpiluk_) {
881 initAllValues (*A_local_);
887 const scalar_type MaxDiagonalValue = STS::one () / MinDiagonalValue;
889 size_t NumIn, NumL, NumU;
892 const size_t MaxNumEntries =
893 L_->getLocalMaxNumRowEntries () + U_->getLocalMaxNumRowEntries () + 1;
897 size_t num_cols = U_->getColMap()->getLocalNumElements();
900 auto DV = Kokkos::subview(D_->getLocalViewHost(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
904 for (
size_t j = 0; j < num_cols; ++j) {
907 using IST =
typename row_matrix_type::impl_scalar_type;
908 for (
size_t i = 0; i < L_->getLocalNumRows (); ++i) {
912 local_inds_host_view_type UUI;
913 values_host_view_type UUV;
917 NumIn = MaxNumEntries;
918 nonconst_local_inds_host_view_type InI_v(InI.
data(),MaxNumEntries);
919 nonconst_values_host_view_type InV_v(reinterpret_cast<IST*>(InV.
data()),MaxNumEntries);
921 L_->getLocalRowCopy (local_row, InI_v , InV_v, NumL);
924 InI[NumL] = local_row;
926 nonconst_local_inds_host_view_type InI_sub(InI.
data()+NumL+1,MaxNumEntries-NumL-1);
927 nonconst_values_host_view_type InV_sub(reinterpret_cast<IST*>(InV.
data())+NumL+1,MaxNumEntries-NumL-1);
929 U_->getLocalRowCopy (local_row, InI_sub,InV_sub, NumU);
933 for (
size_t j = 0; j < NumIn; ++j) {
939 for (
size_t jj = 0; jj < NumL; ++jj) {
941 IST multiplier = InV[jj];
945 U_->getLocalRowView(j, UUI, UUV);
948 if (RelaxValue_ == STM::zero ()) {
949 for (
size_t k = 0; k < NumUU; ++k) {
950 const int kk = colflag[UUI[k]];
955 InV[kk] -=
static_cast<scalar_type>(multiplier * UUV[k]);
961 for (
size_t k = 0; k < NumUU; ++k) {
965 const int kk = colflag[UUI[k]];
967 InV[kk] -=
static_cast<scalar_type>(multiplier*UUV[k]);
970 diagmod -=
static_cast<scalar_type>(multiplier*UUV[k]);
978 L_->replaceLocalValues (local_row, InI (0, NumL), InV (0, NumL));
983 if (RelaxValue_ != STM::zero ()) {
984 DV(i) += RelaxValue_*diagmod;
987 if (STS::magnitude (DV(i)) > STS::magnitude (MaxDiagonalValue)) {
988 if (STS::real (DV(i)) < STM::zero ()) {
989 DV(i) = -MinDiagonalValue;
992 DV(i) = MinDiagonalValue;
999 for (
size_t j = 0; j < NumU; ++j) {
1005 U_->replaceLocalValues (local_row, InI (NumL+1, NumU), InV (NumL+1, NumU));
1009 for (
size_t j = 0; j < NumIn; ++j) {
1010 colflag[InI[j]] = -1;
1021 L_->fillComplete (L_->getColMap (), A_local_->getRangeMap ());
1022 U_->fillComplete (A_local_->getDomainMap (), U_->getRowMap ());
1025 L_solver_->setMatrix (L_);
1026 L_solver_->compute ();
1027 U_solver_->setMatrix (U_);
1028 U_solver_->compute ();
1032 RCP<const crs_matrix_type> A_local_crs = Details::getCrsMatrix(A_local_);
1033 if(A_local_crs.is_null()) {
1035 Array<size_t> entriesPerRow(numRows);
1037 entriesPerRow[i] = A_local_->getNumEntriesInLocalRow(i);
1039 RCP<crs_matrix_type> A_local_crs_nc =
1041 A_local_->getColMap (),
1044 nonconst_local_inds_host_view_type indices(
"indices",A_local_->getLocalMaxNumRowEntries());
1045 nonconst_values_host_view_type values(
"values",A_local_->getLocalMaxNumRowEntries());
1047 size_t numEntries = 0;
1048 A_local_->getLocalRowCopy(i, indices, values, numEntries);
1049 A_local_crs_nc->insertLocalValues(i, numEntries, reinterpret_cast<scalar_type*>(values.data()),indices.data());
1051 A_local_crs_nc->fillComplete (A_local_->getDomainMap (), A_local_->getRangeMap ());
1054 auto lclMtx = A_local_crs->getLocalMatrixDevice();
1055 if (!isKokkosKernelsStream_) {
1056 A_local_rowmap_ = lclMtx.graph.row_map;
1057 A_local_entries_ = lclMtx.graph.entries;
1058 A_local_values_ = lclMtx.values;
1061 if (!hasStreamReordered_)
1062 KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks);
1064 perm_v_ = KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks,
true);
1066 A_local_diagblks_rowmap_v_ = std::vector<lno_row_view_t>(num_streams_);
1067 A_local_diagblks_entries_v_ = std::vector<lno_nonzero_view_t>(num_streams_);
1068 A_local_diagblks_values_v_ = std::vector<scalar_nonzero_view_t>(num_streams_);
1069 for(
int i = 0; i < num_streams_; i++) {
1070 A_local_diagblks_rowmap_v_[i] = A_local_diagblks[i].graph.row_map;
1071 A_local_diagblks_entries_v_[i] = A_local_diagblks[i].graph.entries;
1072 A_local_diagblks_values_v_[i] = A_local_diagblks[i].values;
1077 if (!isKokkosKernelsStream_) {
1081 if (L_->isStaticGraph () || L_->isLocallyIndexed ()) {
1082 L_->setAllToScalar (STS::zero ());
1083 U_->setAllToScalar (STS::zero ());
1087 for(
int i = 0; i < num_streams_; i++) {
1088 L_v_[i]->resumeFill ();
1089 U_v_[i]->resumeFill ();
1091 if (L_v_[i]->isStaticGraph () || L_v_[i]->isLocallyIndexed ()) {
1092 L_v_[i]->setAllToScalar (STS::zero ());
1093 U_v_[i]->setAllToScalar (STS::zero ());
1098 using row_map_type =
typename crs_matrix_type::local_matrix_device_type::row_map_type;
1100 if (!isKokkosKernelsStream_) {
1101 auto lclL = L_->getLocalMatrixDevice();
1102 row_map_type L_rowmap = lclL.graph.row_map;
1103 auto L_entries = lclL.graph.entries;
1104 auto L_values = lclL.values;
1106 auto lclU = U_->getLocalMatrixDevice();
1107 row_map_type U_rowmap = lclU.graph.row_map;
1108 auto U_entries = lclU.graph.entries;
1109 auto U_values = lclU.values;
1111 KokkosSparse::Experimental::spiluk_numeric( KernelHandle_.getRawPtr(), LevelOfFill_,
1112 A_local_rowmap_, A_local_entries_, A_local_values_,
1113 L_rowmap, L_entries, L_values, U_rowmap, U_entries, U_values );
1115 L_->fillComplete (L_->getColMap (), A_local_->getRangeMap ());
1116 U_->fillComplete (A_local_->getDomainMap (), U_->getRowMap ());
1118 L_solver_->setMatrix (L_);
1119 U_solver_->setMatrix (U_);
1122 std::vector<lno_row_view_t> L_rowmap_v(num_streams_);
1123 std::vector<lno_nonzero_view_t> L_entries_v(num_streams_);
1124 std::vector<scalar_nonzero_view_t> L_values_v(num_streams_);
1125 std::vector<lno_row_view_t> U_rowmap_v(num_streams_);
1126 std::vector<lno_nonzero_view_t> U_entries_v(num_streams_);
1127 std::vector<scalar_nonzero_view_t> U_values_v(num_streams_);
1128 std::vector<kk_handle_type *> KernelHandle_rawptr_v_(num_streams_);
1129 for(
int i = 0; i < num_streams_; i++) {
1130 auto lclL = L_v_[i]->getLocalMatrixDevice();
1131 L_rowmap_v[i] = lclL.graph.row_map;
1132 L_entries_v[i] = lclL.graph.entries;
1133 L_values_v[i] = lclL.values;
1135 auto lclU = U_v_[i]->getLocalMatrixDevice();
1136 U_rowmap_v[i] = lclU.graph.row_map;
1137 U_entries_v[i] = lclU.graph.entries;
1138 U_values_v[i] = lclU.values;
1139 KernelHandle_rawptr_v_[i] = KernelHandle_v_[i].getRawPtr();
1141 KokkosSparse::Experimental::spiluk_numeric_streams( exec_space_instances_, KernelHandle_rawptr_v_, LevelOfFill_,
1142 A_local_diagblks_rowmap_v_, A_local_diagblks_entries_v_, A_local_diagblks_values_v_,
1143 L_rowmap_v, L_entries_v, L_values_v,
1144 U_rowmap_v, U_entries_v, U_values_v );
1145 for(
int i = 0; i < num_streams_; i++) {
1146 L_v_[i]->fillComplete ();
1147 U_v_[i]->fillComplete ();
1150 L_solver_->setMatrices (L_v_);
1151 U_solver_->setMatrices (U_v_);
1154 L_solver_->compute ();
1155 U_solver_->compute ();
1160 computeTime_ += (timer.
wallTime() - startTime);
1164 template<
class MatrixType>
1167 apply (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1168 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
1174 using Teuchos::rcpFromRef;
1177 A_.
is_null (), std::runtime_error,
"Ifpack2::RILUK::apply: The matrix is "
1178 "null. Please call setMatrix() with a nonnull input, then initialize() "
1179 "and compute(), before calling this method.");
1181 ! isComputed (), std::runtime_error,
1182 "Ifpack2::RILUK::apply: If you have not yet called compute(), "
1183 "you must call compute() before calling this method.");
1184 TEUCHOS_TEST_FOR_EXCEPTION(
1185 X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
1186 "Ifpack2::RILUK::apply: X and Y do not have the same number of columns. "
1187 "X.getNumVectors() = " << X.getNumVectors ()
1188 <<
" != Y.getNumVectors() = " << Y.getNumVectors () <<
".");
1189 TEUCHOS_TEST_FOR_EXCEPTION(
1191 "Ifpack2::RILUK::apply: mode = Teuchos::CONJ_TRANS is not implemented for "
1192 "complex Scalar type. Please talk to the Ifpack2 developers to get this "
1193 "fixed. There is a FIXME in this file about this very issue.");
1194 #ifdef HAVE_IFPACK2_DEBUG
1197 TEUCHOS_TEST_FOR_EXCEPTION( STM::isnaninf (D_nrm1), std::runtime_error,
"Ifpack2::RILUK::apply: The 1-norm of the stored diagonal is NaN or Inf.");
1201 for (
size_t j = 0; j < X.getNumVectors (); ++j) {
1202 if (STM::isnaninf (norms[j])) {
1207 TEUCHOS_TEST_FOR_EXCEPTION( ! good, std::runtime_error,
"Ifpack2::RILUK::apply: The 1-norm of the input X is NaN or Inf.");
1209 #endif // HAVE_IFPACK2_DEBUG
1215 double startTime = timer.
wallTime();
1218 if (alpha == one && beta == zero) {
1219 if (isKokkosKernelsSpiluk_ && isKokkosKernelsStream_ && hasStreamReordered_) {
1220 MV ReorderedX (X.getMap(), X.getNumVectors());
1221 MV ReorderedY (Y.getMap(), Y.getNumVectors());
1222 for (
size_t j = 0; j < X.getNumVectors(); j++) {
1223 auto X_j = X.getVector(j);
1224 auto ReorderedX_j = ReorderedX.getVectorNonConst(j);
1225 auto X_lcl = X_j->getLocalViewDevice(Tpetra::Access::ReadOnly);
1226 auto ReorderedX_lcl = ReorderedX_j->getLocalViewDevice(Tpetra::Access::ReadWrite);
1229 for(
int i = 0; i < num_streams_; i++) {
1230 auto perm_i = perm_v_[i];
1231 stream_end = stream_begin + perm_i.extent(0);
1232 auto X_lcl_sub = Kokkos::subview (X_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1233 auto ReorderedX_lcl_sub = Kokkos::subview (ReorderedX_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1234 Kokkos::parallel_for( Kokkos::RangePolicy<execution_space>(0, static_cast<int>(perm_i.extent(0))), KOKKOS_LAMBDA (
const int& ii ) {
1235 ReorderedX_lcl_sub(perm_i(ii)) = X_lcl_sub(ii);
1237 stream_begin = stream_end;
1243 L_solver_->apply (ReorderedX, Y, mode);
1245 U_solver_->apply (Y, ReorderedY, mode);
1249 U_solver_->apply (ReorderedX, Y, mode);
1251 L_solver_->apply (Y, ReorderedY, mode);
1254 for (
size_t j = 0; j < Y.getNumVectors(); j++) {
1255 auto Y_j = Y.getVectorNonConst(j);
1256 auto ReorderedY_j = ReorderedY.getVector(j);
1257 auto Y_lcl = Y_j->getLocalViewDevice(Tpetra::Access::ReadWrite);
1258 auto ReorderedY_lcl = ReorderedY_j->getLocalViewDevice(Tpetra::Access::ReadOnly);
1261 for(
int i = 0; i < num_streams_; i++) {
1262 auto perm_i = perm_v_[i];
1263 stream_end = stream_begin + perm_i.extent(0);
1264 auto Y_lcl_sub = Kokkos::subview (Y_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1265 auto ReorderedY_lcl_sub = Kokkos::subview (ReorderedY_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1266 Kokkos::parallel_for( Kokkos::RangePolicy<execution_space>(0, static_cast<int>(perm_i.extent(0))), KOKKOS_LAMBDA (
const int& ii ) {
1267 Y_lcl_sub(ii) = ReorderedY_lcl_sub(perm_i(ii));
1269 stream_begin = stream_end;
1275 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030)
1279 MV Y_tmp (Y.getMap (), Y.getNumVectors ());
1282 L_solver_->apply (X, Y_tmp, mode);
1284 if (!this->isKokkosKernelsSpiluk_) {
1287 Y_tmp.elementWiseMultiply (one, *D_, Y_tmp, zero);
1290 U_solver_->apply (Y_tmp, Y, mode);
1293 L_solver_->apply (X, Y, mode);
1295 if (!this->isKokkosKernelsSpiluk_) {
1298 Y.elementWiseMultiply (one, *D_, Y, zero);
1301 U_solver_->apply (Y, Y, mode);
1305 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030)
1309 MV Y_tmp (Y.getMap (), Y.getNumVectors ());
1312 U_solver_->apply (X, Y_tmp, mode);
1314 if (!this->isKokkosKernelsSpiluk_) {
1320 Y_tmp.elementWiseMultiply (one, *D_, Y_tmp, zero);
1323 L_solver_->apply (Y_tmp, Y, mode);
1326 U_solver_->apply (X, Y, mode);
1328 if (!this->isKokkosKernelsSpiluk_) {
1334 Y.elementWiseMultiply (one, *D_, Y, zero);
1337 L_solver_->apply (Y, Y, mode);
1343 if (alpha == zero) {
1353 MV Y_tmp (Y.getMap (), Y.getNumVectors ());
1354 apply (X, Y_tmp, mode);
1355 Y.update (alpha, Y_tmp, beta);
1360 #ifdef HAVE_IFPACK2_DEBUG
1365 for (
size_t j = 0; j < Y.getNumVectors (); ++j) {
1366 if (STM::isnaninf (norms[j])) {
1371 TEUCHOS_TEST_FOR_EXCEPTION( ! good, std::runtime_error,
"Ifpack2::RILUK::apply: The 1-norm of the output Y is NaN or Inf.");
1373 #endif // HAVE_IFPACK2_DEBUG
1376 applyTime_ += (timer.
wallTime() - startTime);
1413 template<
class MatrixType>
1416 std::ostringstream os;
1421 os <<
"\"Ifpack2::RILUK\": {";
1422 os <<
"Initialized: " << (isInitialized () ?
"true" :
"false") <<
", "
1423 <<
"Computed: " << (isComputed () ?
"true" :
"false") <<
", ";
1425 os <<
"Level-of-fill: " << getLevelOfFill() <<
", ";
1427 if(isKokkosKernelsSpiluk_) os<<
"KK-SPILUK, ";
1428 if(isKokkosKernelsStream_) os<<
"KK-Stream, ";
1431 os <<
"Matrix: null";
1434 os <<
"Global matrix dimensions: ["
1435 << A_->getGlobalNumRows () <<
", " << A_->getGlobalNumCols () <<
"]"
1436 <<
", Global nnz: " << A_->getGlobalNumEntries();
1439 if (! L_solver_.is_null ()) os <<
", " << L_solver_->description ();
1440 if (! U_solver_.is_null ()) os <<
", " << U_solver_->description ();
1448 #define IFPACK2_RILUK_INSTANT(S,LO,GO,N) \
1449 template class Ifpack2::RILUK< Tpetra::RowMatrix<S, LO, GO, N> >;
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getRangeMap() const
Returns the Tpetra::Map object associated with the range of this operator.
Definition: Ifpack2_RILUK_def.hpp:271
node_type::execution_space execution_space
The Kokkos execution space of the input MatrixType.
Definition: Ifpack2_RILUK_decl.hpp:275
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition: Ifpack2_RILUK_decl.hpp:263
T & get(const std::string &name, T def_value)
bool nonnull(const std::shared_ptr< T > &p)
const Tpetra::Vector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > & getD() const
Return the diagonal entries of the ILU factorization.
Definition: Ifpack2_RILUK_def.hpp:208
void initialize()
Initialize by computing the symbolic incomplete factorization.
Definition: Ifpack2_RILUK_def.hpp:506
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Tpetra::RowMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > row_matrix_type
Tpetra::RowMatrix specialization used by this class.
Definition: Ifpack2_RILUK_decl.hpp:281
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Change the matrix to be preconditioned.
Definition: Ifpack2_RILUK_def.hpp:155
std::string description() const
A one-line description of this object.
Definition: Ifpack2_RILUK_def.hpp:1414
void setParameters(const Teuchos::ParameterList ¶ms)
Definition: Ifpack2_RILUK_def.hpp:336
ILU(k) factorization of a given Tpetra::RowMatrix.
Definition: Ifpack2_RILUK_decl.hpp:245
size_t getNodeSmootherComplexity() const
Get a rough estimate of cost per iteration.
Definition: Ifpack2_RILUK_def.hpp:235
Teuchos::RCP< const crs_matrix_type > getCrsMatrix() const
Return the input matrix A as a Tpetra::CrsMatrix, if possible; else throws.
Definition: Ifpack2_RILUK_def.hpp:466
Tpetra::CrsMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > crs_matrix_type
Tpetra::CrsMatrix specialization used by this class for representing L and U.
Definition: Ifpack2_RILUK_decl.hpp:284
const crs_matrix_type & getU() const
Return the U factor of the ILU factorization.
Definition: Ifpack2_RILUK_def.hpp:222
bool isParameter(const std::string &name) const
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition: Ifpack2_RILUK_decl.hpp:266
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getDomainMap() const
Returns the Tpetra::Map object associated with the domain of this operator.
Definition: Ifpack2_RILUK_def.hpp:252
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition: Ifpack2_RILUK_decl.hpp:260
virtual ~RILUK()
Destructor (declared virtual for memory safety).
Definition: Ifpack2_RILUK_def.hpp:128
"Preconditioner" that solves local sparse triangular systems.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:79
void compute()
Compute the (numeric) incomplete factorization.
Definition: Ifpack2_RILUK_def.hpp:843
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the (inverse of the) incomplete factorization to X, resulting in Y.
Definition: Ifpack2_RILUK_def.hpp:1167
Teuchos::ScalarTraits< scalar_type >::magnitudeType magnitude_type
The type of the magnitude (absolute value) of a matrix entry.
Definition: Ifpack2_RILUK_decl.hpp:269
crs_matrix_type::impl_scalar_type impl_scalar_type
Scalar type stored in Kokkos::Views (CrsMatrix and MultiVector)
Definition: Ifpack2_RILUK_decl.hpp:293
void resize(size_type new_size, const value_type &x=value_type())
Construct a level filled graph for use in computing an ILU(k) incomplete factorization.
Definition: Ifpack2_IlukGraph.hpp:100
IntegralType getIntegralValue(const std::string &str, const std::string ¶mName="", const std::string &sublistName="") const
Teuchos::RCP< const row_matrix_type > getMatrix() const
Get the input matrix.
Definition: Ifpack2_RILUK_def.hpp:459
bool isType(const std::string &name) const
const crs_matrix_type & getL() const
Return the L factor of the ILU factorization.
Definition: Ifpack2_RILUK_def.hpp:191
std::string typeName(const T &t)
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition: Ifpack2_RILUK_decl.hpp:257