10 #ifndef IFPACK2_CRSRILUK_DEF_HPP
11 #define IFPACK2_CRSRILUK_DEF_HPP
14 #include "Ifpack2_LocalFilter.hpp"
15 #include "Tpetra_CrsMatrix.hpp"
16 #include "Teuchos_StandardParameterEntryValidators.hpp"
17 #include "Ifpack2_LocalSparseTriangularSolver.hpp"
18 #include "Ifpack2_Details_getParamTryingTypes.hpp"
19 #include "Ifpack2_Details_getCrsMatrix.hpp"
20 #include "Kokkos_Sort.hpp"
21 #include "KokkosSparse_Utils.hpp"
22 #include "KokkosKernels_Sorting.hpp"
23 #include "KokkosSparse_IOUtils.hpp"
36 type_strs[0] =
"Serial";
37 type_strs[1] =
"KSPILUK";
39 type_enums[0] = Serial;
40 type_enums[1] = KSPILUK;
45 template <
class MatrixType>
51 , isInitialized_(false)
56 , initializeTime_(0.0)
62 , isKokkosKernelsSpiluk_(false)
63 , isKokkosKernelsStream_(false)
65 , hasStreamReordered_(false) {
69 template <
class MatrixType>
75 , isInitialized_(false)
80 , initializeTime_(0.0)
86 , isKokkosKernelsSpiluk_(false)
87 , isKokkosKernelsStream_(false)
89 , hasStreamReordered_(false) {
93 template <
class MatrixType>
95 if (!isKokkosKernelsStream_) {
97 KernelHandle_->destroy_spiluk_handle();
100 for (
size_t i = 0; i < KernelHandle_v_.size(); i++) {
102 KernelHandle_v_[i]->destroy_spiluk_handle();
108 template <
class MatrixType>
111 L_solver_->setObjectLabel(
"lower");
113 U_solver_->setObjectLabel(
"upper");
116 template <
class MatrixType>
123 isAllocated_ =
false;
124 isInitialized_ =
false;
126 A_local_ = Teuchos::null;
127 Graph_ = Teuchos::null;
134 if (!L_solver_.is_null()) {
135 L_solver_->setMatrix(Teuchos::null);
137 if (!U_solver_.is_null()) {
138 U_solver_->setMatrix(Teuchos::null);
148 template <
class MatrixType>
152 L_.is_null(), std::runtime_error,
153 "Ifpack2::RILUK::getL: The L factor "
154 "is null. Please call initialize() (and preferably also compute()) "
155 "before calling this method. If the input matrix has not yet been set, "
156 "you must first call setMatrix() with a nonnull input matrix before you "
157 "may call initialize() or compute().");
161 template <
class MatrixType>
162 const Tpetra::Vector<typename RILUK<MatrixType>::scalar_type,
168 D_.is_null(), std::runtime_error,
169 "Ifpack2::RILUK::getD: The D factor "
170 "(of diagonal entries) is null. Please call initialize() (and "
171 "preferably also compute()) before calling this method. If the input "
172 "matrix has not yet been set, you must first call setMatrix() with a "
173 "nonnull input matrix before you may call initialize() or compute().");
177 template <
class MatrixType>
181 U_.is_null(), std::runtime_error,
182 "Ifpack2::RILUK::getU: The U factor "
183 "is null. Please call initialize() (and preferably also compute()) "
184 "before calling this method. If the input matrix has not yet been set, "
185 "you must first call setMatrix() with a nonnull input matrix before you "
186 "may call initialize() or compute().");
190 template <
class MatrixType>
193 A_.
is_null(), std::runtime_error,
194 "Ifpack2::RILUK::getNodeSmootherComplexity: "
195 "The input matrix A is null. Please call setMatrix() with a nonnull "
196 "input matrix, then call compute(), before calling this method.");
198 if (!L_.is_null() && !U_.is_null())
199 return A_->getLocalNumEntries() + L_->getLocalNumEntries() + U_->getLocalNumEntries();
204 template <
class MatrixType>
210 A_.
is_null(), std::runtime_error,
211 "Ifpack2::RILUK::getDomainMap: "
212 "The matrix is null. Please call setMatrix() with a nonnull input "
213 "before calling this method.");
217 Graph_.is_null(), std::runtime_error,
218 "Ifpack2::RILUK::getDomainMap: "
219 "The computed graph is null. Please call initialize() before calling "
221 return Graph_->getL_Graph()->getDomainMap();
224 template <
class MatrixType>
230 A_.
is_null(), std::runtime_error,
231 "Ifpack2::RILUK::getRangeMap: "
232 "The matrix is null. Please call setMatrix() with a nonnull input "
233 "before calling this method.");
237 Graph_.is_null(), std::runtime_error,
238 "Ifpack2::RILUK::getRangeMap: "
239 "The computed graph is null. Please call initialize() before calling "
241 return Graph_->getL_Graph()->getRangeMap();
244 template <
class MatrixType>
250 if (!isKokkosKernelsStream_) {
259 L_ =
rcp(
new crs_matrix_type(Graph_->getL_Graph()));
260 U_ =
rcp(
new crs_matrix_type(Graph_->getU_Graph()));
261 L_->setAllToScalar(STS::zero());
262 U_->setAllToScalar(STS::zero());
267 D_ =
rcp(
new vec_type(Graph_->getL_Graph()->getRowMap()));
269 L_v_ = std::vector<Teuchos::RCP<crs_matrix_type> >(num_streams_, null);
270 U_v_ = std::vector<Teuchos::RCP<crs_matrix_type> >(num_streams_, null);
271 for (
int i = 0; i < num_streams_; i++) {
272 L_v_[i] =
rcp(
new crs_matrix_type(Graph_v_[i]->getL_Graph()));
273 U_v_[i] =
rcp(
new crs_matrix_type(Graph_v_[i]->getU_Graph()));
274 L_v_[i]->setAllToScalar(STS::zero());
275 U_v_[i]->setAllToScalar(STS::zero());
277 L_v_[i]->fillComplete();
278 U_v_[i]->fillComplete();
285 template <
class MatrixType>
288 using Details::getParamTryingTypes;
292 const char prefix[] =
"Ifpack2::RILUK: ";
299 double overalloc = 2.;
311 const std::string paramName(
"fact: iluk level-of-fill");
312 getParamTryingTypes<int, int, global_ordinal_type, double, float>(fillLevel, params, paramName, prefix);
317 const std::string paramName(
"fact: absolute threshold");
318 getParamTryingTypes<magnitude_type, magnitude_type, double>(absThresh, params, paramName, prefix);
321 const std::string paramName(
"fact: relative threshold");
322 getParamTryingTypes<magnitude_type, magnitude_type, double>(relThresh, params, paramName, prefix);
325 const std::string paramName(
"fact: relax value");
326 getParamTryingTypes<magnitude_type, magnitude_type, double>(relaxValue, params, paramName, prefix);
329 const std::string paramName(
"fact: iluk overalloc");
330 getParamTryingTypes<double, double>(overalloc, params, paramName, prefix);
334 Details::IlukImplType::Enum ilukimplType = Details::IlukImplType::Serial;
336 static const char typeName[] =
"fact: type";
338 if (!params.
isType<std::string>(typeName))
break;
341 Array<std::string> ilukimplTypeStrs;
342 Array<Details::IlukImplType::Enum> ilukimplTypeEnums;
343 Details::IlukImplType::loadPLTypeOption(ilukimplTypeStrs, ilukimplTypeEnums);
345 s2i(ilukimplTypeStrs(), ilukimplTypeEnums(), typeName,
false);
350 if (ilukimplType == Details::IlukImplType::KSPILUK) {
351 this->isKokkosKernelsSpiluk_ =
true;
353 this->isKokkosKernelsSpiluk_ =
false;
357 const std::string paramName(
"fact: kspiluk number-of-streams");
358 getParamTryingTypes<int, int, global_ordinal_type>(nstreams, params, paramName, prefix);
362 L_solver_->setParameters(params);
363 U_solver_->setParameters(params);
369 LevelOfFill_ = fillLevel;
370 Overalloc_ = overalloc;
371 #ifdef KOKKOS_ENABLE_OPENMP
372 if constexpr (std::is_same_v<execution_space, Kokkos::OpenMP>) {
376 num_streams_ = nstreams;
378 if (num_streams_ >= 1) {
379 this->isKokkosKernelsStream_ =
true;
381 if (params.
isParameter(
"fact: kspiluk reordering in streams"))
382 hasStreamReordered_ = params.
get<
bool>(
"fact: kspiluk reordering in streams");
384 this->isKokkosKernelsStream_ =
false;
392 if (absThresh != -STM::one()) {
393 Athresh_ = absThresh;
395 if (relThresh != -STM::one()) {
396 Rthresh_ = relThresh;
398 if (relaxValue != -STM::one()) {
399 RelaxValue_ = relaxValue;
403 template <
class MatrixType>
409 template <
class MatrixType>
415 template <
class MatrixType>
420 using Teuchos::rcp_dynamic_cast;
421 using Teuchos::rcp_implicit_cast;
426 if (A->getRowMap()->getComm()->getSize() == 1 ||
427 A->getRowMap()->isSameAs(*(A->getColMap()))) {
434 RCP<const LocalFilter<row_matrix_type> > A_lf_r =
436 if (!A_lf_r.is_null()) {
446 template <
class MatrixType>
452 using Teuchos::rcp_const_cast;
453 using Teuchos::rcp_dynamic_cast;
454 using Teuchos::rcp_implicit_cast;
459 typedef Tpetra::Map<local_ordinal_type,
463 const char prefix[] =
"Ifpack2::RILUK::initialize: ";
466 "call setMatrix() with a nonnull input before calling this method.");
468 "fill complete. You may not invoke initialize() or compute() with this "
469 "matrix until the matrix is fill complete. If your matrix is a "
470 "Tpetra::CrsMatrix, please call fillComplete on it (with the domain and "
471 "range Maps, if appropriate) before calling this method.");
474 double startTime = timer.wallTime();
485 isInitialized_ =
false;
486 isAllocated_ =
false;
488 Graph_ = Teuchos::null;
490 reordered_x_ =
nullptr;
491 reordered_y_ =
nullptr;
493 if (isKokkosKernelsStream_) {
494 Graph_v_ = std::vector<Teuchos::RCP<iluk_graph_type> >(num_streams_);
495 A_local_diagblks_v_ = std::vector<local_matrix_device_type>(num_streams_);
496 for (
int i = 0; i < num_streams_; i++) {
497 Graph_v_[i] = Teuchos::null;
501 A_local_ = makeLocalFilter(A_);
504 A_local_.is_null(), std::logic_error,
505 "Ifpack2::RILUK::initialize: "
506 "makeLocalFilter returned null; it failed to compute A_local. "
507 "Please report this bug to the Ifpack2 developers.");
516 A_local_crs_ = Details::getCrsMatrix(A_local_);
517 if (A_local_crs_.is_null()) {
518 local_ordinal_type numRows = A_local_->getLocalNumRows();
519 Array<size_t> entriesPerRow(numRows);
520 for (local_ordinal_type i = 0; i < numRows; i++) {
521 entriesPerRow[i] = A_local_->getNumEntriesInLocalRow(i);
525 A_local_->getColMap(),
528 nonconst_local_inds_host_view_type indices(
"indices", A_local_->getLocalMaxNumRowEntries());
529 nonconst_values_host_view_type values(
"values", A_local_->getLocalMaxNumRowEntries());
530 for (local_ordinal_type i = 0; i < numRows; i++) {
531 size_t numEntries = 0;
532 A_local_->getLocalRowCopy(i, indices, values, numEntries);
533 A_local_crs_nc_->insertLocalValues(i, numEntries, reinterpret_cast<scalar_type*>(values.data()), indices.data());
535 A_local_crs_nc_->fillComplete(A_local_->getDomainMap(), A_local_->getRangeMap());
538 if (!isKokkosKernelsStream_) {
540 LevelOfFill_, 0, Overalloc_));
542 std::vector<int> weights(num_streams_);
543 std::fill(weights.begin(), weights.end(), 1);
544 exec_space_instances_ = Kokkos::Experimental::partition_space(
execution_space(), weights);
546 auto lclMtx = A_local_crs_->getLocalMatrixDevice();
547 if (!hasStreamReordered_) {
548 KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks_v_);
550 perm_v_ = KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks_v_,
true);
551 reverse_perm_v_.resize(perm_v_.size());
552 for (
size_t istream = 0; istream < perm_v_.size(); ++istream) {
553 using perm_type =
typename lno_nonzero_view_t::non_const_type;
554 const auto perm = perm_v_[istream];
555 const auto perm_length = perm.extent(0);
556 perm_type reverse_perm(
557 Kokkos::view_alloc(Kokkos::WithoutInitializing,
"reverse_perm"),
559 Kokkos::parallel_for(
560 Kokkos::RangePolicy<execution_space>(exec_space_instances_[istream], 0, perm_length),
561 KOKKOS_LAMBDA(
const local_ordinal_type ii) {
562 reverse_perm(perm(ii)) = ii;
564 reverse_perm_v_[istream] = reverse_perm;
568 A_local_diagblks_rowmap_v_ = std::vector<lno_row_view_t>(num_streams_);
569 A_local_diagblks_entries_v_ = std::vector<lno_nonzero_view_t>(num_streams_);
570 A_local_diagblks_values_v_ = std::vector<scalar_nonzero_view_t>(num_streams_);
572 for (
int i = 0; i < num_streams_; i++) {
573 A_local_diagblks_rowmap_v_[i] = A_local_diagblks_v_[i].graph.row_map;
574 A_local_diagblks_entries_v_[i] = A_local_diagblks_v_[i].graph.entries;
575 A_local_diagblks_values_v_[i] = A_local_diagblks_v_[i].values;
578 A_local_diagblks_v_[i].numRows(),
579 A_local_crs_->getRowMap()->getComm()));
581 A_local_diagblks_v_[i].numCols(),
582 A_local_crs_->getColMap()->getComm()));
584 A_local_diagblks_ColMap,
585 A_local_diagblks_v_[i]));
587 LevelOfFill_, 0, Overalloc_));
592 if (this->isKokkosKernelsSpiluk_) {
593 if (!isKokkosKernelsStream_) {
594 this->KernelHandle_ =
Teuchos::rcp(
new kk_handle_type());
595 KernelHandle_->create_spiluk_handle(KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
596 A_local_->getLocalNumRows(),
597 2 * A_local_->getLocalNumEntries() * (LevelOfFill_ + 1),
598 2 * A_local_->getLocalNumEntries() * (LevelOfFill_ + 1));
599 Graph_->initialize(KernelHandle_);
601 KernelHandle_v_ = std::vector<Teuchos::RCP<kk_handle_type> >(num_streams_);
602 for (
int i = 0; i < num_streams_; i++) {
603 KernelHandle_v_[i] =
Teuchos::rcp(
new kk_handle_type());
604 KernelHandle_v_[i]->create_spiluk_handle(KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
605 A_local_diagblks_v_[i].numRows(),
606 2 * A_local_diagblks_v_[i].nnz() * (LevelOfFill_ + 1),
607 2 * A_local_diagblks_v_[i].nnz() * (LevelOfFill_ + 1));
608 Graph_v_[i]->initialize(KernelHandle_v_[i]);
612 Graph_->initialize();
616 checkOrderingConsistency(*A_local_);
617 if (!isKokkosKernelsStream_) {
618 L_solver_->setMatrix(L_);
620 L_solver_->setStreamInfo(isKokkosKernelsStream_, num_streams_, exec_space_instances_);
621 L_solver_->setMatrices(L_v_);
623 L_solver_->initialize();
625 if (!isKokkosKernelsStream_) {
626 U_solver_->setMatrix(U_);
628 U_solver_->setStreamInfo(isKokkosKernelsStream_, num_streams_, exec_space_instances_);
629 U_solver_->setMatrices(U_v_);
631 U_solver_->initialize();
638 isInitialized_ =
true;
640 initializeTime_ += (timer.wallTime() - startTime);
643 template <
class MatrixType>
651 bool gidsAreConsistentlyOrdered =
true;
652 global_ordinal_type indexOfInconsistentGID = 0;
653 for (global_ordinal_type i = 0; i < rowGIDs.
size(); ++i) {
654 if (rowGIDs[i] != colGIDs[i]) {
655 gidsAreConsistentlyOrdered =
false;
656 indexOfInconsistentGID = i;
661 "The ordering of the local GIDs in the row and column maps is not the same"
663 <<
"at index " << indexOfInconsistentGID
664 <<
". Consistency is required, as all calculations are done with"
666 <<
"local indexing.");
669 template <
class MatrixType>
670 void RILUK<MatrixType>::
671 initAllValues(
const row_matrix_type& A) {
677 using Teuchos::reduceAll;
678 typedef Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type> map_type;
680 size_t NumIn = 0, NumL = 0, NumU = 0;
681 bool DiagFound =
false;
682 size_t NumNonzeroDiags = 0;
683 size_t MaxNumEntries = A.getGlobalMaxNumRowEntries();
687 nonconst_local_inds_host_view_type InI(
"InI", MaxNumEntries);
690 nonconst_values_host_view_type InV(
"InV", MaxNumEntries);
695 const bool ReplaceValues = L_->isStaticGraph() || L_->isLocallyIndexed();
700 L_->setAllToScalar(STS::zero());
701 U_->setAllToScalar(STS::zero());
704 D_->putScalar(STS::zero());
705 auto DV = Kokkos::subview(D_->getLocalViewHost(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
707 RCP<const map_type> rowMap = L_->getRowMap();
718 for (
size_t myRow = 0; myRow < A.getLocalNumRows(); ++myRow) {
719 local_ordinal_type local_row = myRow;
723 A.getLocalRowCopy(local_row, InI, InV, NumIn);
731 for (
size_t j = 0; j < NumIn; ++j) {
732 const local_ordinal_type k = InI[j];
734 if (k == local_row) {
737 DV(local_row) += Rthresh_ * InV[j] + IFPACK2_SGN(InV[j]) * Athresh_;
740 true, std::runtime_error,
741 "Ifpack2::RILUK::initAllValues: current "
743 << k <<
" < 0. I'm not sure why this is an error; it is "
744 "probably an artifact of the undocumented assumptions of the "
745 "original implementation (likely copied and pasted from Ifpack). "
746 "Nevertheless, the code I found here insisted on this being an error "
747 "state, so I will throw an exception here.");
748 }
else if (k < local_row) {
752 }
else if (Teuchos::as<size_t>(k) <= rowMap->getLocalNumElements()) {
764 DV(local_row) = Athresh_;
769 L_->replaceLocalValues(local_row, LI(0, NumL), LV(0, NumL));
773 L_->insertLocalValues(local_row, LI(0, NumL), LV(0, NumL));
779 U_->replaceLocalValues(local_row, UI(0, NumU), UV(0, NumU));
783 U_->insertLocalValues(local_row, UI(0, NumU), UV(0, NumU));
791 isInitialized_ =
true;
794 template <
class MatrixType>
795 void RILUK<MatrixType>::compute_serial() {
798 initAllValues(*A_local_);
803 const scalar_type MinDiagonalValue = STS::rmin();
804 const scalar_type MaxDiagonalValue = STS::one() / MinDiagonalValue;
806 size_t NumIn, NumL, NumU;
809 const size_t MaxNumEntries =
810 L_->getLocalMaxNumRowEntries() + U_->getLocalMaxNumRowEntries() + 1;
814 size_t num_cols = U_->getColMap()->getLocalNumElements();
817 auto DV = Kokkos::subview(D_->getLocalViewHost(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
821 using IST =
typename row_matrix_type::impl_scalar_type;
822 for (
size_t i = 0; i < L_->getLocalNumRows(); ++i) {
823 local_ordinal_type local_row = i;
826 local_inds_host_view_type UUI;
827 values_host_view_type UUV;
831 NumIn = MaxNumEntries;
832 nonconst_local_inds_host_view_type InI_v(InI.data(), MaxNumEntries);
833 nonconst_values_host_view_type InV_v(reinterpret_cast<IST*>(InV.data()), MaxNumEntries);
835 L_->getLocalRowCopy(local_row, InI_v, InV_v, NumL);
838 InI[NumL] = local_row;
840 nonconst_local_inds_host_view_type InI_sub(InI.data() + NumL + 1, MaxNumEntries - NumL - 1);
841 nonconst_values_host_view_type InV_sub(reinterpret_cast<IST*>(InV.data()) + NumL + 1, MaxNumEntries - NumL - 1);
843 U_->getLocalRowCopy(local_row, InI_sub, InV_sub, NumU);
844 NumIn = NumL + NumU + 1;
847 for (
size_t j = 0; j < NumIn; ++j) {
851 scalar_type diagmod = STS::zero();
853 for (
size_t jj = 0; jj < NumL; ++jj) {
854 local_ordinal_type j = InI[jj];
855 IST multiplier = InV[jj];
857 InV[jj] *=
static_cast<scalar_type
>(DV(j));
859 U_->getLocalRowView(j, UUI, UUV);
862 if (RelaxValue_ == STM::zero()) {
863 for (
size_t k = 0; k < NumUU; ++k) {
864 const int kk = colflag[UUI[k]];
869 InV[kk] -=
static_cast<scalar_type
>(multiplier * UUV[k]);
874 for (
size_t k = 0; k < NumUU; ++k) {
878 const int kk = colflag[UUI[k]];
880 InV[kk] -=
static_cast<scalar_type
>(multiplier * UUV[k]);
882 diagmod -=
static_cast<scalar_type
>(multiplier * UUV[k]);
890 L_->replaceLocalValues(local_row, InI(0, NumL), InV(0, NumL));
895 if (RelaxValue_ != STM::zero()) {
896 DV(i) += RelaxValue_ * diagmod;
899 if (STS::magnitude(DV(i)) > STS::magnitude(MaxDiagonalValue)) {
900 if (STS::real(DV(i)) < STM::zero()) {
901 DV(i) = -MinDiagonalValue;
903 DV(i) = MinDiagonalValue;
906 DV(i) =
static_cast<impl_scalar_type
>(STS::one()) / DV(i);
909 for (
size_t j = 0; j < NumU; ++j) {
910 InV[NumL + 1 + j] *=
static_cast<scalar_type
>(DV(i));
915 U_->replaceLocalValues(local_row, InI(NumL + 1, NumU), InV(NumL + 1, NumU));
919 for (
size_t j = 0; j < NumIn; ++j) {
920 colflag[InI[j]] = -1;
931 L_->fillComplete(L_->getColMap(), A_local_->getRangeMap());
932 U_->fillComplete(A_local_->getDomainMap(), U_->getRowMap());
935 L_solver_->setMatrix(L_);
936 L_solver_->compute();
937 U_solver_->setMatrix(U_);
938 U_solver_->compute();
941 template <
class MatrixType>
942 void RILUK<MatrixType>::compute_kkspiluk() {
946 L_->setAllToScalar(STS::zero());
947 U_->setAllToScalar(STS::zero());
949 using row_map_type =
typename crs_matrix_type::local_matrix_device_type::row_map_type;
950 auto lclL = L_->getLocalMatrixDevice();
951 row_map_type L_rowmap = lclL.graph.row_map;
952 auto L_entries = lclL.graph.entries;
953 auto L_values = lclL.values;
955 auto lclU = U_->getLocalMatrixDevice();
956 row_map_type U_rowmap = lclU.graph.row_map;
957 auto U_entries = lclU.graph.entries;
958 auto U_values = lclU.values;
960 auto lclMtx = A_local_crs_->getLocalMatrixDevice();
961 KokkosSparse::spiluk_numeric(KernelHandle_.getRawPtr(), LevelOfFill_,
962 lclMtx.graph.row_map, lclMtx.graph.entries, lclMtx.values,
963 L_rowmap, L_entries, L_values, U_rowmap, U_entries, U_values);
965 L_->fillComplete(L_->getColMap(), A_local_->getRangeMap());
966 U_->fillComplete(A_local_->getDomainMap(), U_->getRowMap());
968 L_solver_->compute();
969 U_solver_->compute();
972 template <
class MatrixType>
973 void RILUK<MatrixType>::compute_kkspiluk_stream() {
974 std::vector<lno_row_view_t> L_rowmap_v(num_streams_);
975 std::vector<lno_nonzero_view_t> L_entries_v(num_streams_);
976 std::vector<scalar_nonzero_view_t> L_values_v(num_streams_);
977 std::vector<lno_row_view_t> U_rowmap_v(num_streams_);
978 std::vector<lno_nonzero_view_t> U_entries_v(num_streams_);
979 std::vector<scalar_nonzero_view_t> U_values_v(num_streams_);
980 std::vector<kk_handle_type*> KernelHandle_rawptr_v_(num_streams_);
981 for (
int i = 0; i < num_streams_; i++) {
982 L_v_[i]->resumeFill();
983 U_v_[i]->resumeFill();
985 L_v_[i]->setAllToScalar(STS::zero());
986 U_v_[i]->setAllToScalar(STS::zero());
988 auto lclL = L_v_[i]->getLocalMatrixDevice();
989 L_rowmap_v[i] = lclL.graph.row_map;
990 L_entries_v[i] = lclL.graph.entries;
991 L_values_v[i] = lclL.values;
993 auto lclU = U_v_[i]->getLocalMatrixDevice();
994 U_rowmap_v[i] = lclU.graph.row_map;
995 U_entries_v[i] = lclU.graph.entries;
996 U_values_v[i] = lclU.values;
997 KernelHandle_rawptr_v_[i] = KernelHandle_v_[i].getRawPtr();
1001 auto lclMtx = A_local_crs_->getLocalMatrixDevice();
1004 using TeamPolicy = Kokkos::TeamPolicy<execution_space>;
1005 const auto A_nrows = lclMtx.numRows();
1006 auto rows_per_block = ((A_nrows % num_streams_) == 0)
1007 ? (A_nrows / num_streams_)
1008 : (A_nrows / num_streams_ + 1);
1009 for (
int i = 0; i < num_streams_; i++) {
1010 const auto start_row_offset = i * rows_per_block;
1011 auto rowptrs = A_local_diagblks_rowmap_v_[i];
1012 auto colindices = A_local_diagblks_entries_v_[i];
1013 auto values = A_local_diagblks_values_v_[i];
1014 const bool reordered = hasStreamReordered_;
1015 typename lno_nonzero_view_t::non_const_type reverse_perm = hasStreamReordered_ ? reverse_perm_v_[i] :
typename lno_nonzero_view_t::non_const_type{};
1016 TeamPolicy pol(exec_space_instances_[i], A_local_diagblks_rowmap_v_[i].extent(0) - 1, Kokkos::AUTO);
1017 Kokkos::parallel_for(
1018 pol, KOKKOS_LAMBDA(
const typename TeamPolicy::member_type& team) {
1019 const auto irow = team.league_rank();
1020 const auto irow_A = start_row_offset + (reordered ? reverse_perm(irow) : irow);
1021 const auto A_local_crs_row = lclMtx.rowConst(irow_A);
1022 const auto begin_row = rowptrs(irow);
1023 const auto num_entries = rowptrs(irow + 1) - begin_row;
1024 Kokkos::parallel_for(Kokkos::TeamThreadRange(team, num_entries), [&](
const int j) {
1025 const auto colidx = colindices(begin_row + j);
1026 const auto colidx_A = start_row_offset + (reordered ? reverse_perm(colidx) : colidx);
1028 const int offset = KokkosSparse::findRelOffset(
1029 &A_local_crs_row.colidx(0), A_local_crs_row.length, colidx_A, 0,
false);
1030 values(begin_row + j) = A_local_crs_row.value(offset);
1036 KokkosSparse::Experimental::spiluk_numeric_streams(exec_space_instances_, KernelHandle_rawptr_v_, LevelOfFill_,
1037 A_local_diagblks_rowmap_v_, A_local_diagblks_entries_v_, A_local_diagblks_values_v_,
1038 L_rowmap_v, L_entries_v, L_values_v,
1039 U_rowmap_v, U_entries_v, U_values_v);
1041 for (
int i = 0; i < num_streams_; i++) {
1042 L_v_[i]->fillComplete();
1043 U_v_[i]->fillComplete();
1046 L_solver_->compute();
1047 U_solver_->compute();
1050 template <
class MatrixType>
1056 using Teuchos::rcp_const_cast;
1057 using Teuchos::rcp_dynamic_cast;
1058 const char prefix[] =
"Ifpack2::RILUK::compute: ";
1064 "call setMatrix() with a nonnull input before calling this method.");
1066 "fill complete. You may not invoke initialize() or compute() with this "
1067 "matrix until the matrix is fill complete. If your matrix is a "
1068 "Tpetra::CrsMatrix, please call fillComplete on it (with the domain and "
1069 "range Maps, if appropriate) before calling this method.");
1071 if (!isInitialized()) {
1079 double startTime = timer.
wallTime();
1081 isComputed_ =
false;
1083 if (!this->isKokkosKernelsSpiluk_) {
1087 if (!A_local_crs_nc_.is_null()) {
1089 A_local_crs_nc_->resumeFill();
1091 nonconst_local_inds_host_view_type indices(
"indices", A_local_->getLocalMaxNumRowEntries());
1092 nonconst_values_host_view_type values(
"values", A_local_->getLocalMaxNumRowEntries());
1094 size_t numEntries = 0;
1095 A_local_->getLocalRowCopy(i, indices, values, numEntries);
1096 A_local_crs_nc_->replaceLocalValues(i, numEntries, reinterpret_cast<scalar_type*>(values.data()), indices.data());
1098 A_local_crs_nc_->fillComplete(A_local_->getDomainMap(), A_local_->getRangeMap());
1101 if (!isKokkosKernelsStream_) {
1105 auto lclMtx = A_local_crs_->getLocalMatrixDevice();
1106 KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks_v_, hasStreamReordered_);
1107 for (
int i = 0; i < num_streams_; i++) {
1108 A_local_diagblks_values_v_[i] = A_local_diagblks_v_[i].values;
1111 compute_kkspiluk_stream();
1117 computeTime_ += (timer.
wallTime() - startTime);
1121 template <
typename MV,
typename Map>
1122 void resetMultiVecIfNeeded(std::unique_ptr<MV>& mv_ptr,
const Map& map,
const size_t numVectors,
bool initialize) {
1123 if (!mv_ptr || mv_ptr->getNumVectors() != numVectors) {
1124 mv_ptr.reset(
new MV(map, numVectors, initialize));
1129 template <
class MatrixType>
1131 apply(
const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1132 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
1137 using Teuchos::rcpFromRef;
1140 A_.
is_null(), std::runtime_error,
1141 "Ifpack2::RILUK::apply: The matrix is "
1142 "null. Please call setMatrix() with a nonnull input, then initialize() "
1143 "and compute(), before calling this method.");
1145 !isComputed(), std::runtime_error,
1146 "Ifpack2::RILUK::apply: If you have not yet called compute(), "
1147 "you must call compute() before calling this method.");
1148 TEUCHOS_TEST_FOR_EXCEPTION(
1149 X.getNumVectors() != Y.getNumVectors(), std::invalid_argument,
1150 "Ifpack2::RILUK::apply: X and Y do not have the same number of columns. "
1151 "X.getNumVectors() = "
1152 << X.getNumVectors()
1153 <<
" != Y.getNumVectors() = " << Y.getNumVectors() <<
".");
1154 TEUCHOS_TEST_FOR_EXCEPTION(
1156 "Ifpack2::RILUK::apply: mode = Teuchos::CONJ_TRANS is not implemented for "
1157 "complex Scalar type. Please talk to the Ifpack2 developers to get this "
1158 "fixed. There is a FIXME in this file about this very issue.");
1159 #ifdef HAVE_IFPACK2_DEBUG
1161 if (!isKokkosKernelsStream_) {
1163 TEUCHOS_TEST_FOR_EXCEPTION(STM::isnaninf(D_nrm1), std::runtime_error,
"Ifpack2::RILUK::apply: The 1-norm of the stored diagonal is NaN or Inf.");
1168 for (
size_t j = 0; j < X.getNumVectors(); ++j) {
1169 if (STM::isnaninf(norms[j])) {
1174 TEUCHOS_TEST_FOR_EXCEPTION(!good, std::runtime_error,
"Ifpack2::RILUK::apply: The 1-norm of the input X is NaN or Inf.");
1176 #endif // HAVE_IFPACK2_DEBUG
1182 double startTime = timer.
wallTime();
1185 if (alpha == one && beta == zero) {
1186 if (isKokkosKernelsSpiluk_ && isKokkosKernelsStream_ && hasStreamReordered_) {
1187 Impl::resetMultiVecIfNeeded(reordered_x_, X.getMap(), X.getNumVectors(),
false);
1188 Impl::resetMultiVecIfNeeded(reordered_y_, Y.getMap(), Y.getNumVectors(),
false);
1190 for (
size_t j = 0; j < X.getNumVectors(); j++) {
1191 auto X_j = X.getVector(j);
1192 auto ReorderedX_j = reordered_x_->getVectorNonConst(j);
1193 auto X_lcl = X_j->getLocalViewDevice(Tpetra::Access::ReadOnly);
1194 auto ReorderedX_lcl = ReorderedX_j->getLocalViewDevice(Tpetra::Access::ReadWrite);
1197 for (
int i = 0; i < num_streams_; i++) {
1198 auto perm_i = perm_v_[i];
1199 stream_end = stream_begin + perm_i.extent(0);
1200 auto X_lcl_sub = Kokkos::subview(X_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1201 auto ReorderedX_lcl_sub = Kokkos::subview(ReorderedX_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1202 Kokkos::parallel_for(
1203 Kokkos::RangePolicy<execution_space>(exec_space_instances_[i], 0, static_cast<int>(perm_i.extent(0))), KOKKOS_LAMBDA(
const int& ii) {
1204 ReorderedX_lcl_sub(perm_i(ii)) = X_lcl_sub(ii);
1206 stream_begin = stream_end;
1212 L_solver_->apply(*reordered_x_, Y, mode);
1214 U_solver_->apply(Y, *reordered_y_, mode);
1217 U_solver_->apply(*reordered_x_, Y, mode);
1219 L_solver_->apply(Y, *reordered_y_, mode);
1222 for (
size_t j = 0; j < Y.getNumVectors(); j++) {
1223 auto Y_j = Y.getVectorNonConst(j);
1224 auto ReorderedY_j = reordered_y_->getVector(j);
1225 auto Y_lcl = Y_j->getLocalViewDevice(Tpetra::Access::ReadWrite);
1226 auto ReorderedY_lcl = ReorderedY_j->getLocalViewDevice(Tpetra::Access::ReadOnly);
1229 for (
int i = 0; i < num_streams_; i++) {
1230 auto perm_i = perm_v_[i];
1231 stream_end = stream_begin + perm_i.extent(0);
1232 auto Y_lcl_sub = Kokkos::subview(Y_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1233 auto ReorderedY_lcl_sub = Kokkos::subview(ReorderedY_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1234 Kokkos::parallel_for(
1235 Kokkos::RangePolicy<execution_space>(exec_space_instances_[i], 0, static_cast<int>(perm_i.extent(0))), KOKKOS_LAMBDA(
const int& ii) {
1236 Y_lcl_sub(ii) = ReorderedY_lcl_sub(perm_i(ii));
1238 stream_begin = stream_end;
1244 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030)
1248 Impl::resetMultiVecIfNeeded(Y_tmp_, Y.getMap(), Y.getNumVectors(),
false);
1251 L_solver_->apply(X, *Y_tmp_, mode);
1253 if (!this->isKokkosKernelsSpiluk_) {
1256 Y_tmp_->elementWiseMultiply(one, *D_, *Y_tmp_, zero);
1259 U_solver_->apply(*Y_tmp_, Y, mode);
1262 L_solver_->apply(X, Y, mode);
1264 if (!this->isKokkosKernelsSpiluk_) {
1267 Y.elementWiseMultiply(one, *D_, Y, zero);
1270 U_solver_->apply(Y, Y, mode);
1273 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030)
1277 Impl::resetMultiVecIfNeeded(Y_tmp_, Y.getMap(), Y.getNumVectors(),
false);
1280 U_solver_->apply(X, *Y_tmp_, mode);
1282 if (!this->isKokkosKernelsSpiluk_) {
1288 Y_tmp_->elementWiseMultiply(one, *D_, *Y_tmp_, zero);
1291 L_solver_->apply(*Y_tmp_, Y, mode);
1294 U_solver_->apply(X, Y, mode);
1296 if (!this->isKokkosKernelsSpiluk_) {
1302 Y.elementWiseMultiply(one, *D_, Y, zero);
1305 L_solver_->apply(Y, Y, mode);
1310 if (alpha == zero) {
1320 Impl::resetMultiVecIfNeeded(Y_tmp_, Y.getMap(), Y.getNumVectors(),
false);
1321 apply(X, *Y_tmp_, mode);
1322 Y.update(alpha, *Y_tmp_, beta);
1327 #ifdef HAVE_IFPACK2_DEBUG
1332 for (
size_t j = 0; j < Y.getNumVectors(); ++j) {
1333 if (STM::isnaninf(norms[j])) {
1338 TEUCHOS_TEST_FOR_EXCEPTION(!good, std::runtime_error,
"Ifpack2::RILUK::apply: The 1-norm of the output Y is NaN or Inf.");
1340 #endif // HAVE_IFPACK2_DEBUG
1343 applyTime_ += (timer.
wallTime() - startTime);
1379 template <
class MatrixType>
1381 std::ostringstream os;
1386 os <<
"\"Ifpack2::RILUK\": {";
1387 os <<
"Initialized: " << (isInitialized() ?
"true" :
"false") <<
", "
1388 <<
"Computed: " << (isComputed() ?
"true" :
"false") <<
", ";
1390 os <<
"Level-of-fill: " << getLevelOfFill() <<
", ";
1392 if (isKokkosKernelsSpiluk_) os <<
"KK-SPILUK, ";
1393 if (isKokkosKernelsStream_) os <<
"KK-Stream, ";
1396 os <<
"Matrix: null";
1398 os <<
"Global matrix dimensions: ["
1399 << A_->getGlobalNumRows() <<
", " << A_->getGlobalNumCols() <<
"]"
1400 <<
", Global nnz: " << A_->getGlobalNumEntries();
1403 if (!L_solver_.is_null()) os <<
", " << L_solver_->description();
1404 if (!U_solver_.is_null()) os <<
", " << U_solver_->description();
1412 #define IFPACK2_RILUK_INSTANT(S, LO, GO, N) \
1413 template class Ifpack2::RILUK<Tpetra::RowMatrix<S, LO, GO, N> >;
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getRangeMap() const
Returns the Tpetra::Map object associated with the range of this operator.
Definition: Ifpack2_RILUK_def.hpp:228
node_type::execution_space execution_space
The Kokkos execution space of the input MatrixType.
Definition: Ifpack2_RILUK_decl.hpp:241
static Teuchos::RCP< const row_matrix_type > makeLocalFilter(const Teuchos::RCP< const row_matrix_type > &A)
Return A, wrapped in a LocalFilter, if necessary.
Definition: Ifpack2_RILUK_def.hpp:417
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition: Ifpack2_RILUK_decl.hpp:229
T & get(const std::string &name, T def_value)
bool nonnull(const std::shared_ptr< T > &p)
const Tpetra::Vector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > & getD() const
Return the diagonal entries of the ILU factorization.
Definition: Ifpack2_RILUK_def.hpp:166
void initialize()
Initialize by computing the symbolic incomplete factorization.
Definition: Ifpack2_RILUK_def.hpp:447
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Tpetra::RowMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > row_matrix_type
Tpetra::RowMatrix specialization used by this class.
Definition: Ifpack2_RILUK_decl.hpp:248
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Change the matrix to be preconditioned.
Definition: Ifpack2_RILUK_def.hpp:117
std::string description() const
A one-line description of this object.
Definition: Ifpack2_RILUK_def.hpp:1380
void setParameters(const Teuchos::ParameterList ¶ms)
Definition: Ifpack2_RILUK_def.hpp:287
ILU(k) factorization of a given Tpetra::RowMatrix.
Definition: Ifpack2_RILUK_decl.hpp:213
size_t getNodeSmootherComplexity() const
Get a rough estimate of cost per iteration.
Definition: Ifpack2_RILUK_def.hpp:191
Teuchos::RCP< const crs_matrix_type > getCrsMatrix() const
Return the input matrix A as a Tpetra::CrsMatrix, if possible; else throws.
Definition: Ifpack2_RILUK_def.hpp:411
Tpetra::CrsMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > crs_matrix_type
Tpetra::CrsMatrix specialization used by this class for representing L and U.
Definition: Ifpack2_RILUK_decl.hpp:250
const crs_matrix_type & getU() const
Return the U factor of the ILU factorization.
Definition: Ifpack2_RILUK_def.hpp:179
bool isParameter(const std::string &name) const
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition: Ifpack2_RILUK_decl.hpp:232
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getDomainMap() const
Returns the Tpetra::Map object associated with the domain of this operator.
Definition: Ifpack2_RILUK_def.hpp:208
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition: Ifpack2_RILUK_decl.hpp:226
virtual ~RILUK()
Destructor (declared virtual for memory safety).
Definition: Ifpack2_RILUK_def.hpp:94
"Preconditioner" that solves local sparse triangular systems.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:46
void compute()
Compute the (numeric) incomplete factorization.
Definition: Ifpack2_RILUK_def.hpp:1051
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the (inverse of the) incomplete factorization to X, resulting in Y.
Definition: Ifpack2_RILUK_def.hpp:1131
Teuchos::ScalarTraits< scalar_type >::magnitudeType magnitude_type
The type of the magnitude (absolute value) of a matrix entry.
Definition: Ifpack2_RILUK_decl.hpp:235
void resize(size_type new_size, const value_type &x=value_type())
Construct a level filled graph for use in computing an ILU(k) incomplete factorization.
Definition: Ifpack2_IlukGraph.hpp:67
IntegralType getIntegralValue(const std::string &str, const std::string ¶mName="", const std::string &sublistName="") const
Teuchos::RCP< const row_matrix_type > getMatrix() const
Get the input matrix.
Definition: Ifpack2_RILUK_def.hpp:405
bool isType(const std::string &name) const
Declaration of MDF interface.
Access only local rows and columns of a sparse matrix.
Definition: Ifpack2_LocalFilter_decl.hpp:127
const crs_matrix_type & getL() const
Return the L factor of the ILU factorization.
Definition: Ifpack2_RILUK_def.hpp:150
std::string typeName(const T &t)
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition: Ifpack2_RILUK_decl.hpp:223