10 #ifndef IFPACK2_CRSRILUK_DEF_HPP
11 #define IFPACK2_CRSRILUK_DEF_HPP
14 #include "Ifpack2_LocalFilter.hpp"
15 #include "Tpetra_CrsMatrix.hpp"
16 #include "Teuchos_StandardParameterEntryValidators.hpp"
17 #include "Ifpack2_LocalSparseTriangularSolver.hpp"
18 #include "Ifpack2_Details_getParamTryingTypes.hpp"
19 #include "Ifpack2_Details_getCrsMatrix.hpp"
20 #include "Kokkos_Sort.hpp"
21 #include "KokkosSparse_Utils.hpp"
22 #include "KokkosKernels_Sorting.hpp"
23 #include "KokkosSparse_IOUtils.hpp"
36 type_strs[0] =
"Serial";
37 type_strs[1] =
"KSPILUK";
39 type_enums[0] = Serial;
40 type_enums[1] = KSPILUK;
45 template <
class MatrixType>
48 , A_coordinates_(Matrix_in_coordinates)
52 , isInitialized_(false)
57 , initializeTime_(0.0)
63 , isKokkosKernelsSpiluk_(false)
64 , isKokkosKernelsStream_(false)
66 , hasStreamReordered_(false)
67 , hasStreamsWithRCB_(false) {
71 template <
class MatrixType>
74 , A_coordinates_(Matrix_in_coordinates)
78 , isInitialized_(false)
83 , initializeTime_(0.0)
89 , isKokkosKernelsSpiluk_(false)
90 , isKokkosKernelsStream_(false)
92 , hasStreamReordered_(false)
93 , hasStreamsWithRCB_(false) {
97 template <
class MatrixType>
99 if (!isKokkosKernelsStream_) {
101 KernelHandle_->destroy_spiluk_handle();
104 for (
size_t i = 0; i < KernelHandle_v_.size(); i++) {
106 KernelHandle_v_[i]->destroy_spiluk_handle();
112 template <
class MatrixType>
115 L_solver_->setObjectLabel(
"lower");
117 U_solver_->setObjectLabel(
"upper");
120 template <
class MatrixType>
127 isAllocated_ =
false;
128 isInitialized_ =
false;
130 A_local_ = Teuchos::null;
131 Graph_ = Teuchos::null;
138 if (!L_solver_.is_null()) {
139 L_solver_->setMatrix(Teuchos::null);
141 if (!U_solver_.is_null()) {
142 U_solver_->setMatrix(Teuchos::null);
152 template <
class MatrixType>
154 if (A_coordinates.
getRawPtr() != A_coordinates_.getRawPtr()) {
155 A_coordinates_ = A_coordinates;
159 template <
class MatrixType>
163 L_.is_null(), std::runtime_error,
164 "Ifpack2::RILUK::getL: The L factor "
165 "is null. Please call initialize() (and preferably also compute()) "
166 "before calling this method. If the input matrix has not yet been set, "
167 "you must first call setMatrix() with a nonnull input matrix before you "
168 "may call initialize() or compute().");
172 template <
class MatrixType>
173 const Tpetra::Vector<typename RILUK<MatrixType>::scalar_type,
179 D_.is_null(), std::runtime_error,
180 "Ifpack2::RILUK::getD: The D factor "
181 "(of diagonal entries) is null. Please call initialize() (and "
182 "preferably also compute()) before calling this method. If the input "
183 "matrix has not yet been set, you must first call setMatrix() with a "
184 "nonnull input matrix before you may call initialize() or compute().");
188 template <
class MatrixType>
192 U_.is_null(), std::runtime_error,
193 "Ifpack2::RILUK::getU: The U factor "
194 "is null. Please call initialize() (and preferably also compute()) "
195 "before calling this method. If the input matrix has not yet been set, "
196 "you must first call setMatrix() with a nonnull input matrix before you "
197 "may call initialize() or compute().");
201 template <
class MatrixType>
204 A_.
is_null(), std::runtime_error,
205 "Ifpack2::RILUK::getNodeSmootherComplexity: "
206 "The input matrix A is null. Please call setMatrix() with a nonnull "
207 "input matrix, then call compute(), before calling this method.");
209 if (!L_.is_null() && !U_.is_null())
210 return A_->getLocalNumEntries() + L_->getLocalNumEntries() + U_->getLocalNumEntries();
215 template <
class MatrixType>
221 A_.
is_null(), std::runtime_error,
222 "Ifpack2::RILUK::getDomainMap: "
223 "The matrix is null. Please call setMatrix() with a nonnull input "
224 "before calling this method.");
228 Graph_.is_null(), std::runtime_error,
229 "Ifpack2::RILUK::getDomainMap: "
230 "The computed graph is null. Please call initialize() before calling "
232 return Graph_->getL_Graph()->getDomainMap();
235 template <
class MatrixType>
241 A_.
is_null(), std::runtime_error,
242 "Ifpack2::RILUK::getRangeMap: "
243 "The matrix is null. Please call setMatrix() with a nonnull input "
244 "before calling this method.");
248 Graph_.is_null(), std::runtime_error,
249 "Ifpack2::RILUK::getRangeMap: "
250 "The computed graph is null. Please call initialize() before calling "
252 return Graph_->getL_Graph()->getRangeMap();
255 template <
class MatrixType>
261 if (!isKokkosKernelsStream_) {
270 L_ =
rcp(
new crs_matrix_type(Graph_->getL_Graph()));
271 U_ =
rcp(
new crs_matrix_type(Graph_->getU_Graph()));
272 L_->setAllToScalar(STS::zero());
273 U_->setAllToScalar(STS::zero());
278 D_ =
rcp(
new vec_type(Graph_->getL_Graph()->getRowMap()));
280 L_v_ = std::vector<Teuchos::RCP<crs_matrix_type> >(num_streams_, null);
281 U_v_ = std::vector<Teuchos::RCP<crs_matrix_type> >(num_streams_, null);
282 for (
int i = 0; i < num_streams_; i++) {
283 L_v_[i] =
rcp(
new crs_matrix_type(Graph_v_[i]->getL_Graph()));
284 U_v_[i] =
rcp(
new crs_matrix_type(Graph_v_[i]->getU_Graph()));
285 L_v_[i]->setAllToScalar(STS::zero());
286 U_v_[i]->setAllToScalar(STS::zero());
288 L_v_[i]->fillComplete();
289 U_v_[i]->fillComplete();
296 template <
class MatrixType>
299 using Details::getParamTryingTypes;
303 const char prefix[] =
"Ifpack2::RILUK: ";
310 double overalloc = 2.;
322 const std::string paramName(
"fact: iluk level-of-fill");
323 getParamTryingTypes<int, int, global_ordinal_type, double, float>(fillLevel, params, paramName, prefix);
328 const std::string paramName(
"fact: absolute threshold");
329 getParamTryingTypes<magnitude_type, magnitude_type, double>(absThresh, params, paramName, prefix);
332 const std::string paramName(
"fact: relative threshold");
333 getParamTryingTypes<magnitude_type, magnitude_type, double>(relThresh, params, paramName, prefix);
336 const std::string paramName(
"fact: relax value");
337 getParamTryingTypes<magnitude_type, magnitude_type, double>(relaxValue, params, paramName, prefix);
340 const std::string paramName(
"fact: iluk overalloc");
341 getParamTryingTypes<double, double>(overalloc, params, paramName, prefix);
345 Details::IlukImplType::Enum ilukimplType = Details::IlukImplType::Serial;
347 static const char typeName[] =
"fact: type";
349 if (!params.
isType<std::string>(typeName))
break;
352 Array<std::string> ilukimplTypeStrs;
353 Array<Details::IlukImplType::Enum> ilukimplTypeEnums;
354 Details::IlukImplType::loadPLTypeOption(ilukimplTypeStrs, ilukimplTypeEnums);
356 s2i(ilukimplTypeStrs(), ilukimplTypeEnums(), typeName,
false);
361 if (ilukimplType == Details::IlukImplType::KSPILUK) {
362 this->isKokkosKernelsSpiluk_ =
true;
364 this->isKokkosKernelsSpiluk_ =
false;
368 const std::string paramName(
"fact: kspiluk number-of-streams");
369 getParamTryingTypes<int, int, global_ordinal_type>(nstreams, params, paramName, prefix);
373 L_solver_->setParameters(params);
374 U_solver_->setParameters(params);
380 LevelOfFill_ = fillLevel;
381 Overalloc_ = overalloc;
382 #ifdef KOKKOS_ENABLE_OPENMP
383 if constexpr (std::is_same_v<execution_space, Kokkos::OpenMP>) {
387 num_streams_ = nstreams;
389 if (num_streams_ >= 1) {
390 this->isKokkosKernelsStream_ =
true;
392 if (params.
isParameter(
"fact: kspiluk reordering in streams"))
393 hasStreamReordered_ = params.
get<
bool>(
"fact: kspiluk reordering in streams");
395 if (params.
isParameter(
"fact: kspiluk streams use rcb"))
396 hasStreamsWithRCB_ = params.
get<
bool>(
"fact: kspiluk streams use rcb");
398 this->isKokkosKernelsStream_ =
false;
406 if (absThresh != -STM::one()) {
407 Athresh_ = absThresh;
409 if (relThresh != -STM::one()) {
410 Rthresh_ = relThresh;
412 if (relaxValue != -STM::one()) {
413 RelaxValue_ = relaxValue;
417 template <
class MatrixType>
423 template <
class MatrixType>
429 template <
class MatrixType>
432 return A_coordinates_;
435 template <
class MatrixType>
440 using Teuchos::rcp_dynamic_cast;
441 using Teuchos::rcp_implicit_cast;
446 if (A->getRowMap()->getComm()->getSize() == 1 ||
447 A->getRowMap()->isSameAs(*(A->getColMap()))) {
454 RCP<const LocalFilter<row_matrix_type> > A_lf_r =
456 if (!A_lf_r.is_null()) {
466 template <
class MatrixType>
472 using Teuchos::rcp_const_cast;
473 using Teuchos::rcp_dynamic_cast;
474 using Teuchos::rcp_implicit_cast;
479 typedef Tpetra::Map<local_ordinal_type,
483 const char prefix[] =
"Ifpack2::RILUK::initialize: ";
486 "call setMatrix() with a nonnull input before calling this method.");
488 "fill complete. You may not invoke initialize() or compute() with this "
489 "matrix until the matrix is fill complete. If your matrix is a "
490 "Tpetra::CrsMatrix, please call fillComplete on it (with the domain and "
491 "range Maps, if appropriate) before calling this method.");
494 double startTime = timer.wallTime();
505 isInitialized_ =
false;
506 isAllocated_ =
false;
508 Graph_ = Teuchos::null;
510 reordered_x_ =
nullptr;
511 reordered_y_ =
nullptr;
513 if (isKokkosKernelsStream_) {
514 Graph_v_ = std::vector<Teuchos::RCP<iluk_graph_type> >(num_streams_);
515 A_local_diagblks_v_ = std::vector<local_matrix_device_type>(num_streams_);
516 for (
int i = 0; i < num_streams_; i++) {
517 Graph_v_[i] = Teuchos::null;
521 A_local_ = makeLocalFilter(A_);
524 A_local_.is_null(), std::logic_error,
525 "Ifpack2::RILUK::initialize: "
526 "makeLocalFilter returned null; it failed to compute A_local. "
527 "Please report this bug to the Ifpack2 developers.");
536 A_local_crs_ = Details::getCrsMatrix(A_local_);
537 if (A_local_crs_.is_null()) {
538 local_ordinal_type numRows = A_local_->getLocalNumRows();
539 Array<size_t> entriesPerRow(numRows);
540 for (local_ordinal_type i = 0; i < numRows; i++) {
541 entriesPerRow[i] = A_local_->getNumEntriesInLocalRow(i);
545 A_local_->getColMap(),
548 nonconst_local_inds_host_view_type indices(
"indices", A_local_->getLocalMaxNumRowEntries());
549 nonconst_values_host_view_type values(
"values", A_local_->getLocalMaxNumRowEntries());
550 for (local_ordinal_type i = 0; i < numRows; i++) {
551 size_t numEntries = 0;
552 A_local_->getLocalRowCopy(i, indices, values, numEntries);
553 A_local_crs_nc_->insertLocalValues(i, numEntries, reinterpret_cast<scalar_type*>(values.data()), indices.data());
555 A_local_crs_nc_->fillComplete(A_local_->getDomainMap(), A_local_->getRangeMap());
558 if (!isKokkosKernelsStream_) {
560 LevelOfFill_, 0, Overalloc_));
562 std::vector<int> weights(num_streams_);
563 std::fill(weights.begin(), weights.end(), 1);
564 exec_space_instances_ = Kokkos::Experimental::partition_space(
execution_space(), weights);
566 auto lclMtx = A_local_crs_->getLocalMatrixDevice();
567 if (!hasStreamReordered_) {
568 if (hasStreamsWithRCB_) {
569 TEUCHOS_TEST_FOR_EXCEPTION(A_coordinates_.is_null(), std::runtime_error, prefix <<
"The coordinates associated with rows of the input matrix is null while RILUK uses streams with RCB. Please call setCoord() with a nonnull input before calling this method.");
570 auto A_coordinates_lcl = A_coordinates_->getLocalViewDevice(Tpetra::Access::ReadOnly);
571 perm_rcb_ = perm_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"perm_rcb_"), A_coordinates_lcl.extent(0));
572 coors_rcb_ = coors_view_t(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"coors_rcb_"), A_coordinates_lcl.extent(0), A_coordinates_lcl.extent(1));
573 Kokkos::deep_copy(coors_rcb_, A_coordinates_lcl);
574 KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_with_rcb_sequential(lclMtx, coors_rcb_,
575 A_local_diagblks_v_, perm_rcb_);
577 KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks_v_);
580 perm_v_ = KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks_v_,
true);
581 reverse_perm_v_.resize(perm_v_.size());
582 for (
size_t istream = 0; istream < perm_v_.size(); ++istream) {
583 using perm_type =
typename lno_nonzero_view_t::non_const_type;
584 const auto perm = perm_v_[istream];
585 const auto perm_length = perm.extent(0);
586 perm_type reverse_perm(
587 Kokkos::view_alloc(Kokkos::WithoutInitializing,
"reverse_perm"),
589 Kokkos::parallel_for(
590 Kokkos::RangePolicy<execution_space>(exec_space_instances_[istream], 0, perm_length),
591 KOKKOS_LAMBDA(
const local_ordinal_type ii) {
592 reverse_perm(perm(ii)) = ii;
594 reverse_perm_v_[istream] = reverse_perm;
598 A_local_diagblks_rowmap_v_ = std::vector<lno_row_view_t>(num_streams_);
599 A_local_diagblks_entries_v_ = std::vector<lno_nonzero_view_t>(num_streams_);
600 A_local_diagblks_values_v_ = std::vector<scalar_nonzero_view_t>(num_streams_);
602 for (
int i = 0; i < num_streams_; i++) {
603 A_local_diagblks_rowmap_v_[i] = A_local_diagblks_v_[i].graph.row_map;
604 A_local_diagblks_entries_v_[i] = A_local_diagblks_v_[i].graph.entries;
605 A_local_diagblks_values_v_[i] = A_local_diagblks_v_[i].values;
608 A_local_diagblks_v_[i].numRows(),
609 A_local_crs_->getRowMap()->getComm()));
611 A_local_diagblks_v_[i].numCols(),
612 A_local_crs_->getColMap()->getComm()));
614 A_local_diagblks_ColMap,
615 A_local_diagblks_v_[i]));
617 LevelOfFill_, 0, Overalloc_));
622 if (this->isKokkosKernelsSpiluk_) {
623 if (!isKokkosKernelsStream_) {
624 this->KernelHandle_ =
Teuchos::rcp(
new kk_handle_type());
625 KernelHandle_->create_spiluk_handle(KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
626 A_local_->getLocalNumRows(),
627 2 * A_local_->getLocalNumEntries() * (LevelOfFill_ + 1),
628 2 * A_local_->getLocalNumEntries() * (LevelOfFill_ + 1));
629 Graph_->initialize(KernelHandle_);
631 KernelHandle_v_ = std::vector<Teuchos::RCP<kk_handle_type> >(num_streams_);
632 for (
int i = 0; i < num_streams_; i++) {
633 KernelHandle_v_[i] =
Teuchos::rcp(
new kk_handle_type());
634 KernelHandle_v_[i]->create_spiluk_handle(KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
635 A_local_diagblks_v_[i].numRows(),
636 2 * A_local_diagblks_v_[i].nnz() * (LevelOfFill_ + 1),
637 2 * A_local_diagblks_v_[i].nnz() * (LevelOfFill_ + 1));
638 Graph_v_[i]->initialize(KernelHandle_v_[i]);
642 Graph_->initialize();
646 checkOrderingConsistency(*A_local_);
647 if (!isKokkosKernelsStream_) {
648 L_solver_->setMatrix(L_);
650 L_solver_->setStreamInfo(isKokkosKernelsStream_, num_streams_, exec_space_instances_);
651 L_solver_->setMatrices(L_v_);
653 L_solver_->initialize();
655 if (!isKokkosKernelsStream_) {
656 U_solver_->setMatrix(U_);
658 U_solver_->setStreamInfo(isKokkosKernelsStream_, num_streams_, exec_space_instances_);
659 U_solver_->setMatrices(U_v_);
661 U_solver_->initialize();
668 isInitialized_ =
true;
670 initializeTime_ += (timer.wallTime() - startTime);
673 template <
class MatrixType>
681 bool gidsAreConsistentlyOrdered =
true;
682 global_ordinal_type indexOfInconsistentGID = 0;
683 for (global_ordinal_type i = 0; i < rowGIDs.
size(); ++i) {
684 if (rowGIDs[i] != colGIDs[i]) {
685 gidsAreConsistentlyOrdered =
false;
686 indexOfInconsistentGID = i;
691 "The ordering of the local GIDs in the row and column maps is not the same"
693 <<
"at index " << indexOfInconsistentGID
694 <<
". Consistency is required, as all calculations are done with"
696 <<
"local indexing.");
699 template <
class MatrixType>
700 void RILUK<MatrixType>::
701 initAllValues(
const row_matrix_type& A) {
707 using Teuchos::reduceAll;
708 typedef Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type> map_type;
710 size_t NumIn = 0, NumL = 0, NumU = 0;
711 bool DiagFound =
false;
712 size_t NumNonzeroDiags = 0;
713 size_t MaxNumEntries = A.getGlobalMaxNumRowEntries();
717 nonconst_local_inds_host_view_type InI(
"InI", MaxNumEntries);
720 nonconst_values_host_view_type InV(
"InV", MaxNumEntries);
725 const bool ReplaceValues = L_->isStaticGraph() || L_->isLocallyIndexed();
730 L_->setAllToScalar(STS::zero());
731 U_->setAllToScalar(STS::zero());
734 D_->putScalar(STS::zero());
735 auto DV = Kokkos::subview(D_->getLocalViewHost(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
737 RCP<const map_type> rowMap = L_->getRowMap();
748 for (
size_t myRow = 0; myRow < A.getLocalNumRows(); ++myRow) {
749 local_ordinal_type local_row = myRow;
753 A.getLocalRowCopy(local_row, InI, InV, NumIn);
761 for (
size_t j = 0; j < NumIn; ++j) {
762 const local_ordinal_type k = InI[j];
764 if (k == local_row) {
767 DV(local_row) += Rthresh_ * InV[j] + IFPACK2_SGN(InV[j]) * Athresh_;
770 true, std::runtime_error,
771 "Ifpack2::RILUK::initAllValues: current "
773 << k <<
" < 0. I'm not sure why this is an error; it is "
774 "probably an artifact of the undocumented assumptions of the "
775 "original implementation (likely copied and pasted from Ifpack). "
776 "Nevertheless, the code I found here insisted on this being an error "
777 "state, so I will throw an exception here.");
778 }
else if (k < local_row) {
782 }
else if (Teuchos::as<size_t>(k) <= rowMap->getLocalNumElements()) {
794 DV(local_row) = Athresh_;
799 L_->replaceLocalValues(local_row, LI(0, NumL), LV(0, NumL));
803 L_->insertLocalValues(local_row, LI(0, NumL), LV(0, NumL));
809 U_->replaceLocalValues(local_row, UI(0, NumU), UV(0, NumU));
813 U_->insertLocalValues(local_row, UI(0, NumU), UV(0, NumU));
821 isInitialized_ =
true;
824 template <
class MatrixType>
825 void RILUK<MatrixType>::compute_serial() {
828 initAllValues(*A_local_);
833 const scalar_type MinDiagonalValue = STS::rmin();
834 const scalar_type MaxDiagonalValue = STS::one() / MinDiagonalValue;
836 size_t NumIn, NumL, NumU;
839 const size_t MaxNumEntries =
840 L_->getLocalMaxNumRowEntries() + U_->getLocalMaxNumRowEntries() + 1;
844 size_t num_cols = U_->getColMap()->getLocalNumElements();
847 auto DV = Kokkos::subview(D_->getLocalViewHost(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
851 using IST =
typename row_matrix_type::impl_scalar_type;
852 for (
size_t i = 0; i < L_->getLocalNumRows(); ++i) {
853 local_ordinal_type local_row = i;
856 local_inds_host_view_type UUI;
857 values_host_view_type UUV;
861 NumIn = MaxNumEntries;
862 nonconst_local_inds_host_view_type InI_v(InI.data(), MaxNumEntries);
863 nonconst_values_host_view_type InV_v(reinterpret_cast<IST*>(InV.data()), MaxNumEntries);
865 L_->getLocalRowCopy(local_row, InI_v, InV_v, NumL);
868 InI[NumL] = local_row;
870 nonconst_local_inds_host_view_type InI_sub(InI.data() + NumL + 1, MaxNumEntries - NumL - 1);
871 nonconst_values_host_view_type InV_sub(reinterpret_cast<IST*>(InV.data()) + NumL + 1, MaxNumEntries - NumL - 1);
873 U_->getLocalRowCopy(local_row, InI_sub, InV_sub, NumU);
874 NumIn = NumL + NumU + 1;
877 for (
size_t j = 0; j < NumIn; ++j) {
881 scalar_type diagmod = STS::zero();
883 for (
size_t jj = 0; jj < NumL; ++jj) {
884 local_ordinal_type j = InI[jj];
885 IST multiplier = InV[jj];
887 InV[jj] *=
static_cast<scalar_type
>(DV(j));
889 U_->getLocalRowView(j, UUI, UUV);
892 if (RelaxValue_ == STM::zero()) {
893 for (
size_t k = 0; k < NumUU; ++k) {
894 const int kk = colflag[UUI[k]];
899 InV[kk] -=
static_cast<scalar_type
>(multiplier * UUV[k]);
904 for (
size_t k = 0; k < NumUU; ++k) {
908 const int kk = colflag[UUI[k]];
910 InV[kk] -=
static_cast<scalar_type
>(multiplier * UUV[k]);
912 diagmod -=
static_cast<scalar_type
>(multiplier * UUV[k]);
920 L_->replaceLocalValues(local_row, InI(0, NumL), InV(0, NumL));
925 if (RelaxValue_ != STM::zero()) {
926 DV(i) += RelaxValue_ * diagmod;
929 if (STS::magnitude(DV(i)) > STS::magnitude(MaxDiagonalValue)) {
930 if (STS::real(DV(i)) < STM::zero()) {
931 DV(i) = -MinDiagonalValue;
933 DV(i) = MinDiagonalValue;
936 DV(i) =
static_cast<impl_scalar_type
>(STS::one()) / DV(i);
939 for (
size_t j = 0; j < NumU; ++j) {
940 InV[NumL + 1 + j] *=
static_cast<scalar_type
>(DV(i));
945 U_->replaceLocalValues(local_row, InI(NumL + 1, NumU), InV(NumL + 1, NumU));
949 for (
size_t j = 0; j < NumIn; ++j) {
950 colflag[InI[j]] = -1;
961 L_->fillComplete(L_->getColMap(), A_local_->getRangeMap());
962 U_->fillComplete(A_local_->getDomainMap(), U_->getRowMap());
965 L_solver_->setMatrix(L_);
966 L_solver_->compute();
967 U_solver_->setMatrix(U_);
968 U_solver_->compute();
971 template <
class MatrixType>
972 void RILUK<MatrixType>::compute_kkspiluk() {
976 L_->setAllToScalar(STS::zero());
977 U_->setAllToScalar(STS::zero());
979 using row_map_type =
typename crs_matrix_type::local_matrix_device_type::row_map_type;
980 auto lclL = L_->getLocalMatrixDevice();
981 row_map_type L_rowmap = lclL.graph.row_map;
982 auto L_entries = lclL.graph.entries;
983 auto L_values = lclL.values;
985 auto lclU = U_->getLocalMatrixDevice();
986 row_map_type U_rowmap = lclU.graph.row_map;
987 auto U_entries = lclU.graph.entries;
988 auto U_values = lclU.values;
990 auto lclMtx = A_local_crs_->getLocalMatrixDevice();
991 KokkosSparse::spiluk_numeric(KernelHandle_.getRawPtr(), LevelOfFill_,
992 lclMtx.graph.row_map, lclMtx.graph.entries, lclMtx.values,
993 L_rowmap, L_entries, L_values, U_rowmap, U_entries, U_values);
995 L_->fillComplete(L_->getColMap(), A_local_->getRangeMap());
996 U_->fillComplete(A_local_->getDomainMap(), U_->getRowMap());
998 L_solver_->compute();
999 U_solver_->compute();
1002 template <
class MatrixType>
1003 void RILUK<MatrixType>::compute_kkspiluk_stream() {
1004 std::vector<lno_row_view_t> L_rowmap_v(num_streams_);
1005 std::vector<lno_nonzero_view_t> L_entries_v(num_streams_);
1006 std::vector<scalar_nonzero_view_t> L_values_v(num_streams_);
1007 std::vector<lno_row_view_t> U_rowmap_v(num_streams_);
1008 std::vector<lno_nonzero_view_t> U_entries_v(num_streams_);
1009 std::vector<scalar_nonzero_view_t> U_values_v(num_streams_);
1010 std::vector<kk_handle_type*> KernelHandle_rawptr_v_(num_streams_);
1011 for (
int i = 0; i < num_streams_; i++) {
1012 L_v_[i]->resumeFill();
1013 U_v_[i]->resumeFill();
1015 L_v_[i]->setAllToScalar(STS::zero());
1016 U_v_[i]->setAllToScalar(STS::zero());
1018 auto lclL = L_v_[i]->getLocalMatrixDevice();
1019 L_rowmap_v[i] = lclL.graph.row_map;
1020 L_entries_v[i] = lclL.graph.entries;
1021 L_values_v[i] = lclL.values;
1023 auto lclU = U_v_[i]->getLocalMatrixDevice();
1024 U_rowmap_v[i] = lclU.graph.row_map;
1025 U_entries_v[i] = lclU.graph.entries;
1026 U_values_v[i] = lclU.values;
1027 KernelHandle_rawptr_v_[i] = KernelHandle_v_[i].getRawPtr();
1030 if (hasStreamReordered_) {
1034 auto lclMtx = A_local_crs_->getLocalMatrixDevice();
1037 using TeamPolicy = Kokkos::TeamPolicy<execution_space>;
1038 const auto A_nrows = lclMtx.numRows();
1039 auto rows_per_block = ((A_nrows % num_streams_) == 0)
1040 ? (A_nrows / num_streams_)
1041 : (A_nrows / num_streams_ + 1);
1042 for (
int i = 0; i < num_streams_; i++) {
1043 const auto start_row_offset = i * rows_per_block;
1044 auto rowptrs = A_local_diagblks_rowmap_v_[i];
1045 auto colindices = A_local_diagblks_entries_v_[i];
1046 auto values = A_local_diagblks_values_v_[i];
1047 const bool reordered = hasStreamReordered_;
1048 typename lno_nonzero_view_t::non_const_type reverse_perm = hasStreamReordered_ ? reverse_perm_v_[i] :
typename lno_nonzero_view_t::non_const_type{};
1049 TeamPolicy pol(exec_space_instances_[i], A_local_diagblks_rowmap_v_[i].extent(0) - 1, Kokkos::AUTO);
1050 Kokkos::parallel_for(
1051 pol, KOKKOS_LAMBDA(
const typename TeamPolicy::member_type& team) {
1052 const auto irow = team.league_rank();
1053 const auto irow_A = start_row_offset + (reordered ? reverse_perm(irow) : irow);
1054 const auto A_local_crs_row = lclMtx.rowConst(irow_A);
1055 const auto begin_row = rowptrs(irow);
1056 const auto num_entries = rowptrs(irow + 1) - begin_row;
1057 Kokkos::parallel_for(Kokkos::TeamThreadRange(team, num_entries), [&](
const int j) {
1058 const auto colidx = colindices(begin_row + j);
1059 const auto colidx_A = start_row_offset + (reordered ? reverse_perm(colidx) : colidx);
1061 const int offset = KokkosSparse::findRelOffset(
1062 &A_local_crs_row.colidx(0), A_local_crs_row.length, colidx_A, 0,
false);
1063 values(begin_row + j) = A_local_crs_row.value(offset);
1069 KokkosSparse::Experimental::spiluk_numeric_streams(exec_space_instances_, KernelHandle_rawptr_v_, LevelOfFill_,
1070 A_local_diagblks_rowmap_v_, A_local_diagblks_entries_v_, A_local_diagblks_values_v_,
1071 L_rowmap_v, L_entries_v, L_values_v,
1072 U_rowmap_v, U_entries_v, U_values_v);
1074 for (
int i = 0; i < num_streams_; i++) {
1075 L_v_[i]->fillComplete();
1076 U_v_[i]->fillComplete();
1079 L_solver_->compute();
1080 U_solver_->compute();
1083 template <
class MatrixType>
1089 using Teuchos::rcp_const_cast;
1090 using Teuchos::rcp_dynamic_cast;
1091 const char prefix[] =
"Ifpack2::RILUK::compute: ";
1097 "call setMatrix() with a nonnull input before calling this method.");
1099 "fill complete. You may not invoke initialize() or compute() with this "
1100 "matrix until the matrix is fill complete. If your matrix is a "
1101 "Tpetra::CrsMatrix, please call fillComplete on it (with the domain and "
1102 "range Maps, if appropriate) before calling this method.");
1104 if (!isInitialized()) {
1112 double startTime = timer.
wallTime();
1114 isComputed_ =
false;
1116 if (!this->isKokkosKernelsSpiluk_) {
1120 if (!A_local_crs_nc_.is_null()) {
1122 A_local_crs_nc_->resumeFill();
1124 nonconst_local_inds_host_view_type indices(
"indices", A_local_->getLocalMaxNumRowEntries());
1125 nonconst_values_host_view_type values(
"values", A_local_->getLocalMaxNumRowEntries());
1127 size_t numEntries = 0;
1128 A_local_->getLocalRowCopy(i, indices, values, numEntries);
1129 A_local_crs_nc_->replaceLocalValues(i, numEntries, reinterpret_cast<scalar_type*>(values.data()), indices.data());
1131 A_local_crs_nc_->fillComplete(A_local_->getDomainMap(), A_local_->getRangeMap());
1134 if (!isKokkosKernelsStream_) {
1138 auto lclMtx = A_local_crs_->getLocalMatrixDevice();
1139 if (!hasStreamsWithRCB_) {
1140 KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks_v_, hasStreamReordered_);
1142 auto A_coordinates_lcl = A_coordinates_->getLocalViewDevice(Tpetra::Access::ReadOnly);
1143 Kokkos::deep_copy(coors_rcb_, A_coordinates_lcl);
1144 KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_with_rcb_sequential(lclMtx, coors_rcb_,
1145 A_local_diagblks_v_, perm_rcb_);
1147 for (
int i = 0; i < num_streams_; i++) {
1148 A_local_diagblks_values_v_[i] = A_local_diagblks_v_[i].values;
1151 compute_kkspiluk_stream();
1157 computeTime_ += (timer.
wallTime() - startTime);
1161 template <
typename MV,
typename Map>
1162 void resetMultiVecIfNeeded(std::unique_ptr<MV>& mv_ptr,
const Map& map,
const size_t numVectors,
bool initialize) {
1163 if (!mv_ptr || mv_ptr->getNumVectors() != numVectors) {
1164 mv_ptr.reset(
new MV(map, numVectors, initialize));
1169 template <
class MatrixType>
1171 apply(
const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1172 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
1177 using Teuchos::rcpFromRef;
1180 A_.
is_null(), std::runtime_error,
1181 "Ifpack2::RILUK::apply: The matrix is "
1182 "null. Please call setMatrix() with a nonnull input, then initialize() "
1183 "and compute(), before calling this method.");
1185 !isComputed(), std::runtime_error,
1186 "Ifpack2::RILUK::apply: If you have not yet called compute(), "
1187 "you must call compute() before calling this method.");
1188 TEUCHOS_TEST_FOR_EXCEPTION(
1189 X.getNumVectors() != Y.getNumVectors(), std::invalid_argument,
1190 "Ifpack2::RILUK::apply: X and Y do not have the same number of columns. "
1191 "X.getNumVectors() = "
1192 << X.getNumVectors()
1193 <<
" != Y.getNumVectors() = " << Y.getNumVectors() <<
".");
1194 TEUCHOS_TEST_FOR_EXCEPTION(
1196 "Ifpack2::RILUK::apply: mode = Teuchos::CONJ_TRANS is not implemented for "
1197 "complex Scalar type. Please talk to the Ifpack2 developers to get this "
1198 "fixed. There is a FIXME in this file about this very issue.");
1199 #ifdef HAVE_IFPACK2_DEBUG
1201 if (!isKokkosKernelsStream_) {
1203 TEUCHOS_TEST_FOR_EXCEPTION(STM::isnaninf(D_nrm1), std::runtime_error,
"Ifpack2::RILUK::apply: The 1-norm of the stored diagonal is NaN or Inf.");
1208 for (
size_t j = 0; j < X.getNumVectors(); ++j) {
1209 if (STM::isnaninf(norms[j])) {
1214 TEUCHOS_TEST_FOR_EXCEPTION(!good, std::runtime_error,
"Ifpack2::RILUK::apply: The 1-norm of the input X is NaN or Inf.");
1216 #endif // HAVE_IFPACK2_DEBUG
1222 double startTime = timer.
wallTime();
1225 if (alpha == one && beta == zero) {
1226 if (isKokkosKernelsSpiluk_ && isKokkosKernelsStream_ && (hasStreamReordered_ || hasStreamsWithRCB_)) {
1227 Impl::resetMultiVecIfNeeded(reordered_x_, X.getMap(), X.getNumVectors(),
false);
1228 Impl::resetMultiVecIfNeeded(reordered_y_, Y.getMap(), Y.getNumVectors(),
false);
1231 for (
size_t j = 0; j < X.getNumVectors(); j++) {
1232 auto X_j = X.getVector(j);
1233 auto ReorderedX_j = reordered_x_->getVectorNonConst(j);
1234 auto X_lcl = X_j->getLocalViewDevice(Tpetra::Access::ReadOnly);
1235 auto ReorderedX_lcl = ReorderedX_j->getLocalViewDevice(Tpetra::Access::ReadWrite);
1236 if (hasStreamReordered_) {
1239 for (
int i = 0; i < num_streams_; i++) {
1240 auto perm_i = perm_v_[i];
1241 stream_end = stream_begin + perm_i.extent(0);
1242 auto X_lcl_sub = Kokkos::subview(X_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1243 auto ReorderedX_lcl_sub = Kokkos::subview(ReorderedX_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1244 Kokkos::parallel_for(
1245 Kokkos::RangePolicy<execution_space>(exec_space_instances_[i], 0, static_cast<int>(perm_i.extent(0))), KOKKOS_LAMBDA(
const int& ii) {
1246 ReorderedX_lcl_sub(perm_i(ii)) = X_lcl_sub(ii);
1248 stream_begin = stream_end;
1251 auto perm = perm_rcb_;
1252 Kokkos::parallel_for(
1253 Kokkos::RangePolicy<execution_space>(0, static_cast<int>(X_lcl.extent(0))), KOKKOS_LAMBDA(
const int& ii) {
1254 ReorderedX_lcl(perm(ii), 0) = X_lcl(ii, 0);
1262 L_solver_->apply(*reordered_x_, Y, mode);
1264 U_solver_->apply(Y, *reordered_y_, mode);
1267 U_solver_->apply(*reordered_x_, Y, mode);
1269 L_solver_->apply(Y, *reordered_y_, mode);
1272 for (
size_t j = 0; j < Y.getNumVectors(); j++) {
1273 auto Y_j = Y.getVectorNonConst(j);
1274 auto ReorderedY_j = reordered_y_->getVector(j);
1275 auto Y_lcl = Y_j->getLocalViewDevice(Tpetra::Access::ReadWrite);
1276 auto ReorderedY_lcl = ReorderedY_j->getLocalViewDevice(Tpetra::Access::ReadOnly);
1277 if (hasStreamReordered_) {
1280 for (
int i = 0; i < num_streams_; i++) {
1281 auto perm_i = perm_v_[i];
1282 stream_end = stream_begin + perm_i.extent(0);
1283 auto Y_lcl_sub = Kokkos::subview(Y_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1284 auto ReorderedY_lcl_sub = Kokkos::subview(ReorderedY_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1285 Kokkos::parallel_for(
1286 Kokkos::RangePolicy<execution_space>(exec_space_instances_[i], 0, static_cast<int>(perm_i.extent(0))), KOKKOS_LAMBDA(
const int& ii) {
1287 Y_lcl_sub(ii) = ReorderedY_lcl_sub(perm_i(ii));
1289 stream_begin = stream_end;
1292 auto perm = perm_rcb_;
1293 Kokkos::parallel_for(
1294 Kokkos::RangePolicy<execution_space>(0, static_cast<int>(Y_lcl.extent(0))), KOKKOS_LAMBDA(
const int& ii) {
1295 Y_lcl(ii, 0) = ReorderedY_lcl(perm(ii), 0);
1302 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030)
1306 Impl::resetMultiVecIfNeeded(Y_tmp_, Y.getMap(), Y.getNumVectors(),
false);
1309 L_solver_->apply(X, *Y_tmp_, mode);
1311 if (!this->isKokkosKernelsSpiluk_) {
1314 Y_tmp_->elementWiseMultiply(one, *D_, *Y_tmp_, zero);
1317 U_solver_->apply(*Y_tmp_, Y, mode);
1320 L_solver_->apply(X, Y, mode);
1322 if (!this->isKokkosKernelsSpiluk_) {
1325 Y.elementWiseMultiply(one, *D_, Y, zero);
1328 U_solver_->apply(Y, Y, mode);
1331 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030)
1335 Impl::resetMultiVecIfNeeded(Y_tmp_, Y.getMap(), Y.getNumVectors(),
false);
1338 U_solver_->apply(X, *Y_tmp_, mode);
1340 if (!this->isKokkosKernelsSpiluk_) {
1346 Y_tmp_->elementWiseMultiply(one, *D_, *Y_tmp_, zero);
1349 L_solver_->apply(*Y_tmp_, Y, mode);
1352 U_solver_->apply(X, Y, mode);
1354 if (!this->isKokkosKernelsSpiluk_) {
1360 Y.elementWiseMultiply(one, *D_, Y, zero);
1363 L_solver_->apply(Y, Y, mode);
1368 if (alpha == zero) {
1378 Impl::resetMultiVecIfNeeded(Y_tmp_, Y.getMap(), Y.getNumVectors(),
false);
1379 apply(X, *Y_tmp_, mode);
1380 Y.update(alpha, *Y_tmp_, beta);
1385 #ifdef HAVE_IFPACK2_DEBUG
1390 for (
size_t j = 0; j < Y.getNumVectors(); ++j) {
1391 if (STM::isnaninf(norms[j])) {
1396 TEUCHOS_TEST_FOR_EXCEPTION(!good, std::runtime_error,
"Ifpack2::RILUK::apply: The 1-norm of the output Y is NaN or Inf.");
1398 #endif // HAVE_IFPACK2_DEBUG
1401 applyTime_ += (timer.
wallTime() - startTime);
1437 template <
class MatrixType>
1439 std::ostringstream os;
1444 os <<
"\"Ifpack2::RILUK\": {";
1445 os <<
"Initialized: " << (isInitialized() ?
"true" :
"false") <<
", "
1446 <<
"Computed: " << (isComputed() ?
"true" :
"false") <<
", ";
1448 os <<
"Level-of-fill: " << getLevelOfFill() <<
", ";
1450 if (isKokkosKernelsSpiluk_) os <<
"KK-SPILUK, ";
1451 if (isKokkosKernelsStream_) os <<
"KK-Stream, ";
1454 os <<
"Matrix: null";
1456 os <<
"Global matrix dimensions: ["
1457 << A_->getGlobalNumRows() <<
", " << A_->getGlobalNumCols() <<
"]"
1458 <<
", Global nnz: " << A_->getGlobalNumEntries();
1461 if (!L_solver_.is_null()) os <<
", " << L_solver_->description();
1462 if (!U_solver_.is_null()) os <<
", " << U_solver_->description();
1470 #define IFPACK2_RILUK_INSTANT(S, LO, GO, N) \
1471 template class Ifpack2::RILUK<Tpetra::RowMatrix<S, LO, GO, N> >;
void setCoord(const Teuchos::RCP< const coord_type > &A_coordinates)
Set the matrix rows' coordinates.
Definition: Ifpack2_RILUK_def.hpp:153
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getRangeMap() const
Returns the Tpetra::Map object associated with the range of this operator.
Definition: Ifpack2_RILUK_def.hpp:239
node_type::execution_space execution_space
The Kokkos execution space of the input MatrixType.
Definition: Ifpack2_RILUK_decl.hpp:241
static Teuchos::RCP< const row_matrix_type > makeLocalFilter(const Teuchos::RCP< const row_matrix_type > &A)
Return A, wrapped in a LocalFilter, if necessary.
Definition: Ifpack2_RILUK_def.hpp:437
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition: Ifpack2_RILUK_decl.hpp:229
T & get(const std::string &name, T def_value)
bool nonnull(const std::shared_ptr< T > &p)
const Tpetra::Vector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > & getD() const
Return the diagonal entries of the ILU factorization.
Definition: Ifpack2_RILUK_def.hpp:177
void initialize()
Initialize by computing the symbolic incomplete factorization.
Definition: Ifpack2_RILUK_def.hpp:467
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Tpetra::RowMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > row_matrix_type
Tpetra::RowMatrix specialization used by this class.
Definition: Ifpack2_RILUK_decl.hpp:248
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Change the matrix to be preconditioned.
Definition: Ifpack2_RILUK_def.hpp:121
std::string description() const
A one-line description of this object.
Definition: Ifpack2_RILUK_def.hpp:1438
void setParameters(const Teuchos::ParameterList ¶ms)
Definition: Ifpack2_RILUK_def.hpp:298
ILU(k) factorization of a given Tpetra::RowMatrix.
Definition: Ifpack2_RILUK_decl.hpp:213
size_t getNodeSmootherComplexity() const
Get a rough estimate of cost per iteration.
Definition: Ifpack2_RILUK_def.hpp:202
Teuchos::RCP< const crs_matrix_type > getCrsMatrix() const
Return the input matrix A as a Tpetra::CrsMatrix, if possible; else throws.
Definition: Ifpack2_RILUK_def.hpp:425
Tpetra::CrsMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > crs_matrix_type
Tpetra::CrsMatrix specialization used by this class for representing L and U.
Definition: Ifpack2_RILUK_decl.hpp:250
const crs_matrix_type & getU() const
Return the U factor of the ILU factorization.
Definition: Ifpack2_RILUK_def.hpp:190
bool isParameter(const std::string &name) const
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition: Ifpack2_RILUK_decl.hpp:232
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getDomainMap() const
Returns the Tpetra::Map object associated with the domain of this operator.
Definition: Ifpack2_RILUK_def.hpp:219
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition: Ifpack2_RILUK_decl.hpp:226
virtual ~RILUK()
Destructor (declared virtual for memory safety).
Definition: Ifpack2_RILUK_def.hpp:98
"Preconditioner" that solves local sparse triangular systems.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:46
void compute()
Compute the (numeric) incomplete factorization.
Definition: Ifpack2_RILUK_def.hpp:1084
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the (inverse of the) incomplete factorization to X, resulting in Y.
Definition: Ifpack2_RILUK_def.hpp:1171
Teuchos::ScalarTraits< scalar_type >::magnitudeType magnitude_type
The type of the magnitude (absolute value) of a matrix entry.
Definition: Ifpack2_RILUK_decl.hpp:235
void resize(size_type new_size, const value_type &x=value_type())
Construct a level filled graph for use in computing an ILU(k) incomplete factorization.
Definition: Ifpack2_IlukGraph.hpp:67
Teuchos::RCP< const coord_type > getCoord() const
Get the coordinates associated with the input matrix's rows.
Definition: Ifpack2_RILUK_def.hpp:431
IntegralType getIntegralValue(const std::string &str, const std::string ¶mName="", const std::string &sublistName="") const
Teuchos::RCP< const row_matrix_type > getMatrix() const
Get the input matrix.
Definition: Ifpack2_RILUK_def.hpp:419
bool isType(const std::string &name) const
Declaration of MDF interface.
Access only local rows and columns of a sparse matrix.
Definition: Ifpack2_LocalFilter_decl.hpp:127
const crs_matrix_type & getL() const
Return the L factor of the ILU factorization.
Definition: Ifpack2_RILUK_def.hpp:161
std::string typeName(const T &t)
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition: Ifpack2_RILUK_decl.hpp:223