10 #ifndef IFPACK2_CRSRILUK_DEF_HPP
11 #define IFPACK2_CRSRILUK_DEF_HPP
14 #include "Ifpack2_LocalFilter.hpp"
15 #include "Tpetra_CrsMatrix.hpp"
16 #include "Teuchos_StandardParameterEntryValidators.hpp"
17 #include "Ifpack2_LocalSparseTriangularSolver.hpp"
18 #include "Ifpack2_Details_getParamTryingTypes.hpp"
19 #include "Ifpack2_Details_getCrsMatrix.hpp"
20 #include "Kokkos_Sort.hpp"
21 #include "KokkosSparse_Utils.hpp"
22 #include "KokkosKernels_Sorting.hpp"
23 #include "KokkosSparse_IOUtils.hpp"
36 type_strs[0] =
"Serial";
37 type_strs[1] =
"KSPILUK";
39 type_enums[0] = Serial;
40 type_enums[1] = KSPILUK;
45 template<
class MatrixType>
51 isInitialized_ (false),
56 initializeTime_ (0.0),
62 isKokkosKernelsSpiluk_(false),
63 isKokkosKernelsStream_(false),
65 hasStreamReordered_(false)
71 template<
class MatrixType>
77 isInitialized_ (false),
82 initializeTime_ (0.0),
88 isKokkosKernelsSpiluk_(false),
89 isKokkosKernelsStream_(false),
91 hasStreamReordered_(false)
97 template<
class MatrixType>
100 if (!isKokkosKernelsStream_) {
102 KernelHandle_->destroy_spiluk_handle();
106 for (
int i = 0; i < num_streams_; i++) {
108 KernelHandle_v_[i]->destroy_spiluk_handle();
114 template<
class MatrixType>
118 L_solver_->setObjectLabel(
"lower");
120 U_solver_->setObjectLabel(
"upper");
123 template<
class MatrixType>
132 isAllocated_ =
false;
133 isInitialized_ =
false;
135 A_local_ = Teuchos::null;
136 Graph_ = Teuchos::null;
143 if (! L_solver_.is_null ()) {
144 L_solver_->setMatrix (Teuchos::null);
146 if (! U_solver_.is_null ()) {
147 U_solver_->setMatrix (Teuchos::null);
159 template<
class MatrixType>
164 L_.is_null (), std::runtime_error,
"Ifpack2::RILUK::getL: The L factor "
165 "is null. Please call initialize() (and preferably also compute()) "
166 "before calling this method. If the input matrix has not yet been set, "
167 "you must first call setMatrix() with a nonnull input matrix before you "
168 "may call initialize() or compute().");
173 template<
class MatrixType>
174 const Tpetra::Vector<typename RILUK<MatrixType>::scalar_type,
181 D_.is_null (), std::runtime_error,
"Ifpack2::RILUK::getD: The D factor "
182 "(of diagonal entries) is null. Please call initialize() (and "
183 "preferably also compute()) before calling this method. If the input "
184 "matrix has not yet been set, you must first call setMatrix() with a "
185 "nonnull input matrix before you may call initialize() or compute().");
190 template<
class MatrixType>
195 U_.is_null (), std::runtime_error,
"Ifpack2::RILUK::getU: The U factor "
196 "is null. Please call initialize() (and preferably also compute()) "
197 "before calling this method. If the input matrix has not yet been set, "
198 "you must first call setMatrix() with a nonnull input matrix before you "
199 "may call initialize() or compute().");
204 template<
class MatrixType>
207 A_.
is_null (), std::runtime_error,
"Ifpack2::RILUK::getNodeSmootherComplexity: "
208 "The input matrix A is null. Please call setMatrix() with a nonnull "
209 "input matrix, then call compute(), before calling this method.");
211 if(!L_.is_null() && !U_.is_null())
212 return A_->getLocalNumEntries() + L_->getLocalNumEntries() + U_->getLocalNumEntries();
218 template<
class MatrixType>
224 A_.
is_null (), std::runtime_error,
"Ifpack2::RILUK::getDomainMap: "
225 "The matrix is null. Please call setMatrix() with a nonnull input "
226 "before calling this method.");
230 Graph_.is_null (), std::runtime_error,
"Ifpack2::RILUK::getDomainMap: "
231 "The computed graph is null. Please call initialize() before calling "
233 return Graph_->getL_Graph ()->getDomainMap ();
237 template<
class MatrixType>
243 A_.
is_null (), std::runtime_error,
"Ifpack2::RILUK::getRangeMap: "
244 "The matrix is null. Please call setMatrix() with a nonnull input "
245 "before calling this method.");
249 Graph_.is_null (), std::runtime_error,
"Ifpack2::RILUK::getRangeMap: "
250 "The computed graph is null. Please call initialize() before calling "
252 return Graph_->getL_Graph ()->getRangeMap ();
256 template<
class MatrixType>
262 if (! isAllocated_) {
263 if (!isKokkosKernelsStream_) {
272 L_ =
rcp (
new crs_matrix_type (Graph_->getL_Graph ()));
273 U_ =
rcp (
new crs_matrix_type (Graph_->getU_Graph ()));
274 L_->setAllToScalar (STS::zero ());
275 U_->setAllToScalar (STS::zero ());
280 D_ =
rcp (
new vec_type (Graph_->getL_Graph ()->getRowMap ()));
283 L_v_ = std::vector< Teuchos::RCP<crs_matrix_type> >(num_streams_);
284 U_v_ = std::vector< Teuchos::RCP<crs_matrix_type> >(num_streams_);
285 for (
int i = 0; i < num_streams_; i++) {
289 L_v_[i] =
rcp (
new crs_matrix_type (Graph_v_[i]->getL_Graph ()));
290 U_v_[i] =
rcp (
new crs_matrix_type (Graph_v_[i]->getU_Graph ()));
291 L_v_[i]->setAllToScalar (STS::zero ());
292 U_v_[i]->setAllToScalar (STS::zero ());
294 L_v_[i]->fillComplete ();
295 U_v_[i]->fillComplete ();
303 template<
class MatrixType>
311 using Details::getParamTryingTypes;
312 const char prefix[] =
"Ifpack2::RILUK: ";
319 double overalloc = 2.;
331 const std::string paramName (
"fact: iluk level-of-fill");
332 getParamTryingTypes<int, int, global_ordinal_type, double, float>
333 (fillLevel, params, paramName, prefix);
338 const std::string paramName (
"fact: absolute threshold");
339 getParamTryingTypes<magnitude_type, magnitude_type, double>
340 (absThresh, params, paramName, prefix);
343 const std::string paramName (
"fact: relative threshold");
344 getParamTryingTypes<magnitude_type, magnitude_type, double>
345 (relThresh, params, paramName, prefix);
348 const std::string paramName (
"fact: relax value");
349 getParamTryingTypes<magnitude_type, magnitude_type, double>
350 (relaxValue, params, paramName, prefix);
353 const std::string paramName (
"fact: iluk overalloc");
354 getParamTryingTypes<double, double>
355 (overalloc, params, paramName, prefix);
359 Details::IlukImplType::Enum ilukimplType = Details::IlukImplType::Serial;
361 static const char typeName[] =
"fact: type";
363 if ( ! params.
isType<std::string>(typeName))
break;
366 Array<std::string> ilukimplTypeStrs;
367 Array<Details::IlukImplType::Enum> ilukimplTypeEnums;
368 Details::IlukImplType::loadPLTypeOption (ilukimplTypeStrs, ilukimplTypeEnums);
370 s2i(ilukimplTypeStrs (), ilukimplTypeEnums (), typeName,
false);
375 if (ilukimplType == Details::IlukImplType::KSPILUK) {
376 this->isKokkosKernelsSpiluk_ =
true;
379 this->isKokkosKernelsSpiluk_ =
false;
383 const std::string paramName (
"fact: kspiluk number-of-streams");
384 getParamTryingTypes<int, int, global_ordinal_type>
385 (nstreams, params, paramName, prefix);
389 L_solver_->setParameters(params);
390 U_solver_->setParameters(params);
396 LevelOfFill_ = fillLevel;
397 Overalloc_ = overalloc;
398 #ifdef KOKKOS_ENABLE_OPENMP
399 if constexpr (std::is_same_v<execution_space, Kokkos::OpenMP>) {
403 num_streams_ = nstreams;
405 if (num_streams_ >= 1) {
406 this->isKokkosKernelsStream_ =
true;
408 if (params.
isParameter(
"fact: kspiluk reordering in streams"))
409 hasStreamReordered_ = params.
get<
bool> (
"fact: kspiluk reordering in streams");
412 this->isKokkosKernelsStream_ =
false;
420 if (absThresh != -STM::one ()) {
421 Athresh_ = absThresh;
423 if (relThresh != -STM::one ()) {
424 Rthresh_ = relThresh;
426 if (relaxValue != -STM::one ()) {
427 RelaxValue_ = relaxValue;
432 template<
class MatrixType>
439 template<
class MatrixType>
446 template<
class MatrixType>
452 using Teuchos::rcp_dynamic_cast;
453 using Teuchos::rcp_implicit_cast;
458 if (A->getRowMap ()->getComm ()->getSize () == 1 ||
459 A->getRowMap ()->isSameAs (* (A->getColMap ()))) {
466 RCP<const LocalFilter<row_matrix_type> > A_lf_r =
468 if (! A_lf_r.is_null ()) {
480 template<
class MatrixType>
485 using Teuchos::rcp_const_cast;
486 using Teuchos::rcp_dynamic_cast;
487 using Teuchos::rcp_implicit_cast;
493 typedef Tpetra::Map<local_ordinal_type,
496 const char prefix[] =
"Ifpack2::RILUK::initialize: ";
499 (A_.
is_null (), std::runtime_error, prefix <<
"The matrix is null. Please "
500 "call setMatrix() with a nonnull input before calling this method.");
502 (! A_->isFillComplete (), std::runtime_error, prefix <<
"The matrix is not "
503 "fill complete. You may not invoke initialize() or compute() with this "
504 "matrix until the matrix is fill complete. If your matrix is a "
505 "Tpetra::CrsMatrix, please call fillComplete on it (with the domain and "
506 "range Maps, if appropriate) before calling this method.");
509 double startTime = timer.wallTime();
520 isInitialized_ =
false;
521 isAllocated_ =
false;
523 Graph_ = Teuchos::null;
525 reordered_x_ =
nullptr;
526 reordered_y_ =
nullptr;
528 if (isKokkosKernelsStream_) {
529 Graph_v_ = std::vector< Teuchos::RCP<iluk_graph_type> >(num_streams_);
530 A_local_diagblks = std::vector<local_matrix_device_type>(num_streams_);
531 for (
int i = 0; i < num_streams_; i++) {
532 Graph_v_[i] = Teuchos::null;
536 A_local_ = makeLocalFilter (A_);
539 A_local_.is_null (), std::logic_error,
"Ifpack2::RILUK::initialize: "
540 "makeLocalFilter returned null; it failed to compute A_local. "
541 "Please report this bug to the Ifpack2 developers.");
550 A_local_crs_ = Details::getCrsMatrix(A_local_);
551 if(A_local_crs_.is_null()) {
552 local_ordinal_type numRows = A_local_->getLocalNumRows();
553 Array<size_t> entriesPerRow(numRows);
554 for(local_ordinal_type i = 0; i < numRows; i++) {
555 entriesPerRow[i] = A_local_->getNumEntriesInLocalRow(i);
559 A_local_->getColMap (),
562 nonconst_local_inds_host_view_type indices(
"indices",A_local_->getLocalMaxNumRowEntries());
563 nonconst_values_host_view_type values(
"values",A_local_->getLocalMaxNumRowEntries());
564 for(local_ordinal_type i = 0; i < numRows; i++) {
565 size_t numEntries = 0;
566 A_local_->getLocalRowCopy(i, indices, values, numEntries);
567 A_local_crs_nc_->insertLocalValues(i, numEntries, reinterpret_cast<scalar_type*>(values.data()), indices.data());
569 A_local_crs_nc_->fillComplete (A_local_->getDomainMap (), A_local_->getRangeMap ());
570 A_local_crs_ = rcp_const_cast<
const crs_matrix_type> (A_local_crs_nc_);
572 if (!isKokkosKernelsStream_) {
574 LevelOfFill_, 0, Overalloc_));
577 std::vector<int> weights(num_streams_);
578 std::fill(weights.begin(), weights.end(), 1);
579 exec_space_instances_ = Kokkos::Experimental::partition_space(
execution_space(), weights);
581 auto lclMtx = A_local_crs_->getLocalMatrixDevice();
582 if (!hasStreamReordered_) {
583 KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks);
585 perm_v_ = KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks,
true);
586 reverse_perm_v_.resize(perm_v_.size());
587 for(
size_t istream=0; istream < perm_v_.size(); ++istream) {
588 using perm_type =
typename lno_nonzero_view_t::non_const_type;
589 const auto perm = perm_v_[istream];
590 const auto perm_length = perm.extent(0);
591 perm_type reverse_perm(
592 Kokkos::view_alloc(Kokkos::WithoutInitializing,
"reverse_perm"),
594 Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(exec_space_instances_[istream], 0, perm_length),
595 KOKKOS_LAMBDA(
const local_ordinal_type ii) {
596 reverse_perm(perm(ii)) = ii;
598 reverse_perm_v_[istream] = reverse_perm;
602 A_local_diagblks_rowmap_v_ = std::vector<lno_row_view_t>(num_streams_);
603 A_local_diagblks_entries_v_ = std::vector<lno_nonzero_view_t>(num_streams_);
604 A_local_diagblks_values_v_ = std::vector<scalar_nonzero_view_t>(num_streams_);
606 for(
int i = 0; i < num_streams_; i++) {
607 A_local_diagblks_rowmap_v_[i] = A_local_diagblks[i].graph.row_map;
608 A_local_diagblks_entries_v_[i] = A_local_diagblks[i].graph.entries;
609 A_local_diagblks_values_v_[i] = A_local_diagblks[i].values;
612 A_local_diagblks[i].numRows(),
613 A_local_crs_->getRowMap()->getComm()));
615 A_local_diagblks[i].numCols(),
616 A_local_crs_->getColMap()->getComm()));
618 A_local_diagblks_ColMap,
619 A_local_diagblks[i]));
621 LevelOfFill_, 0, Overalloc_));
626 if (this->isKokkosKernelsSpiluk_) {
627 if (!isKokkosKernelsStream_) {
628 this->KernelHandle_ =
Teuchos::rcp (
new kk_handle_type ());
629 KernelHandle_->create_spiluk_handle( KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
630 A_local_->getLocalNumRows(),
631 2*A_local_->getLocalNumEntries()*(LevelOfFill_+1),
632 2*A_local_->getLocalNumEntries()*(LevelOfFill_+1) );
633 Graph_->initialize (KernelHandle_);
636 KernelHandle_v_ = std::vector< Teuchos::RCP<kk_handle_type> >(num_streams_);
637 for (
int i = 0; i < num_streams_; i++) {
638 KernelHandle_v_[i] =
Teuchos::rcp (
new kk_handle_type ());
639 KernelHandle_v_[i]->create_spiluk_handle( KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
640 A_local_diagblks[i].numRows(),
641 2*A_local_diagblks[i].nnz()*(LevelOfFill_+1),
642 2*A_local_diagblks[i].nnz()*(LevelOfFill_+1) );
643 Graph_v_[i]->initialize (KernelHandle_v_[i]);
648 Graph_->initialize ();
652 checkOrderingConsistency (*A_local_);
653 if (!isKokkosKernelsStream_) {
654 L_solver_->setMatrix (L_);
657 L_solver_->setStreamInfo (isKokkosKernelsStream_, num_streams_, exec_space_instances_);
658 L_solver_->setMatrices (L_v_);
660 L_solver_->initialize ();
662 if (!isKokkosKernelsStream_) {
663 U_solver_->setMatrix (U_);
666 U_solver_->setStreamInfo (isKokkosKernelsStream_, num_streams_, exec_space_instances_);
667 U_solver_->setMatrices (U_v_);
669 U_solver_->initialize ();
676 isInitialized_ =
true;
678 initializeTime_ += (timer.wallTime() - startTime);
681 template<
class MatrixType>
691 bool gidsAreConsistentlyOrdered=
true;
692 global_ordinal_type indexOfInconsistentGID=0;
693 for (global_ordinal_type i=0; i<rowGIDs.
size(); ++i) {
694 if (rowGIDs[i] != colGIDs[i]) {
695 gidsAreConsistentlyOrdered=
false;
696 indexOfInconsistentGID=i;
701 "The ordering of the local GIDs in the row and column maps is not the same"
702 << std::endl <<
"at index " << indexOfInconsistentGID
703 <<
". Consistency is required, as all calculations are done with"
704 << std::endl <<
"local indexing.");
707 template<
class MatrixType>
710 initAllValues (
const row_matrix_type& A)
717 using Teuchos::reduceAll;
718 typedef Tpetra::Map<local_ordinal_type,global_ordinal_type,node_type> map_type;
720 size_t NumIn = 0, NumL = 0, NumU = 0;
721 bool DiagFound =
false;
722 size_t NumNonzeroDiags = 0;
723 size_t MaxNumEntries = A.getGlobalMaxNumRowEntries();
727 nonconst_local_inds_host_view_type InI(
"InI",MaxNumEntries);
730 nonconst_values_host_view_type InV(
"InV",MaxNumEntries);
735 const bool ReplaceValues = L_->isStaticGraph () || L_->isLocallyIndexed ();
740 L_->setAllToScalar (STS::zero ());
741 U_->setAllToScalar (STS::zero ());
744 D_->putScalar (STS::zero ());
745 auto DV = Kokkos::subview(D_->getLocalViewHost(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
747 RCP<const map_type> rowMap = L_->getRowMap ();
758 for (
size_t myRow=0; myRow<A.getLocalNumRows(); ++myRow) {
759 local_ordinal_type local_row = myRow;
763 A.getLocalRowCopy (local_row, InI, InV, NumIn);
771 for (
size_t j = 0; j < NumIn; ++j) {
772 const local_ordinal_type k = InI[j];
774 if (k == local_row) {
777 DV(local_row) += Rthresh_ * InV[j] + IFPACK2_SGN(InV[j]) * Athresh_;
781 true, std::runtime_error,
"Ifpack2::RILUK::initAllValues: current "
782 "GID k = " << k <<
" < 0. I'm not sure why this is an error; it is "
783 "probably an artifact of the undocumented assumptions of the "
784 "original implementation (likely copied and pasted from Ifpack). "
785 "Nevertheless, the code I found here insisted on this being an error "
786 "state, so I will throw an exception here.");
788 else if (k < local_row) {
793 else if (Teuchos::as<size_t>(k) <= rowMap->getLocalNumElements()) {
805 DV(local_row) = Athresh_;
810 L_->replaceLocalValues(local_row, LI(0, NumL), LV(0,NumL));
814 L_->insertLocalValues(local_row, LI(0,NumL), LV(0,NumL));
820 U_->replaceLocalValues(local_row, UI(0,NumU), UV(0,NumU));
824 U_->insertLocalValues(local_row, UI(0,NumU), UV(0,NumU));
832 isInitialized_ =
true;
835 template<
class MatrixType>
836 void RILUK<MatrixType>::compute_serial ()
840 initAllValues (*A_local_);
845 const scalar_type MinDiagonalValue = STS::rmin ();
846 const scalar_type MaxDiagonalValue = STS::one () / MinDiagonalValue;
848 size_t NumIn, NumL, NumU;
851 const size_t MaxNumEntries =
852 L_->getLocalMaxNumRowEntries () + U_->getLocalMaxNumRowEntries () + 1;
856 size_t num_cols = U_->getColMap()->getLocalNumElements();
859 auto DV = Kokkos::subview(D_->getLocalViewHost(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
863 using IST =
typename row_matrix_type::impl_scalar_type;
864 for (
size_t i = 0; i < L_->getLocalNumRows (); ++i) {
865 local_ordinal_type local_row = i;
868 local_inds_host_view_type UUI;
869 values_host_view_type UUV;
873 NumIn = MaxNumEntries;
874 nonconst_local_inds_host_view_type InI_v(InI.data(),MaxNumEntries);
875 nonconst_values_host_view_type InV_v(reinterpret_cast<IST*>(InV.data()),MaxNumEntries);
877 L_->getLocalRowCopy (local_row, InI_v , InV_v, NumL);
880 InI[NumL] = local_row;
882 nonconst_local_inds_host_view_type InI_sub(InI.data()+NumL+1,MaxNumEntries-NumL-1);
883 nonconst_values_host_view_type InV_sub(reinterpret_cast<IST*>(InV.data())+NumL+1,MaxNumEntries-NumL-1);
885 U_->getLocalRowCopy (local_row, InI_sub,InV_sub, NumU);
889 for (
size_t j = 0; j < NumIn; ++j) {
893 scalar_type diagmod = STS::zero ();
895 for (
size_t jj = 0; jj < NumL; ++jj) {
896 local_ordinal_type j = InI[jj];
897 IST multiplier = InV[jj];
899 InV[jj] *=
static_cast<scalar_type
>(DV(j));
901 U_->getLocalRowView(j, UUI, UUV);
904 if (RelaxValue_ == STM::zero ()) {
905 for (
size_t k = 0; k < NumUU; ++k) {
906 const int kk = colflag[UUI[k]];
911 InV[kk] -=
static_cast<scalar_type
>(multiplier * UUV[k]);
917 for (
size_t k = 0; k < NumUU; ++k) {
921 const int kk = colflag[UUI[k]];
923 InV[kk] -=
static_cast<scalar_type
>(multiplier*UUV[k]);
926 diagmod -=
static_cast<scalar_type
>(multiplier*UUV[k]);
934 L_->replaceLocalValues (local_row, InI (0, NumL), InV (0, NumL));
939 if (RelaxValue_ != STM::zero ()) {
940 DV(i) += RelaxValue_*diagmod;
943 if (STS::magnitude (DV(i)) > STS::magnitude (MaxDiagonalValue)) {
944 if (STS::real (DV(i)) < STM::zero ()) {
945 DV(i) = -MinDiagonalValue;
948 DV(i) = MinDiagonalValue;
952 DV(i) =
static_cast<impl_scalar_type
>(STS::one ()) / DV(i);
955 for (
size_t j = 0; j < NumU; ++j) {
956 InV[NumL+1+j] *=
static_cast<scalar_type
>(DV(i));
961 U_->replaceLocalValues (local_row, InI (NumL+1, NumU), InV (NumL+1, NumU));
965 for (
size_t j = 0; j < NumIn; ++j) {
966 colflag[InI[j]] = -1;
977 L_->fillComplete (L_->getColMap (), A_local_->getRangeMap ());
978 U_->fillComplete (A_local_->getDomainMap (), U_->getRowMap ());
981 L_solver_->setMatrix (L_);
982 L_solver_->compute ();
983 U_solver_->setMatrix (U_);
984 U_solver_->compute ();
988 template<
class MatrixType>
989 void RILUK<MatrixType>::compute_kkspiluk()
994 L_->setAllToScalar (STS::zero ());
995 U_->setAllToScalar (STS::zero ());
997 using row_map_type =
typename crs_matrix_type::local_matrix_device_type::row_map_type;
998 auto lclL = L_->getLocalMatrixDevice();
999 row_map_type L_rowmap = lclL.graph.row_map;
1000 auto L_entries = lclL.graph.entries;
1001 auto L_values = lclL.values;
1003 auto lclU = U_->getLocalMatrixDevice();
1004 row_map_type U_rowmap = lclU.graph.row_map;
1005 auto U_entries = lclU.graph.entries;
1006 auto U_values = lclU.values;
1008 auto lclMtx = A_local_crs_->getLocalMatrixDevice();
1009 KokkosSparse::Experimental::spiluk_numeric( KernelHandle_.getRawPtr(), LevelOfFill_,
1010 lclMtx.graph.row_map, lclMtx.graph.entries, lclMtx.values,
1011 L_rowmap, L_entries, L_values, U_rowmap, U_entries, U_values );
1013 L_->fillComplete (L_->getColMap (), A_local_->getRangeMap ());
1014 U_->fillComplete (A_local_->getDomainMap (), U_->getRowMap ());
1016 L_solver_->compute ();
1017 U_solver_->compute ();
1020 template<
class MatrixType>
1021 void RILUK<MatrixType>::compute_kkspiluk_stream()
1023 for(
int i = 0; i < num_streams_; i++) {
1024 L_v_[i]->resumeFill ();
1025 U_v_[i]->resumeFill ();
1027 L_v_[i]->setAllToScalar (STS::zero ());
1028 U_v_[i]->setAllToScalar (STS::zero ());
1030 std::vector<lno_row_view_t> L_rowmap_v(num_streams_);
1031 std::vector<lno_nonzero_view_t> L_entries_v(num_streams_);
1032 std::vector<scalar_nonzero_view_t> L_values_v(num_streams_);
1033 std::vector<lno_row_view_t> U_rowmap_v(num_streams_);
1034 std::vector<lno_nonzero_view_t> U_entries_v(num_streams_);
1035 std::vector<scalar_nonzero_view_t> U_values_v(num_streams_);
1036 std::vector<kk_handle_type *> KernelHandle_rawptr_v_(num_streams_);
1037 for(
int i = 0; i < num_streams_; i++) {
1038 auto lclL = L_v_[i]->getLocalMatrixDevice();
1039 L_rowmap_v[i] = lclL.graph.row_map;
1040 L_entries_v[i] = lclL.graph.entries;
1041 L_values_v[i] = lclL.values;
1043 auto lclU = U_v_[i]->getLocalMatrixDevice();
1044 U_rowmap_v[i] = lclU.graph.row_map;
1045 U_entries_v[i] = lclU.graph.entries;
1046 U_values_v[i] = lclU.values;
1047 KernelHandle_rawptr_v_[i] = KernelHandle_v_[i].getRawPtr();
1051 auto lclMtx = A_local_crs_->getLocalMatrixDevice();
1054 using TeamPolicy = Kokkos::TeamPolicy<execution_space>;
1055 const auto A_nrows = lclMtx.numRows();
1056 auto rows_per_block = ((A_nrows % num_streams_) == 0)
1057 ? (A_nrows / num_streams_)
1058 : (A_nrows / num_streams_ + 1);
1059 for(
int i = 0; i < num_streams_; i++) {
1060 const auto start_row_offset = i * rows_per_block;
1061 auto rowptrs = A_local_diagblks_rowmap_v_[i];
1062 auto colindices = A_local_diagblks_entries_v_[i];
1063 auto values = A_local_diagblks_values_v_[i];
1064 const bool reordered = hasStreamReordered_;
1065 typename lno_nonzero_view_t::non_const_type reverse_perm = hasStreamReordered_ ? reverse_perm_v_[i] :
typename lno_nonzero_view_t::non_const_type{};
1066 TeamPolicy pol(exec_space_instances_[i], A_local_diagblks_rowmap_v_[i].extent(0) - 1, Kokkos::AUTO);
1067 Kokkos::parallel_for(pol, KOKKOS_LAMBDA (
const typename TeamPolicy::member_type &team) {
1068 const auto irow = team.league_rank();
1069 const auto irow_A = start_row_offset + (reordered ? reverse_perm(irow) : irow);
1070 const auto A_local_crs_row = lclMtx.rowConst(irow_A);
1071 const auto begin_row = rowptrs(irow);
1072 const auto num_entries = rowptrs(irow + 1) - begin_row;
1073 Kokkos::parallel_for(Kokkos::TeamThreadRange(team, num_entries), [&](
const int j) {
1074 const auto colidx = colindices(begin_row + j);
1075 const auto colidx_A = start_row_offset + (reordered ? reverse_perm(colidx) : colidx);
1077 const int offset = KokkosSparse::findRelOffset(
1078 &A_local_crs_row.colidx(0), A_local_crs_row.length, colidx_A, 0,
false);
1079 values(begin_row + j) = A_local_crs_row.value(offset);
1085 KokkosSparse::Experimental::spiluk_numeric_streams( exec_space_instances_, KernelHandle_rawptr_v_, LevelOfFill_,
1086 A_local_diagblks_rowmap_v_, A_local_diagblks_entries_v_, A_local_diagblks_values_v_,
1087 L_rowmap_v, L_entries_v, L_values_v,
1088 U_rowmap_v, U_entries_v, U_values_v );
1089 for(
int i = 0; i < num_streams_; i++) {
1090 L_v_[i]->fillComplete ();
1091 U_v_[i]->fillComplete ();
1094 L_solver_->compute ();
1095 U_solver_->compute ();
1098 template<
class MatrixType>
1103 using Teuchos::rcp_const_cast;
1104 using Teuchos::rcp_dynamic_cast;
1107 const char prefix[] =
"Ifpack2::RILUK::compute: ";
1113 (A_.
is_null (), std::runtime_error, prefix <<
"The matrix is null. Please "
1114 "call setMatrix() with a nonnull input before calling this method.");
1116 (! A_->isFillComplete (), std::runtime_error, prefix <<
"The matrix is not "
1117 "fill complete. You may not invoke initialize() or compute() with this "
1118 "matrix until the matrix is fill complete. If your matrix is a "
1119 "Tpetra::CrsMatrix, please call fillComplete on it (with the domain and "
1120 "range Maps, if appropriate) before calling this method.");
1122 if (! isInitialized ()) {
1130 double startTime = timer.
wallTime();
1132 isComputed_ =
false;
1134 if (!this->isKokkosKernelsSpiluk_) {
1139 if(!A_local_crs_nc_.is_null()) {
1140 A_local_crs_nc_->resumeFill();
1142 Array<size_t> entriesPerRow(numRows);
1144 entriesPerRow[i] = A_local_->getNumEntriesInLocalRow(i);
1147 nonconst_local_inds_host_view_type indices(
"indices",A_local_->getLocalMaxNumRowEntries());
1148 nonconst_values_host_view_type values(
"values",A_local_->getLocalMaxNumRowEntries());
1150 size_t numEntries = 0;
1151 A_local_->getLocalRowCopy(i, indices, values, numEntries);
1152 A_local_crs_nc_->replaceLocalValues(i, numEntries, reinterpret_cast<scalar_type*>(values.data()),indices.data());
1154 A_local_crs_nc_->fillComplete (A_local_->getDomainMap (), A_local_->getRangeMap ());
1157 if (!isKokkosKernelsStream_) {
1161 compute_kkspiluk_stream();
1167 computeTime_ += (timer.
wallTime() - startTime);
1171 template <
typename MV,
typename Map>
1172 void resetMultiVecIfNeeded(std::unique_ptr<MV> &mv_ptr,
const Map &map,
const size_t numVectors,
bool initialize)
1174 if(!mv_ptr || mv_ptr->getNumVectors() != numVectors) {
1175 mv_ptr.reset(
new MV(map, numVectors, initialize));
1180 template<
class MatrixType>
1183 apply (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1184 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
1190 using Teuchos::rcpFromRef;
1193 A_.
is_null (), std::runtime_error,
"Ifpack2::RILUK::apply: The matrix is "
1194 "null. Please call setMatrix() with a nonnull input, then initialize() "
1195 "and compute(), before calling this method.");
1197 ! isComputed (), std::runtime_error,
1198 "Ifpack2::RILUK::apply: If you have not yet called compute(), "
1199 "you must call compute() before calling this method.");
1200 TEUCHOS_TEST_FOR_EXCEPTION(
1201 X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
1202 "Ifpack2::RILUK::apply: X and Y do not have the same number of columns. "
1203 "X.getNumVectors() = " << X.getNumVectors ()
1204 <<
" != Y.getNumVectors() = " << Y.getNumVectors () <<
".");
1205 TEUCHOS_TEST_FOR_EXCEPTION(
1207 "Ifpack2::RILUK::apply: mode = Teuchos::CONJ_TRANS is not implemented for "
1208 "complex Scalar type. Please talk to the Ifpack2 developers to get this "
1209 "fixed. There is a FIXME in this file about this very issue.");
1210 #ifdef HAVE_IFPACK2_DEBUG
1212 if (!isKokkosKernelsStream_) {
1214 TEUCHOS_TEST_FOR_EXCEPTION( STM::isnaninf (D_nrm1), std::runtime_error,
"Ifpack2::RILUK::apply: The 1-norm of the stored diagonal is NaN or Inf.");
1219 for (
size_t j = 0; j < X.getNumVectors (); ++j) {
1220 if (STM::isnaninf (norms[j])) {
1225 TEUCHOS_TEST_FOR_EXCEPTION( ! good, std::runtime_error,
"Ifpack2::RILUK::apply: The 1-norm of the input X is NaN or Inf.");
1227 #endif // HAVE_IFPACK2_DEBUG
1233 double startTime = timer.
wallTime();
1236 if (alpha == one && beta == zero) {
1237 if (isKokkosKernelsSpiluk_ && isKokkosKernelsStream_ && hasStreamReordered_) {
1238 Impl::resetMultiVecIfNeeded(reordered_x_, X.getMap(), X.getNumVectors(),
false);
1239 Impl::resetMultiVecIfNeeded(reordered_y_, Y.getMap(), Y.getNumVectors(),
false);
1241 for (
size_t j = 0; j < X.getNumVectors(); j++) {
1242 auto X_j = X.getVector(j);
1243 auto ReorderedX_j = reordered_x_->getVectorNonConst(j);
1244 auto X_lcl = X_j->getLocalViewDevice(Tpetra::Access::ReadOnly);
1245 auto ReorderedX_lcl = ReorderedX_j->getLocalViewDevice(Tpetra::Access::ReadWrite);
1248 for(
int i = 0; i < num_streams_; i++) {
1249 auto perm_i = perm_v_[i];
1250 stream_end = stream_begin + perm_i.extent(0);
1251 auto X_lcl_sub = Kokkos::subview (X_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1252 auto ReorderedX_lcl_sub = Kokkos::subview (ReorderedX_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1253 Kokkos::parallel_for( Kokkos::RangePolicy<execution_space>(exec_space_instances_[i], 0, static_cast<int>(perm_i.extent(0))), KOKKOS_LAMBDA (
const int& ii ) {
1254 ReorderedX_lcl_sub(perm_i(ii)) = X_lcl_sub(ii);
1256 stream_begin = stream_end;
1262 L_solver_->apply (*reordered_x_, Y, mode);
1264 U_solver_->apply (Y, *reordered_y_, mode);
1268 U_solver_->apply (*reordered_x_, Y, mode);
1270 L_solver_->apply (Y, *reordered_y_, mode);
1273 for (
size_t j = 0; j < Y.getNumVectors(); j++) {
1274 auto Y_j = Y.getVectorNonConst(j);
1275 auto ReorderedY_j = reordered_y_->getVector(j);
1276 auto Y_lcl = Y_j->getLocalViewDevice(Tpetra::Access::ReadWrite);
1277 auto ReorderedY_lcl = ReorderedY_j->getLocalViewDevice(Tpetra::Access::ReadOnly);
1280 for(
int i = 0; i < num_streams_; i++) {
1281 auto perm_i = perm_v_[i];
1282 stream_end = stream_begin + perm_i.extent(0);
1283 auto Y_lcl_sub = Kokkos::subview (Y_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1284 auto ReorderedY_lcl_sub = Kokkos::subview (ReorderedY_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1285 Kokkos::parallel_for( Kokkos::RangePolicy<execution_space>(exec_space_instances_[i], 0, static_cast<int>(perm_i.extent(0))), KOKKOS_LAMBDA (
const int& ii ) {
1286 Y_lcl_sub(ii) = ReorderedY_lcl_sub(perm_i(ii));
1288 stream_begin = stream_end;
1295 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030)
1299 Impl::resetMultiVecIfNeeded(Y_tmp_, Y.getMap(), Y.getNumVectors(),
false);
1302 L_solver_->apply (X, *Y_tmp_, mode);
1304 if (!this->isKokkosKernelsSpiluk_) {
1307 Y_tmp_->elementWiseMultiply (one, *D_, *Y_tmp_, zero);
1310 U_solver_->apply (*Y_tmp_, Y, mode);
1313 L_solver_->apply (X, Y, mode);
1315 if (!this->isKokkosKernelsSpiluk_) {
1318 Y.elementWiseMultiply (one, *D_, Y, zero);
1321 U_solver_->apply (Y, Y, mode);
1325 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030)
1329 Impl::resetMultiVecIfNeeded(Y_tmp_, Y.getMap(), Y.getNumVectors(),
false);
1332 U_solver_->apply (X, *Y_tmp_, mode);
1334 if (!this->isKokkosKernelsSpiluk_) {
1340 Y_tmp_->elementWiseMultiply (one, *D_, *Y_tmp_, zero);
1343 L_solver_->apply (*Y_tmp_, Y, mode);
1346 U_solver_->apply (X, Y, mode);
1348 if (!this->isKokkosKernelsSpiluk_) {
1354 Y.elementWiseMultiply (one, *D_, Y, zero);
1357 L_solver_->apply (Y, Y, mode);
1363 if (alpha == zero) {
1373 Impl::resetMultiVecIfNeeded(Y_tmp_, Y.getMap(), Y.getNumVectors(),
false);
1374 apply (X, *Y_tmp_, mode);
1375 Y.update (alpha, *Y_tmp_, beta);
1380 #ifdef HAVE_IFPACK2_DEBUG
1385 for (
size_t j = 0; j < Y.getNumVectors (); ++j) {
1386 if (STM::isnaninf (norms[j])) {
1391 TEUCHOS_TEST_FOR_EXCEPTION( ! good, std::runtime_error,
"Ifpack2::RILUK::apply: The 1-norm of the output Y is NaN or Inf.");
1393 #endif // HAVE_IFPACK2_DEBUG
1396 applyTime_ += (timer.
wallTime() - startTime);
1433 template<
class MatrixType>
1436 std::ostringstream os;
1441 os <<
"\"Ifpack2::RILUK\": {";
1442 os <<
"Initialized: " << (isInitialized () ?
"true" :
"false") <<
", "
1443 <<
"Computed: " << (isComputed () ?
"true" :
"false") <<
", ";
1445 os <<
"Level-of-fill: " << getLevelOfFill() <<
", ";
1447 if(isKokkosKernelsSpiluk_) os<<
"KK-SPILUK, ";
1448 if(isKokkosKernelsStream_) os<<
"KK-Stream, ";
1451 os <<
"Matrix: null";
1454 os <<
"Global matrix dimensions: ["
1455 << A_->getGlobalNumRows () <<
", " << A_->getGlobalNumCols () <<
"]"
1456 <<
", Global nnz: " << A_->getGlobalNumEntries();
1459 if (! L_solver_.is_null ()) os <<
", " << L_solver_->description ();
1460 if (! U_solver_.is_null ()) os <<
", " << U_solver_->description ();
1468 #define IFPACK2_RILUK_INSTANT(S,LO,GO,N) \
1469 template class Ifpack2::RILUK< Tpetra::RowMatrix<S, LO, GO, N> >;
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getRangeMap() const
Returns the Tpetra::Map object associated with the range of this operator.
Definition: Ifpack2_RILUK_def.hpp:241
node_type::execution_space execution_space
The Kokkos execution space of the input MatrixType.
Definition: Ifpack2_RILUK_decl.hpp:243
static Teuchos::RCP< const row_matrix_type > makeLocalFilter(const Teuchos::RCP< const row_matrix_type > &A)
Return A, wrapped in a LocalFilter, if necessary.
Definition: Ifpack2_RILUK_def.hpp:448
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition: Ifpack2_RILUK_decl.hpp:231
T & get(const std::string &name, T def_value)
bool nonnull(const std::shared_ptr< T > &p)
const Tpetra::Vector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > & getD() const
Return the diagonal entries of the ILU factorization.
Definition: Ifpack2_RILUK_def.hpp:178
void initialize()
Initialize by computing the symbolic incomplete factorization.
Definition: Ifpack2_RILUK_def.hpp:481
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Tpetra::RowMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > row_matrix_type
Tpetra::RowMatrix specialization used by this class.
Definition: Ifpack2_RILUK_decl.hpp:249
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Change the matrix to be preconditioned.
Definition: Ifpack2_RILUK_def.hpp:125
std::string description() const
A one-line description of this object.
Definition: Ifpack2_RILUK_def.hpp:1434
void setParameters(const Teuchos::ParameterList ¶ms)
Definition: Ifpack2_RILUK_def.hpp:306
ILU(k) factorization of a given Tpetra::RowMatrix.
Definition: Ifpack2_RILUK_decl.hpp:213
size_t getNodeSmootherComplexity() const
Get a rough estimate of cost per iteration.
Definition: Ifpack2_RILUK_def.hpp:205
Teuchos::RCP< const crs_matrix_type > getCrsMatrix() const
Return the input matrix A as a Tpetra::CrsMatrix, if possible; else throws.
Definition: Ifpack2_RILUK_def.hpp:441
Tpetra::CrsMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > crs_matrix_type
Tpetra::CrsMatrix specialization used by this class for representing L and U.
Definition: Ifpack2_RILUK_decl.hpp:252
const crs_matrix_type & getU() const
Return the U factor of the ILU factorization.
Definition: Ifpack2_RILUK_def.hpp:192
bool isParameter(const std::string &name) const
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition: Ifpack2_RILUK_decl.hpp:234
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getDomainMap() const
Returns the Tpetra::Map object associated with the domain of this operator.
Definition: Ifpack2_RILUK_def.hpp:222
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition: Ifpack2_RILUK_decl.hpp:228
virtual ~RILUK()
Destructor (declared virtual for memory safety).
Definition: Ifpack2_RILUK_def.hpp:98
"Preconditioner" that solves local sparse triangular systems.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:46
void compute()
Compute the (numeric) incomplete factorization.
Definition: Ifpack2_RILUK_def.hpp:1099
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the (inverse of the) incomplete factorization to X, resulting in Y.
Definition: Ifpack2_RILUK_def.hpp:1183
Teuchos::ScalarTraits< scalar_type >::magnitudeType magnitude_type
The type of the magnitude (absolute value) of a matrix entry.
Definition: Ifpack2_RILUK_decl.hpp:237
void resize(size_type new_size, const value_type &x=value_type())
Construct a level filled graph for use in computing an ILU(k) incomplete factorization.
Definition: Ifpack2_IlukGraph.hpp:67
IntegralType getIntegralValue(const std::string &str, const std::string ¶mName="", const std::string &sublistName="") const
Teuchos::RCP< const row_matrix_type > getMatrix() const
Get the input matrix.
Definition: Ifpack2_RILUK_def.hpp:434
bool isType(const std::string &name) const
Declaration of MDF interface.
Access only local rows and columns of a sparse matrix.
Definition: Ifpack2_LocalFilter_decl.hpp:128
const crs_matrix_type & getL() const
Return the L factor of the ILU factorization.
Definition: Ifpack2_RILUK_def.hpp:161
std::string typeName(const T &t)
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition: Ifpack2_RILUK_decl.hpp:225