10 #ifndef IFPACK2_EXPERIMENTAL_CRSRBILUK_DEF_HPP
11 #define IFPACK2_EXPERIMENTAL_CRSRBILUK_DEF_HPP
13 #include "Tpetra_BlockMultiVector.hpp"
14 #include "Tpetra_BlockView.hpp"
15 #include "Tpetra_BlockCrsMatrix_Helpers_decl.hpp"
16 #include "Ifpack2_OverlappingRowMatrix.hpp"
17 #include "Ifpack2_Details_getCrsMatrix.hpp"
18 #include "Ifpack2_LocalFilter.hpp"
20 #include "Ifpack2_RILUK.hpp"
21 #include "KokkosSparse_sptrsv.hpp"
26 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
27 #include "KokkosBatched_Gemm_Decl.hpp"
28 #include "KokkosBatched_Gemm_Serial_Impl.hpp"
29 #include "KokkosBatched_Util.hpp"
34 namespace Experimental {
37 template <
class MatrixType>
38 struct LocalRowHandler {
39 using LocalOrdinal =
typename MatrixType::local_ordinal_type;
40 using row_matrix_type = Tpetra::RowMatrix<
41 typename MatrixType::scalar_type,
43 typename MatrixType::global_ordinal_type,
44 typename MatrixType::node_type>;
45 using inds_type =
typename row_matrix_type::local_inds_host_view_type;
46 using vals_type =
typename row_matrix_type::values_host_view_type;
50 if (!A_->supportsRowViews()) {
51 const auto maxNumRowEntr = A_->getLocalMaxNumRowEntries();
52 const auto blockSize = A_->getBlockSize();
53 ind_nc_ = inds_type_nc(
"Ifpack2::RBILUK::LocalRowHandler::indices", maxNumRowEntr);
54 val_nc_ = vals_type_nc(
"Ifpack2::RBILUK::LocalRowHandler::values", maxNumRowEntr * blockSize * blockSize);
58 void getLocalRow(LocalOrdinal local_row, inds_type& InI, vals_type& InV, LocalOrdinal& NumIn) {
59 if (A_->supportsRowViews()) {
60 A_->getLocalRowView(local_row, InI, InV);
61 NumIn = (LocalOrdinal)InI.size();
64 A_->getLocalRowCopy(local_row, ind_nc_, val_nc_, cnt);
67 NumIn = (LocalOrdinal)cnt;
72 using inds_type_nc =
typename row_matrix_type::nonconst_local_inds_host_view_type;
73 using vals_type_nc =
typename row_matrix_type::nonconst_values_host_view_type;
80 template <
typename BsrMatrixType,
typename CrsMatrixType>
81 CrsMatrixType convertBsrToCrs(
const BsrMatrixType& bsrMatrix) {
82 using Ordinal =
typename BsrMatrixType::ordinal_type;
83 using RowMapType =
typename BsrMatrixType::row_map_type::non_const_type;
84 using EntriesType =
typename BsrMatrixType::index_type::non_const_type;
85 using ValuesType =
typename BsrMatrixType::values_type::non_const_type;
86 using ExeSpace =
typename BsrMatrixType::execution_space;
88 const Ordinal blockDim = bsrMatrix.blockDim();
89 const Ordinal blockSize = blockDim * blockDim;
90 const Ordinal numBlockRows = bsrMatrix.numRows();
91 const Ordinal numBlockCols = bsrMatrix.numCols();
93 const auto blockRowMap = bsrMatrix.graph.row_map;
94 const auto blockEntries = bsrMatrix.graph.entries;
95 const auto blockValues = bsrMatrix.values;
97 const Ordinal numRows = numBlockRows * blockDim;
98 const Ordinal numCols = numBlockCols * blockDim;
101 RowMapType crsRowMap(
"crsRowMap", numRows + 1);
102 EntriesType crsEntries(
"crsEntries", blockEntries.extent(0) * blockSize);
103 ValuesType crsValues(
"crsValues", blockValues.extent(0) * blockSize);
105 Kokkos::RangePolicy<ExeSpace> policy(0, numBlockRows);
108 Kokkos::parallel_for(
109 "ConvertBsrToCrs", policy, KOKKOS_LAMBDA(
const Ordinal blockRow) {
110 const Ordinal blockRowStart = blockRowMap(blockRow);
111 const Ordinal blockRowEnd = blockRowMap(blockRow + 1);
112 const Ordinal blockRowCount = blockRowEnd - blockRowStart;
115 for (Ordinal blockNnz = blockRowStart; blockNnz < blockRowEnd; ++blockNnz) {
116 const Ordinal blockCol = blockEntries(blockNnz);
117 const Ordinal blockNum = blockNnz - blockRowStart;
120 for (Ordinal blockRowOffset = 0; blockRowOffset < blockDim; ++blockRowOffset) {
121 const Ordinal crsRow = blockRow * blockDim + blockRowOffset;
123 const Ordinal crsRowStart = blockRowStart * blockSize + blockRowCount * blockDim * blockRowOffset;
124 crsRowMap(crsRow) = crsRowStart;
127 for (Ordinal blockColOffset = 0; blockColOffset < blockDim; ++blockColOffset) {
128 const Ordinal crsCol = blockCol * blockDim + blockColOffset;
129 const Ordinal crsNnz = crsRowStart + blockNum * blockDim + blockColOffset;
130 crsEntries(crsNnz) = crsCol;
131 crsValues(crsNnz) = blockValues(blockNnz * blockSize + blockRowOffset * blockDim + blockColOffset);
138 Kokkos::RangePolicy<ExeSpace> policy2(0, 1);
139 Kokkos::parallel_for(
140 "FinalizeCrs", policy2, KOKKOS_LAMBDA(
const Ordinal) {
141 crsRowMap(numRows) = blockRowMap(numBlockRows) * blockSize;
145 return CrsMatrixType(
"crsMatrix", numRows, numCols, crsEntries.extent(0), crsValues, crsRowMap, crsEntries);
150 template <
class MatrixType>
154 template <
class MatrixType>
158 template <
class MatrixType>
161 template <
class MatrixType>
170 this->isAllocated_ =
false;
171 this->isInitialized_ =
false;
172 this->isComputed_ =
false;
173 this->Graph_ = Teuchos::null;
174 L_block_ = Teuchos::null;
175 U_block_ = Teuchos::null;
176 D_block_ = Teuchos::null;
180 template <
class MatrixType>
181 const typename RBILUK<MatrixType>::block_crs_matrix_type&
184 L_block_.is_null(), std::runtime_error,
185 "Ifpack2::RILUK::getL: The L factor "
186 "is null. Please call initialize() (and preferably also compute()) "
187 "before calling this method. If the input matrix has not yet been set, "
188 "you must first call setMatrix() with a nonnull input matrix before you "
189 "may call initialize() or compute().");
193 template <
class MatrixType>
194 const typename RBILUK<MatrixType>::block_crs_matrix_type&
197 D_block_.is_null(), std::runtime_error,
198 "Ifpack2::RILUK::getD: The D factor "
199 "(of diagonal entries) is null. Please call initialize() (and "
200 "preferably also compute()) before calling this method. If the input "
201 "matrix has not yet been set, you must first call setMatrix() with a "
202 "nonnull input matrix before you may call initialize() or compute().");
206 template <
class MatrixType>
207 const typename RBILUK<MatrixType>::block_crs_matrix_type&
210 U_block_.is_null(), std::runtime_error,
211 "Ifpack2::RILUK::getU: The U factor "
212 "is null. Please call initialize() (and preferably also compute()) "
213 "before calling this method. If the input matrix has not yet been set, "
214 "you must first call setMatrix() with a nonnull input matrix before you "
215 "may call initialize() or compute().");
219 template <
class MatrixType>
224 if (!this->isAllocated_) {
225 if (!this->isKokkosKernelsStream_) {
237 L_block_ =
rcp(
new block_crs_matrix_type(*this->Graph_->getL_Graph(), blockSize_));
238 U_block_ =
rcp(
new block_crs_matrix_type(*this->Graph_->getU_Graph(), blockSize_));
239 D_block_ =
rcp(
new block_crs_matrix_type(*(Ifpack2::Details::computeDiagonalGraph(*(this->Graph_->getOverlapGraph()))),
241 L_block_->setAllToScalar(STM::zero());
242 U_block_->setAllToScalar(STM::zero());
243 D_block_->setAllToScalar(STM::zero());
246 if (this->isKokkosKernelsSpiluk_) {
247 const auto numRows = L_block_->getLocalNumRows();
248 tmp_ = view1d(
"RBILUK::tmp_", numRows * blockSize_);
254 L_block_v_ = std::vector<Teuchos::RCP<block_crs_matrix_type> >(this->num_streams_);
255 U_block_v_ = std::vector<Teuchos::RCP<block_crs_matrix_type> >(this->num_streams_);
256 for (
int i = 0; i < this->num_streams_; i++) {
257 L_block_v_[i] =
rcp(
new block_crs_matrix_type(*this->Graph_v_[i]->getL_Graph(), blockSize_));
258 U_block_v_[i] =
rcp(
new block_crs_matrix_type(*this->Graph_v_[i]->getU_Graph(), blockSize_));
259 L_block_v_[i]->setAllToScalar(STS::zero());
260 U_block_v_[i]->setAllToScalar(STS::zero());
264 auto lclMtx = A_local_bcrs_->getLocalMatrixDevice();
265 const auto numRows = lclMtx.numRows();
266 tmp_ = view1d(
"RBILUK::tmp_", numRows * blockSize_);
269 this->isAllocated_ =
true;
274 template <
class MatrixType>
276 getBlockCrsGraph(
const Teuchos::RCP<
const typename RBILUK<MatrixType>::row_matrix_type>& A) {
277 using local_ordinal_type =
typename MatrixType::local_ordinal_type;
280 using Teuchos::rcp_const_cast;
281 using Teuchos::rcp_dynamic_cast;
282 using Teuchos::rcpFromRef;
283 using row_matrix_type =
typename RBILUK<MatrixType>::row_matrix_type;
284 using crs_graph_type =
typename RBILUK<MatrixType>::crs_graph_type;
285 using block_crs_matrix_type =
typename RBILUK<MatrixType>::block_crs_matrix_type;
290 RCP<const LocalFilter<row_matrix_type> > filteredA =
291 rcp_dynamic_cast<
const LocalFilter<row_matrix_type> >(A_local);
292 RCP<const OverlappingRowMatrix<row_matrix_type> > overlappedA = Teuchos::null;
293 RCP<const block_crs_matrix_type> A_block = Teuchos::null;
294 if (!filteredA.is_null()) {
295 overlappedA = rcp_dynamic_cast<
const OverlappingRowMatrix<row_matrix_type> >(filteredA->getUnderlyingMatrix());
298 if (!overlappedA.is_null()) {
299 A_block = rcp_dynamic_cast<
const block_crs_matrix_type>(overlappedA->getUnderlyingMatrix());
300 }
else if (!filteredA.is_null()) {
302 A_block = rcp_dynamic_cast<
const block_crs_matrix_type>(filteredA->getUnderlyingMatrix());
304 A_block = rcp_dynamic_cast<
const block_crs_matrix_type>(A_local);
307 if (!A_block.is_null()) {
308 return rcpFromRef(A_block->getCrsGraph());
314 local_ordinal_type numRows = A_local->getLocalNumRows();
316 for (local_ordinal_type i = 0; i < numRows; i++) {
317 entriesPerRow[i] = A_local->getNumEntriesInLocalRow(i);
319 RCP<crs_graph_type> A_local_crs_nc =
320 rcp(
new crs_graph_type(A_local->getRowMap(),
321 A_local->getColMap(),
325 using LocalRowHandler_t = LocalRowHandler<MatrixType>;
326 LocalRowHandler_t localRowHandler(A_local);
327 typename LocalRowHandler_t::inds_type indices;
328 typename LocalRowHandler_t::vals_type values;
329 for (local_ordinal_type i = 0; i < numRows; i++) {
330 local_ordinal_type numEntries = 0;
331 localRowHandler.getLocalRow(i, indices, values, numEntries);
332 A_local_crs_nc->insertLocalIndices(i, numEntries, indices.data());
336 A_local_crs_nc->fillComplete(A_local->getDomainMap(), A_local->getRangeMap());
337 return rcp_const_cast<
const crs_graph_type>(A_local_crs_nc);
343 template <
class MatrixType>
348 using Teuchos::rcp_dynamic_cast;
350 using crs_map_type = Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type>;
352 const char prefix[] =
"Ifpack2::Experimental::RBILUK::initialize: ";
355 "RowMatrix) is null. Please call setMatrix() with a nonnull input "
356 "before calling this method.");
358 "(A_, the BlockCrsMatrix) is not fill complete. You may not invoke "
359 "initialize() or compute() with this matrix until the matrix is fill "
360 "complete. Note: BlockCrsMatrix is fill complete if and only if its "
361 "underlying graph is fill complete.");
363 blockSize_ = this->A_->getBlockSize();
364 this->A_local_ = this->makeLocalFilter(this->A_);
367 double startTime = timer.
wallTime();
378 this->isInitialized_ =
false;
379 this->isAllocated_ =
false;
380 this->isComputed_ =
false;
381 this->Graph_ = Teuchos::null;
383 if (this->isKokkosKernelsSpiluk_) {
384 A_local_bcrs_ = Details::getBcrsMatrix(this->A_local_);
385 RCP<const crs_matrix_type> A_local_crs = Details::getCrsMatrix(this->A_local_);
386 if (A_local_bcrs_.is_null()) {
387 if (A_local_crs.is_null()) {
389 Array<size_t> entriesPerRow(numRows);
391 entriesPerRow[i] = this->A_local_->getNumEntriesInLocalRow(i);
395 this->A_local_->getColMap(),
398 nonconst_local_inds_host_view_type indices(
"indices", this->A_local_->getLocalMaxNumRowEntries());
399 nonconst_values_host_view_type values(
"values", this->A_local_->getLocalMaxNumRowEntries());
401 size_t numEntries = 0;
402 this->A_local_->getLocalRowCopy(i, indices, values, numEntries);
403 A_local_crs_nc_->insertLocalValues(i, numEntries, reinterpret_cast<scalar_type*>(values.data()), indices.data());
405 A_local_crs_nc_->fillComplete(this->A_local_->getDomainMap(), this->A_local_->getRangeMap());
406 A_local_crs = Teuchos::rcp_const_cast<
const crs_matrix_type>(A_local_crs_nc_);
410 if (blockSize_ > 1) {
411 auto crs_matrix_block_filled = Tpetra::fillLogicalBlocks(*A_local_crs, blockSize_);
412 A_local_bcrs_ = Tpetra::convertToBlockCrsMatrix(*crs_matrix_block_filled, blockSize_);
414 A_local_bcrs_ = Tpetra::convertToBlockCrsMatrix(*A_local_crs, blockSize_);
419 if (this->isKokkosKernelsStream_) {
420 std::vector<int> weights(this->num_streams_);
421 std::fill(weights.begin(), weights.end(), 1);
422 this->exec_space_instances_ = Kokkos::Experimental::partition_space(execution_space(), weights);
424 auto lclMtx = A_local_bcrs_->getLocalMatrixDevice();
425 this->Graph_v_ = std::vector<Teuchos::RCP<Ifpack2::IlukGraph<crs_graph_type, kk_handle_type> > >(this->num_streams_);
426 A_block_local_diagblks_v_ = std::vector<local_matrix_device_type>(this->num_streams_);
427 std::vector<local_crs_matrix_device_type> A_crs_local_diagblks_v(this->num_streams_);
429 if (!this->hasStreamReordered_) {
430 auto lclCrsMtx = convertBsrToCrs<local_matrix_device_type, local_crs_matrix_device_type>(lclMtx);
431 KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclCrsMtx, A_crs_local_diagblks_v);
436 for (
int i = 0; i < this->num_streams_; i++) {
437 A_block_local_diagblks_v_[i] = local_matrix_device_type(A_crs_local_diagblks_v[i], blockSize_);
440 A_block_local_diagblks_rowmap_v_ = std::vector<lno_row_view_t>(this->num_streams_);
441 A_block_local_diagblks_entries_v_ = std::vector<lno_nonzero_view_t>(this->num_streams_);
442 A_block_local_diagblks_values_v_ = std::vector<scalar_nonzero_view_t>(this->num_streams_);
444 for (
int i = 0; i < this->num_streams_; i++) {
445 A_block_local_diagblks_rowmap_v_[i] = A_block_local_diagblks_v_[i].graph.row_map;
446 A_block_local_diagblks_entries_v_[i] = A_block_local_diagblks_v_[i].graph.entries;
447 A_block_local_diagblks_values_v_[i] = A_block_local_diagblks_v_[i].values;
450 A_block_local_diagblks_v_[i].numRows(),
451 A_local_bcrs_->getRowMap()->getComm()));
453 A_block_local_diagblks_v_[i].numCols(),
454 A_local_bcrs_->getColMap()->getComm()));
458 this->LevelOfFill_, 0));
461 RCP<const crs_graph_type> matrixCrsGraph = getBlockCrsGraph<MatrixType>(this->A_);
463 this->LevelOfFill_, 0));
466 if (this->isKokkosKernelsSpiluk_) {
467 if (!this->isKokkosKernelsStream_) {
468 KernelHandle_block_ =
Teuchos::rcp(
new kk_handle_type());
469 const auto numRows = this->A_local_->getLocalNumRows();
470 KernelHandle_block_->create_spiluk_handle(KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
472 2 * this->A_local_->getLocalNumEntries() * (this->LevelOfFill_ + 1),
473 2 * this->A_local_->getLocalNumEntries() * (this->LevelOfFill_ + 1),
475 this->Graph_->initialize(KernelHandle_block_);
477 L_Sptrsv_KernelHandle_ =
Teuchos::rcp(
new kk_handle_type());
478 U_Sptrsv_KernelHandle_ =
Teuchos::rcp(
new kk_handle_type());
480 KokkosSparse::Experimental::SPTRSVAlgorithm alg = KokkosSparse::Experimental::SPTRSVAlgorithm::SEQLVLSCHD_TP1;
482 L_Sptrsv_KernelHandle_->create_sptrsv_handle(alg, numRows,
true , blockSize_);
483 U_Sptrsv_KernelHandle_->create_sptrsv_handle(alg, numRows,
false , blockSize_);
485 KokkosSparse::Experimental::SPTRSVAlgorithm alg = KokkosSparse::Experimental::SPTRSVAlgorithm::SEQLVLSCHD_TP1;
486 KernelHandle_block_v_ = std::vector<Teuchos::RCP<kk_handle_type> >(this->num_streams_);
487 L_Sptrsv_KernelHandle_v_ = std::vector<Teuchos::RCP<kk_handle_type> >(this->num_streams_);
488 U_Sptrsv_KernelHandle_v_ = std::vector<Teuchos::RCP<kk_handle_type> >(this->num_streams_);
489 for (
int i = 0; i < this->num_streams_; i++) {
490 const auto numRows = A_block_local_diagblks_v_[i].numRows();
491 KernelHandle_block_v_[i] =
Teuchos::rcp(
new kk_handle_type());
492 KernelHandle_block_v_[i]->create_spiluk_handle(KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
494 2 * A_block_local_diagblks_v_[i].nnz() * (this->LevelOfFill_ + 1),
495 2 * A_block_local_diagblks_v_[i].nnz() * (this->LevelOfFill_ + 1),
497 this->Graph_v_[i]->initialize(KernelHandle_block_v_[i]);
499 L_Sptrsv_KernelHandle_v_[i] =
Teuchos::rcp(
new kk_handle_type());
500 U_Sptrsv_KernelHandle_v_[i] =
Teuchos::rcp(
new kk_handle_type());
502 L_Sptrsv_KernelHandle_v_[i]->create_sptrsv_handle(alg, numRows,
true , blockSize_);
503 U_Sptrsv_KernelHandle_v_[i]->create_sptrsv_handle(alg, numRows,
false , blockSize_);
507 this->Graph_->initialize();
510 allocate_L_and_U_blocks();
512 #ifdef IFPACK2_RBILUK_INITIAL
517 this->isInitialized_ =
true;
518 this->numInitialize_ += 1;
519 this->initializeTime_ += (timer.
wallTime() - startTime);
522 template <
class MatrixType>
526 typedef Tpetra::Map<LO, GO, node_type> map_type;
528 LO NumIn = 0, NumL = 0, NumU = 0;
529 bool DiagFound =
false;
530 size_t NumNonzeroDiags = 0;
531 size_t MaxNumEntries = this->A_->getLocalMaxNumRowEntries();
532 LO blockMatSize = blockSize_ * blockSize_;
539 bool gidsAreConsistentlyOrdered =
true;
540 GO indexOfInconsistentGID = 0;
541 for (GO i = 0; i < rowGIDs.
size(); ++i) {
542 if (rowGIDs[i] != colGIDs[i]) {
543 gidsAreConsistentlyOrdered =
false;
544 indexOfInconsistentGID = i;
549 "The ordering of the local GIDs in the row and column maps is not the same"
551 <<
"at index " << indexOfInconsistentGID
552 <<
". Consistency is required, as all calculations are done with"
554 <<
"local indexing.");
565 L_block_->setAllToScalar(STM::zero());
566 U_block_->setAllToScalar(STM::zero());
567 D_block_->setAllToScalar(STM::zero());
584 RCP<const map_type> rowMap = L_block_->getRowMap();
596 using LocalRowHandler_t = LocalRowHandler<MatrixType>;
597 LocalRowHandler_t localRowHandler(this->A_);
598 typename LocalRowHandler_t::inds_type InI;
599 typename LocalRowHandler_t::vals_type InV;
600 for (
size_t myRow = 0; myRow < this->A_->getLocalNumRows(); ++myRow) {
601 LO local_row = myRow;
603 localRowHandler.getLocalRow(local_row, InI, InV, NumIn);
610 for (LO j = 0; j < NumIn; ++j) {
612 const LO blockOffset = blockMatSize * j;
614 if (k == local_row) {
617 for (LO jj = 0; jj < blockMatSize; ++jj)
618 diagValues[jj] = this->Rthresh_ * InV[blockOffset + jj] + IFPACK2_SGN(InV[blockOffset + jj]) * this->Athresh_;
619 D_block_->replaceLocalValues(local_row, &InI[j], diagValues.getRawPtr(), 1);
622 true, std::runtime_error,
623 "Ifpack2::RILUK::initAllValues: current "
625 << k <<
" < 0. I'm not sure why this is an error; it is "
626 "probably an artifact of the undocumented assumptions of the "
627 "original implementation (likely copied and pasted from Ifpack). "
628 "Nevertheless, the code I found here insisted on this being an error "
629 "state, so I will throw an exception here.");
630 }
else if (k < local_row) {
632 const LO LBlockOffset = NumL * blockMatSize;
633 for (LO jj = 0; jj < blockMatSize; ++jj)
634 LV[LBlockOffset + jj] = InV[blockOffset + jj];
636 }
else if (Teuchos::as<size_t>(k) <= rowMap->getLocalNumElements()) {
638 const LO UBlockOffset = NumU * blockMatSize;
639 for (LO jj = 0; jj < blockMatSize; ++jj)
640 UV[UBlockOffset + jj] = InV[blockOffset + jj];
650 for (LO jj = 0; jj < blockSize_; ++jj)
651 diagValues[jj * (blockSize_ + 1)] = this->Athresh_;
652 D_block_->replaceLocalValues(local_row, &local_row, diagValues.getRawPtr(), 1);
656 L_block_->replaceLocalValues(local_row, &LI[0], &LV[0], NumL);
660 U_block_->replaceLocalValues(local_row, &UI[0], &UV[0], NumU);
675 this->isInitialized_ =
true;
684 template <
class LittleBlockType>
685 struct GetManagedView {
686 static_assert(Kokkos::is_view<LittleBlockType>::value,
687 "LittleBlockType must be a Kokkos::View.");
688 typedef Kokkos::View<
typename LittleBlockType::non_const_data_type,
689 typename LittleBlockType::array_layout,
690 typename LittleBlockType::device_type>
691 managed_non_const_type;
692 static_assert(static_cast<int>(managed_non_const_type::rank) ==
693 static_cast<int>(LittleBlockType::rank),
694 "managed_non_const_type::rank != LittleBlock::rank. "
695 "Please report this bug to the Ifpack2 developers.");
700 template <
class MatrixType>
706 typedef impl_scalar_type IST;
707 const char prefix[] =
"Ifpack2::Experimental::RBILUK::compute: ";
713 "the BlockCrsMatrix) is null. Please call setMatrix() with a nonnull "
714 "input before calling this method.");
716 "(A_, the BlockCrsMatrix) is not fill complete. You may not invoke "
717 "initialize() or compute() with this matrix until the matrix is fill "
718 "complete. Note: BlockCrsMatrix is fill complete if and only if its "
719 "underlying graph is fill complete.");
721 if (!this->isInitialized()) {
744 double startTime = timer.
wallTime();
747 this->isComputed_ =
false;
754 if (!this->isKokkosKernelsSpiluk_) {
757 LO NumL, NumU, NumURead;
760 this->isKokkosKernelsStream_, std::runtime_error,
761 "Ifpack2::RBILUK::compute: "
762 "streams are not yet supported without KokkosKernels.");
765 const size_t MaxNumEntries =
766 L_block_->getLocalMaxNumRowEntries() + U_block_->getLocalMaxNumRowEntries() + 1;
768 const LO blockMatSize = blockSize_ * blockSize_;
773 const LO rowStride = blockSize_;
776 Kokkos::View<
int*, Kokkos::HostSpace,
777 Kokkos::MemoryUnmanaged>
778 ipiv(ipiv_teuchos.
getRawPtr(), blockSize_);
780 Kokkos::View<IST*, Kokkos::HostSpace,
781 Kokkos::MemoryUnmanaged>
782 work(work_teuchos.getRawPtr(), blockSize_);
784 size_t num_cols = U_block_->getColMap()->getLocalNumElements();
787 typename GetManagedView<little_block_host_type>::managed_non_const_type diagModBlock(
"diagModBlock", blockSize_, blockSize_);
788 typename GetManagedView<little_block_host_type>::managed_non_const_type matTmp(
"matTmp", blockSize_, blockSize_);
789 typename GetManagedView<little_block_host_type>::managed_non_const_type multiplier(
"multiplier", blockSize_, blockSize_);
797 for (
size_t j = 0; j < num_cols; ++j) {
803 const LO numLocalRows = L_block_->getLocalNumRows();
804 for (LO local_row = 0; local_row < numLocalRows; ++local_row) {
807 NumIn = MaxNumEntries;
808 local_inds_host_view_type colValsL;
809 values_host_view_type valsL;
811 L_block_->getLocalRowView(local_row, colValsL, valsL);
812 NumL = (LO)colValsL.size();
813 for (LO j = 0; j < NumL; ++j) {
814 const LO matOffset = blockMatSize * j;
815 little_block_host_type lmat((
typename little_block_host_type::value_type*)&valsL[matOffset], blockSize_, rowStride);
816 little_block_host_type lmatV((
typename little_block_host_type::value_type*)&InV[matOffset], blockSize_, rowStride);
818 Tpetra::COPY(lmat, lmatV);
819 InI[j] = colValsL[j];
822 little_block_host_type dmat = D_block_->getLocalBlockHostNonConst(local_row, local_row);
823 little_block_host_type dmatV((
typename little_block_host_type::value_type*)&InV[NumL * blockMatSize], blockSize_, rowStride);
825 Tpetra::COPY(dmat, dmatV);
826 InI[NumL] = local_row;
828 local_inds_host_view_type colValsU;
829 values_host_view_type valsU;
830 U_block_->getLocalRowView(local_row, colValsU, valsU);
831 NumURead = (LO)colValsU.
size();
833 for (LO j = 0; j < NumURead; ++j) {
834 if (!(colValsU[j] < numLocalRows))
continue;
835 InI[NumL + 1 + j] = colValsU[j];
836 const LO matOffset = blockMatSize * (NumL + 1 + j);
837 little_block_host_type umat((
typename little_block_host_type::value_type*)&valsU[blockMatSize * j], blockSize_, rowStride);
838 little_block_host_type umatV((
typename little_block_host_type::value_type*)&InV[matOffset], blockSize_, rowStride);
840 Tpetra::COPY(umat, umatV);
843 NumIn = NumL + NumU + 1;
846 for (
size_t j = 0; j < NumIn; ++j) {
850 #ifndef IFPACK2_RBILUK_INITIAL
851 for (LO i = 0; i < blockSize_; ++i)
852 for (LO j = 0; j < blockSize_; ++j) {
854 diagModBlock(i, j) = 0;
859 Kokkos::deep_copy(diagModBlock, diagmod);
862 for (LO jj = 0; jj < NumL; ++jj) {
864 little_block_host_type currentVal((
typename little_block_host_type::value_type*)&InV[jj * blockMatSize], blockSize_, rowStride);
866 Tpetra::COPY(currentVal, multiplier);
868 const little_block_host_type dmatInverse = D_block_->getLocalBlockHostNonConst(j, j);
870 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
871 KokkosBatched::SerialGemm<KokkosBatched::Trans::NoTranspose,
872 KokkosBatched::Trans::NoTranspose,
873 KokkosBatched::Algo::Gemm::Blocked>::invoke(STS::one(), currentVal, dmatInverse, STS::zero(), matTmp);
875 Tpetra::GEMM(
"N",
"N", STS::one(), currentVal, dmatInverse,
876 STS::zero(), matTmp);
880 Tpetra::COPY(matTmp, currentVal);
881 local_inds_host_view_type UUI;
882 values_host_view_type UUV;
884 U_block_->getLocalRowView(j, UUI, UUV);
885 NumUU = (LO)UUI.size();
887 if (this->RelaxValue_ == STM::zero()) {
888 for (LO k = 0; k < NumUU; ++k) {
889 if (!(UUI[k] < numLocalRows))
continue;
890 const int kk = colflag[UUI[k]];
892 little_block_host_type kkval((
typename little_block_host_type::value_type*)&InV[kk * blockMatSize], blockSize_, rowStride);
893 little_block_host_type uumat((
typename little_block_host_type::value_type*)&UUV[k * blockMatSize], blockSize_, rowStride);
894 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
895 KokkosBatched::SerialGemm<KokkosBatched::Trans::NoTranspose,
896 KokkosBatched::Trans::NoTranspose,
897 KokkosBatched::Algo::Gemm::Blocked>::invoke(
magnitude_type(-STM::one()), multiplier, uumat, STM::one(), kkval);
899 Tpetra::GEMM(
"N",
"N",
magnitude_type(-STM::one()), multiplier, uumat,
906 for (LO k = 0; k < NumUU; ++k) {
907 if (!(UUI[k] < numLocalRows))
continue;
908 const int kk = colflag[UUI[k]];
909 little_block_host_type uumat((
typename little_block_host_type::value_type*)&UUV[k * blockMatSize], blockSize_, rowStride);
911 little_block_host_type kkval((
typename little_block_host_type::value_type*)&InV[kk * blockMatSize], blockSize_, rowStride);
912 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
913 KokkosBatched::SerialGemm<KokkosBatched::Trans::NoTranspose,
914 KokkosBatched::Trans::NoTranspose,
915 KokkosBatched::Algo::Gemm::Blocked>::invoke(
magnitude_type(-STM::one()), multiplier, uumat, STM::one(), kkval);
917 Tpetra::GEMM(
"N",
"N",
magnitude_type(-STM::one()), multiplier, uumat,
922 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
923 KokkosBatched::SerialGemm<KokkosBatched::Trans::NoTranspose,
924 KokkosBatched::Trans::NoTranspose,
925 KokkosBatched::Algo::Gemm::Blocked>::invoke(
magnitude_type(-STM::one()), multiplier, uumat, STM::one(), diagModBlock);
927 Tpetra::GEMM(
"N",
"N",
magnitude_type(-STM::one()), multiplier, uumat,
928 STM::one(), diagModBlock);
941 Tpetra::COPY(dmatV, dmat);
943 if (this->RelaxValue_ != STM::zero()) {
945 Tpetra::AXPY(this->RelaxValue_, diagModBlock, dmat);
959 for (
int k = 0; k < blockSize_; ++k) {
963 Tpetra::GETF2(dmat, ipiv, lapackInfo);
966 lapackInfo != 0, std::runtime_error,
967 "Ifpack2::Experimental::RBILUK::compute: "
969 << lapackInfo <<
" which indicates an error in the factorization GETRF.");
971 Tpetra::GETRI(dmat, ipiv, work, lapackInfo);
974 lapackInfo != 0, std::runtime_error,
975 "Ifpack2::Experimental::RBILUK::compute: "
977 << lapackInfo <<
" which indicates an error in the matrix inverse GETRI.");
980 for (LO j = 0; j < NumU; ++j) {
981 little_block_host_type currentVal((
typename little_block_host_type::value_type*)&InV[(NumL + 1 + j) * blockMatSize], blockSize_, rowStride);
983 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
984 KokkosBatched::SerialGemm<KokkosBatched::Trans::NoTranspose,
985 KokkosBatched::Trans::NoTranspose,
986 KokkosBatched::Algo::Gemm::Blocked>::invoke(STS::one(), dmat, currentVal, STS::zero(), matTmp);
988 Tpetra::GEMM(
"N",
"N", STS::one(), dmat, currentVal,
989 STS::zero(), matTmp);
993 Tpetra::COPY(matTmp, currentVal);
998 U_block_->replaceLocalValues(local_row, &InI[NumL + 1], &InV[blockMatSize * (NumL + 1)], NumU);
1001 #ifndef IFPACK2_RBILUK_INITIAL
1003 for (
size_t j = 0; j < NumIn; ++j) {
1004 colflag[InI[j]] = -1;
1008 for (
size_t j = 0; j < num_cols; ++j) {
1019 auto A_local_bcrs_temp = Details::getBcrsMatrix(this->A_local_);
1020 auto A_local_crs_temp = Details::getCrsMatrix(this->A_local_);
1021 if (A_local_bcrs_temp.is_null()) {
1022 if (A_local_crs_temp.is_null()) {
1025 A_local_crs_nc_->resumeFill();
1026 nonconst_local_inds_host_view_type indices(
"indices", this->A_local_->getLocalMaxNumRowEntries());
1027 nonconst_values_host_view_type values(
"values", this->A_local_->getLocalMaxNumRowEntries());
1029 size_t numEntries = 0;
1030 this->A_local_->getLocalRowCopy(i, indices, values, numEntries);
1031 A_local_crs_nc_->insertLocalValues(i, numEntries, reinterpret_cast<scalar_type*>(values.data()), indices.data());
1033 A_local_crs_nc_->fillComplete(this->A_local_->getDomainMap(), this->A_local_->getRangeMap());
1036 if (blockSize_ > 1) {
1037 auto crs_matrix_block_filled = Tpetra::fillLogicalBlocks(*A_local_crs_nc_, blockSize_);
1038 Kokkos::deep_copy(A_local_bcrs_->getLocalMatrixDevice().values,
1039 crs_matrix_block_filled->getLocalMatrixDevice().values);
1041 Kokkos::deep_copy(A_local_bcrs_->getLocalMatrixDevice().values,
1042 A_local_crs_nc_->getLocalMatrixDevice().values);
1046 if (!this->isKokkosKernelsStream_) {
1047 auto lclMtx = A_local_bcrs_->getLocalMatrixDevice();
1048 auto A_local_rowmap = lclMtx.graph.row_map;
1049 auto A_local_entries = lclMtx.graph.entries;
1050 auto A_local_values = lclMtx.values;
1055 if (L_block_->isLocallyIndexed()) {
1056 L_block_->setAllToScalar(STS::zero());
1057 U_block_->setAllToScalar(STS::zero());
1060 using row_map_type =
typename local_matrix_device_type::row_map_type;
1062 auto lclL = L_block_->getLocalMatrixDevice();
1063 row_map_type L_rowmap = lclL.graph.row_map;
1064 auto L_entries = lclL.graph.entries;
1065 auto L_values = lclL.values;
1067 auto lclU = U_block_->getLocalMatrixDevice();
1068 row_map_type U_rowmap = lclU.graph.row_map;
1069 auto U_entries = lclU.graph.entries;
1070 auto U_values = lclU.values;
1072 KokkosSparse::spiluk_numeric(KernelHandle_block_.getRawPtr(), this->LevelOfFill_,
1073 A_local_rowmap, A_local_entries, A_local_values,
1074 L_rowmap, L_entries, L_values, U_rowmap, U_entries, U_values);
1077 KokkosSparse::sptrsv_symbolic(L_Sptrsv_KernelHandle_.getRawPtr(), L_rowmap, L_entries, L_values);
1078 KokkosSparse::sptrsv_symbolic(U_Sptrsv_KernelHandle_.getRawPtr(), U_rowmap, U_entries, U_values);
1081 assert(!this->hasStreamReordered_);
1082 auto lclMtx = A_local_bcrs_->getLocalMatrixDevice();
1083 auto lclCrsMtx = convertBsrToCrs<local_matrix_device_type, local_crs_matrix_device_type>(lclMtx);
1084 std::vector<local_crs_matrix_device_type> A_crs_local_diagblks_v(this->num_streams_);
1085 KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclCrsMtx, A_crs_local_diagblks_v);
1087 for (
int i = 0; i < this->num_streams_; i++) {
1088 A_block_local_diagblks_v_[i] = local_matrix_device_type(A_crs_local_diagblks_v[i], blockSize_);
1090 A_block_local_diagblks_values_v_[i] = A_block_local_diagblks_v_[i].values;
1093 std::vector<lno_row_view_t> L_rowmap_v(this->num_streams_);
1094 std::vector<lno_nonzero_view_t> L_entries_v(this->num_streams_);
1095 std::vector<scalar_nonzero_view_t> L_values_v(this->num_streams_);
1096 std::vector<lno_row_view_t> U_rowmap_v(this->num_streams_);
1097 std::vector<lno_nonzero_view_t> U_entries_v(this->num_streams_);
1098 std::vector<scalar_nonzero_view_t> U_values_v(this->num_streams_);
1099 std::vector<kk_handle_type*> KernelHandle_rawptr_v_(this->num_streams_);
1101 for (
int i = 0; i < this->num_streams_; i++) {
1102 L_block_v_[i]->setAllToScalar(STS::zero());
1103 U_block_v_[i]->setAllToScalar(STS::zero());
1105 auto lclL = L_block_v_[i]->getLocalMatrixDevice();
1106 L_rowmap_v[i] = lclL.graph.row_map;
1107 L_entries_v[i] = lclL.graph.entries;
1108 L_values_v[i] = lclL.values;
1110 auto lclU = U_block_v_[i]->getLocalMatrixDevice();
1111 U_rowmap_v[i] = lclU.graph.row_map;
1112 U_entries_v[i] = lclU.graph.entries;
1113 U_values_v[i] = lclU.values;
1114 KernelHandle_rawptr_v_[i] = KernelHandle_block_v_[i].getRawPtr();
1120 KokkosSparse::Experimental::spiluk_numeric_streams(
1121 this->exec_space_instances_, KernelHandle_rawptr_v_, this->LevelOfFill_,
1122 A_block_local_diagblks_rowmap_v_, A_block_local_diagblks_entries_v_, A_block_local_diagblks_values_v_,
1123 L_rowmap_v, L_entries_v, L_values_v,
1124 U_rowmap_v, U_entries_v, U_values_v);
1127 for (
int i = 0; i < this->num_streams_; i++) {
1128 KokkosSparse::sptrsv_symbolic(L_Sptrsv_KernelHandle_v_[i].getRawPtr(), L_rowmap_v[i], L_entries_v[i], L_values_v[i]);
1129 KokkosSparse::sptrsv_symbolic(U_Sptrsv_KernelHandle_v_[i].getRawPtr(), U_rowmap_v[i], U_entries_v[i], U_values_v[i]);
1150 this->isComputed_ =
true;
1151 this->numCompute_ += 1;
1152 this->computeTime_ += (timer.
wallTime() - startTime);
1155 template <
class MatrixType>
1156 template <
typename X,
typename Y>
1159 std::vector<lno_row_view_t> ptr_v(this->num_streams_);
1160 std::vector<lno_nonzero_view_t> ind_v(this->num_streams_);
1161 std::vector<scalar_nonzero_view_t> val_v(this->num_streams_);
1162 std::vector<kk_handle_type*> KernelHandle_rawptr_v(this->num_streams_);
1164 for (
int j = 0; j < numVecs; ++j) {
1165 auto X_view = Kokkos::subview(X_views, Kokkos::ALL(), j);
1166 auto Y_view = Kokkos::subview(Y_views, Kokkos::ALL(), j);
1167 std::vector<decltype(X_view)> x_v(this->num_streams_);
1168 std::vector<decltype(Y_view)> y_v(this->num_streams_);
1169 local_ordinal_type stream_begin = 0;
1170 local_ordinal_type stream_end;
1171 for (
int i = 0; i < this->num_streams_; i++) {
1172 auto LU_bcrs_i = LU_block_v[i];
1173 auto LUlocal_i = LU_bcrs_i->getLocalMatrixDevice();
1174 ptr_v[i] = LUlocal_i.graph.row_map;
1175 ind_v[i] = LUlocal_i.graph.entries;
1176 val_v[i] = LUlocal_i.values;
1177 stream_end = stream_begin + LUlocal_i.numRows() * blockSize_;
1179 x_v[i] = Kokkos::subview(tmp_, Kokkos::make_pair(stream_begin, stream_end));
1181 x_v[i] = Kokkos::subview(X_view, Kokkos::make_pair(stream_begin, stream_end));
1184 y_v[i] = Kokkos::subview(tmp_, Kokkos::make_pair(stream_begin, stream_end));
1186 y_v[i] = Kokkos::subview(Y_view, Kokkos::make_pair(stream_begin, stream_end));
1188 KernelHandle_rawptr_v[i] = LU_sptrsv_handle_v[i].getRawPtr();
1189 stream_begin = stream_end;
1193 KokkosSparse::Experimental::sptrsv_solve_streams(this->exec_space_instances_, KernelHandle_rawptr_v, ptr_v, ind_v, val_v, x_v, y_v);
1198 template <
class MatrixType>
1200 apply(
const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1201 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
1211 this->A_.
is_null(), std::runtime_error,
1212 "Ifpack2::Experimental::RBILUK::apply: The matrix is "
1213 "null. Please call setMatrix() with a nonnull input, then initialize() "
1214 "and compute(), before calling this method.");
1216 !this->isComputed(), std::runtime_error,
1217 "Ifpack2::Experimental::RBILUK::apply: If you have not yet called compute(), "
1218 "you must call compute() before calling this method.");
1219 TEUCHOS_TEST_FOR_EXCEPTION(
1220 X.getNumVectors() != Y.getNumVectors(), std::invalid_argument,
1221 "Ifpack2::Experimental::RBILUK::apply: X and Y do not have the same number of columns. "
1222 "X.getNumVectors() = "
1223 << X.getNumVectors()
1224 <<
" != Y.getNumVectors() = " << Y.getNumVectors() <<
".");
1225 TEUCHOS_TEST_FOR_EXCEPTION(
1227 "Ifpack2::Experimental::RBILUK::apply: mode = Teuchos::CONJ_TRANS is not implemented for "
1228 "complex Scalar type. Please talk to the Ifpack2 developers to get this "
1229 "fixed. There is a FIXME in this file about this very issue.");
1231 const LO blockMatSize = blockSize_ * blockSize_;
1233 const LO rowStride = blockSize_;
1236 little_host_vec_type lclvec((
typename little_host_vec_type::value_type*)&lclarray[0], blockSize_);
1237 const scalar_type one = STM::one();
1238 const scalar_type zero = STM::zero();
1241 double startTime = timer.
wallTime();
1244 if (!this->isKokkosKernelsSpiluk_) {
1245 BMV yBlock(Y, *(this->Graph_->getA_Graph()->getDomainMap()), blockSize_);
1246 const BMV xBlock(X, *(this->A_->getColMap()), blockSize_);
1248 if (alpha == one && beta == zero) {
1256 const LO numVectors = xBlock.getNumVectors();
1257 BMV cBlock(*(this->Graph_->getA_Graph()->getDomainMap()), blockSize_, numVectors);
1258 BMV rBlock(*(this->Graph_->getA_Graph()->getDomainMap()), blockSize_, numVectors);
1259 for (LO imv = 0; imv < numVectors; ++imv) {
1260 for (
size_t i = 0; i < D_block_->getLocalNumRows(); ++i) {
1262 const_host_little_vec_type xval =
1263 xBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::ReadOnly);
1264 little_host_vec_type cval =
1265 cBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::OverwriteAll);
1267 Tpetra::COPY(xval, cval);
1269 local_inds_host_view_type colValsL;
1270 values_host_view_type valsL;
1271 L_block_->getLocalRowView(local_row, colValsL, valsL);
1272 LO NumL = (LO)colValsL.size();
1274 for (LO j = 0; j < NumL; ++j) {
1275 LO col = colValsL[j];
1276 const_host_little_vec_type prevVal =
1277 cBlock.getLocalBlockHost(col, imv, Tpetra::Access::ReadOnly);
1279 const LO matOffset = blockMatSize * j;
1280 little_block_host_type lij((
typename little_block_host_type::value_type*)&valsL[matOffset], blockSize_, rowStride);
1283 Tpetra::GEMV(-one, lij, prevVal, cval);
1289 D_block_->applyBlock(cBlock, rBlock);
1292 for (LO imv = 0; imv < numVectors; ++imv) {
1293 const LO numRows = D_block_->getLocalNumRows();
1294 for (LO i = 0; i < numRows; ++i) {
1295 LO local_row = (numRows - 1) - i;
1296 const_host_little_vec_type rval =
1297 rBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::ReadOnly);
1298 little_host_vec_type yval =
1299 yBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::OverwriteAll);
1301 Tpetra::COPY(rval, yval);
1303 local_inds_host_view_type colValsU;
1304 values_host_view_type valsU;
1305 U_block_->getLocalRowView(local_row, colValsU, valsU);
1306 LO NumU = (LO)colValsU.size();
1308 for (LO j = 0; j < NumU; ++j) {
1309 LO col = colValsU[NumU - 1 - j];
1310 const_host_little_vec_type prevVal =
1311 yBlock.getLocalBlockHost(col, imv, Tpetra::Access::ReadOnly);
1313 const LO matOffset = blockMatSize * (NumU - 1 - j);
1314 little_block_host_type uij((
typename little_block_host_type::value_type*)&valsU[matOffset], blockSize_, rowStride);
1317 Tpetra::GEMV(-one, uij, prevVal, yval);
1322 TEUCHOS_TEST_FOR_EXCEPTION(
1323 true, std::runtime_error,
1324 "Ifpack2::Experimental::RBILUK::apply: transpose apply is not implemented for the block algorithm");
1327 if (alpha == zero) {
1334 MV Y_tmp(Y.getMap(), Y.getNumVectors());
1335 apply(X, Y_tmp, mode);
1336 Y.update(alpha, Y_tmp, beta);
1341 auto X_views = X.getLocalViewDevice(Tpetra::Access::ReadOnly);
1342 auto Y_views = Y.getLocalViewDevice(Tpetra::Access::ReadWrite);
1343 const LO numVecs = X.getNumVectors();
1346 "Ifpack2::Experimental::RBILUK::apply: transpose apply is not implemented for the block algorithm");
1348 if (!this->isKokkosKernelsStream_) {
1349 auto lclL = L_block_->getLocalMatrixDevice();
1350 auto L_rowmap = lclL.graph.row_map;
1351 auto L_entries = lclL.graph.entries;
1352 auto L_values = lclL.values;
1354 auto lclU = U_block_->getLocalMatrixDevice();
1355 auto U_rowmap = lclU.graph.row_map;
1356 auto U_entries = lclU.graph.entries;
1357 auto U_values = lclU.values;
1359 for (LO vec = 0; vec < numVecs; ++vec) {
1360 auto X_view = Kokkos::subview(X_views, Kokkos::ALL(), vec);
1361 KokkosSparse::sptrsv_solve(L_Sptrsv_KernelHandle_.getRawPtr(), L_rowmap, L_entries, L_values, X_view, tmp_);
1364 for (LO vec = 0; vec < numVecs; ++vec) {
1365 auto Y_view = Kokkos::subview(Y_views, Kokkos::ALL(), vec);
1366 KokkosSparse::sptrsv_solve(U_Sptrsv_KernelHandle_.getRawPtr(), U_rowmap, U_entries, U_values, tmp_, Y_view);
1369 stream_apply_impl(X_views, Y_views,
false,
true, L_block_v_, L_Sptrsv_KernelHandle_v_, numVecs);
1370 stream_apply_impl(X_views, Y_views,
true,
false, U_block_v_, U_Sptrsv_KernelHandle_v_, numVecs);
1373 KokkosBlas::axpby(alpha, Y_views, beta, Y_views);
1378 this->numApply_ += 1;
1379 this->applyTime_ += (timer.
wallTime() - startTime);
1382 template <
class MatrixType>
1384 std::ostringstream os;
1389 os <<
"\"Ifpack2::Experimental::RBILUK\": {";
1390 os <<
"Initialized: " << (this->isInitialized() ?
"true" :
"false") <<
", "
1391 <<
"Computed: " << (this->isComputed() ?
"true" :
"false") <<
", ";
1393 os <<
"Level-of-fill: " << this->getLevelOfFill() <<
", ";
1396 os <<
"Matrix: null";
1398 os <<
"Global matrix dimensions: ["
1399 << this->A_->getGlobalNumRows() <<
", " << this->A_->getGlobalNumCols() <<
"]"
1400 <<
", Global nnz: " << this->A_->getGlobalNumEntries();
1415 #define IFPACK2_EXPERIMENTAL_RBILUK_INSTANT(S, LO, GO, N) \
1416 template class Ifpack2::Experimental::RBILUK<Tpetra::BlockCrsMatrix<S, LO, GO, N> >; \
1417 template class Ifpack2::Experimental::RBILUK<Tpetra::RowMatrix<S, LO, GO, N> >;
void initialize()
Initialize by computing the symbolic incomplete factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:344
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:111
virtual ~RBILUK()
Destructor (declared virtual for memory safety).
Definition: Ifpack2_Experimental_RBILUK_def.hpp:159
static Teuchos::RCP< const row_matrix_type > makeLocalFilter(const Teuchos::RCP< const row_matrix_type > &A)
Return A, wrapped in a LocalFilter, if necessary.
const block_crs_matrix_type & getUBlock() const
Return the U factor of the ILU factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:208
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:101
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the (inverse of the) incomplete factorization to X, resulting in Y.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:1200
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Teuchos::ScalarTraits< scalar_type >::magnitudeType magnitude_type
The type of the magnitude (absolute value) of a matrix entry.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:120
ILU(k) factorization of a given Tpetra::RowMatrix.
Definition: Ifpack2_RILUK_decl.hpp:213
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:115
Tpetra::CrsMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > crs_matrix_type
Tpetra::CrsMatrix specialization used by this class for representing L and U.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:134
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:107
const block_crs_matrix_type & getDBlock() const
Return the diagonal entries of the ILU factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:195
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
void compute()
Compute the (numeric) incomplete factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:701
File for utility functions.
Construct a level filled graph for use in computing an ILU(k) incomplete factorization.
Definition: Ifpack2_IlukGraph.hpp:67
void setMatrix(const Teuchos::RCP< const block_crs_matrix_type > &A)
Change the matrix to be preconditioned.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:162
const block_crs_matrix_type & getLBlock() const
Return the L factor of the ILU factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:182
Tpetra::RowMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > row_matrix_type
Tpetra::RowMatrix specialization used by this class.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:127
std::string description() const
A one-line description of this object.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:1383
ILU(k) factorization of a given Tpetra::BlockCrsMatrix.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:95