Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_Experimental_RBILUK_def.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4 //
5 // Copyright 2009 NTESS and the Ifpack2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef IFPACK2_EXPERIMENTAL_CRSRBILUK_DEF_HPP
11 #define IFPACK2_EXPERIMENTAL_CRSRBILUK_DEF_HPP
12 
13 #include "Tpetra_BlockMultiVector.hpp"
14 #include "Tpetra_BlockView.hpp"
15 #include "Tpetra_BlockCrsMatrix_Helpers_decl.hpp"
16 #include "Ifpack2_OverlappingRowMatrix.hpp"
17 #include "Ifpack2_Details_getCrsMatrix.hpp"
18 #include "Ifpack2_LocalFilter.hpp"
19 #include "Ifpack2_Utilities.hpp"
20 #include "Ifpack2_RILUK.hpp"
21 #include "KokkosSparse_sptrsv.hpp"
22 
23 //#define IFPACK2_RBILUK_INITIAL
24 //#define IFPACK2_RBILUK_INITIAL_NOKK
25 
26 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
27 #include "KokkosBatched_Gemm_Decl.hpp"
28 #include "KokkosBatched_Gemm_Serial_Impl.hpp"
29 #include "KokkosBatched_Util.hpp"
30 #endif
31 
32 namespace Ifpack2 {
33 
34 namespace Experimental {
35 
36 namespace {
37 template <class MatrixType>
38 struct LocalRowHandler {
39  using LocalOrdinal = typename MatrixType::local_ordinal_type;
40  using row_matrix_type = Tpetra::RowMatrix<
41  typename MatrixType::scalar_type,
42  LocalOrdinal,
43  typename MatrixType::global_ordinal_type,
44  typename MatrixType::node_type>;
45  using inds_type = typename row_matrix_type::local_inds_host_view_type;
46  using vals_type = typename row_matrix_type::values_host_view_type;
47 
48  LocalRowHandler(Teuchos::RCP<const row_matrix_type> A)
49  : A_(std::move(A)) {
50  if (!A_->supportsRowViews()) {
51  const auto maxNumRowEntr = A_->getLocalMaxNumRowEntries();
52  const auto blockSize = A_->getBlockSize();
53  ind_nc_ = inds_type_nc("Ifpack2::RBILUK::LocalRowHandler::indices", maxNumRowEntr);
54  val_nc_ = vals_type_nc("Ifpack2::RBILUK::LocalRowHandler::values", maxNumRowEntr * blockSize * blockSize);
55  }
56  }
57 
58  void getLocalRow(LocalOrdinal local_row, inds_type& InI, vals_type& InV, LocalOrdinal& NumIn) {
59  if (A_->supportsRowViews()) {
60  A_->getLocalRowView(local_row, InI, InV);
61  NumIn = (LocalOrdinal)InI.size();
62  } else {
63  size_t cnt;
64  A_->getLocalRowCopy(local_row, ind_nc_, val_nc_, cnt);
65  InI = ind_nc_;
66  InV = val_nc_;
67  NumIn = (LocalOrdinal)cnt;
68  }
69  }
70 
71  private:
72  using inds_type_nc = typename row_matrix_type::nonconst_local_inds_host_view_type;
73  using vals_type_nc = typename row_matrix_type::nonconst_values_host_view_type;
74 
76  inds_type_nc ind_nc_;
77  vals_type_nc val_nc_;
78 };
79 
80 template <typename BsrMatrixType, typename CrsMatrixType>
81 CrsMatrixType convertBsrToCrs(const BsrMatrixType& bsrMatrix) {
82  using Ordinal = typename BsrMatrixType::ordinal_type;
83  using RowMapType = typename BsrMatrixType::row_map_type::non_const_type;
84  using EntriesType = typename BsrMatrixType::index_type::non_const_type;
85  using ValuesType = typename BsrMatrixType::values_type::non_const_type;
86  using ExeSpace = typename BsrMatrixType::execution_space;
87 
88  const Ordinal blockDim = bsrMatrix.blockDim();
89  const Ordinal blockSize = blockDim * blockDim;
90  const Ordinal numBlockRows = bsrMatrix.numRows();
91  const Ordinal numBlockCols = bsrMatrix.numCols();
92 
93  const auto blockRowMap = bsrMatrix.graph.row_map;
94  const auto blockEntries = bsrMatrix.graph.entries;
95  const auto blockValues = bsrMatrix.values;
96 
97  const Ordinal numRows = numBlockRows * blockDim;
98  const Ordinal numCols = numBlockCols * blockDim;
99 
100  // Allocate CrsMatrix row map
101  RowMapType crsRowMap("crsRowMap", numRows + 1);
102  EntriesType crsEntries("crsEntries", blockEntries.extent(0) * blockSize);
103  ValuesType crsValues("crsValues", blockValues.extent(0) * blockSize);
104 
105  Kokkos::RangePolicy<ExeSpace> policy(0, numBlockRows);
106 
107  // Fill CrsMatrix row map, entries, and values
108  Kokkos::parallel_for(
109  "ConvertBsrToCrs", policy, KOKKOS_LAMBDA(const Ordinal blockRow) {
110  const Ordinal blockRowStart = blockRowMap(blockRow);
111  const Ordinal blockRowEnd = blockRowMap(blockRow + 1);
112  const Ordinal blockRowCount = blockRowEnd - blockRowStart;
113 
114  // Iterate over block entries in this row. This could use a teamRange parallel_for
115  for (Ordinal blockNnz = blockRowStart; blockNnz < blockRowEnd; ++blockNnz) {
116  const Ordinal blockCol = blockEntries(blockNnz);
117  const Ordinal blockNum = blockNnz - blockRowStart;
118 
119  // Iterate over block dim to get the unblocked rows
120  for (Ordinal blockRowOffset = 0; blockRowOffset < blockDim; ++blockRowOffset) {
121  const Ordinal crsRow = blockRow * blockDim + blockRowOffset;
122  // Each unblocked row has blockRowCount * blockDim items
123  const Ordinal crsRowStart = blockRowStart * blockSize + blockRowCount * blockDim * blockRowOffset;
124  crsRowMap(crsRow) = crsRowStart;
125 
126  // Iterate over block dim to get the unblocked cols
127  for (Ordinal blockColOffset = 0; blockColOffset < blockDim; ++blockColOffset) {
128  const Ordinal crsCol = blockCol * blockDim + blockColOffset;
129  const Ordinal crsNnz = crsRowStart + blockNum * blockDim + blockColOffset;
130  crsEntries(crsNnz) = crsCol;
131  crsValues(crsNnz) = blockValues(blockNnz * blockSize + blockRowOffset * blockDim + blockColOffset);
132  }
133  }
134  }
135  });
136 
137  // Finalize CrsMatrix row map
138  Kokkos::RangePolicy<ExeSpace> policy2(0, 1);
139  Kokkos::parallel_for(
140  "FinalizeCrs", policy2, KOKKOS_LAMBDA(const Ordinal) {
141  crsRowMap(numRows) = blockRowMap(numBlockRows) * blockSize;
142  });
143 
144  // Construct CrsMatrix
145  return CrsMatrixType("crsMatrix", numRows, numCols, crsEntries.extent(0), crsValues, crsRowMap, crsEntries);
146 }
147 
148 } // namespace
149 
150 template <class MatrixType>
152  : RILUK<row_matrix_type>(Teuchos::rcp_dynamic_cast<const row_matrix_type>(Matrix_in)) {}
153 
154 template <class MatrixType>
156  : RILUK<row_matrix_type>(Teuchos::rcp_dynamic_cast<const row_matrix_type>(Matrix_in)) {}
157 
158 template <class MatrixType>
160 
161 template <class MatrixType>
163  // FIXME (mfh 04 Nov 2015) What about A_? When does that get (re)set?
164 
165  // It's legal for A to be null; in that case, you may not call
166  // initialize() until calling setMatrix() with a nonnull input.
167  // Regardless, setting the matrix invalidates any previous
168  // factorization.
169  if (A.getRawPtr() != this->A_.getRawPtr()) {
170  this->isAllocated_ = false;
171  this->isInitialized_ = false;
172  this->isComputed_ = false;
173  this->Graph_ = Teuchos::null;
174  L_block_ = Teuchos::null;
175  U_block_ = Teuchos::null;
176  D_block_ = Teuchos::null;
177  }
178 }
179 
180 template <class MatrixType>
181 const typename RBILUK<MatrixType>::block_crs_matrix_type&
184  L_block_.is_null(), std::runtime_error,
185  "Ifpack2::RILUK::getL: The L factor "
186  "is null. Please call initialize() (and preferably also compute()) "
187  "before calling this method. If the input matrix has not yet been set, "
188  "you must first call setMatrix() with a nonnull input matrix before you "
189  "may call initialize() or compute().");
190  return *L_block_;
191 }
192 
193 template <class MatrixType>
194 const typename RBILUK<MatrixType>::block_crs_matrix_type&
197  D_block_.is_null(), std::runtime_error,
198  "Ifpack2::RILUK::getD: The D factor "
199  "(of diagonal entries) is null. Please call initialize() (and "
200  "preferably also compute()) before calling this method. If the input "
201  "matrix has not yet been set, you must first call setMatrix() with a "
202  "nonnull input matrix before you may call initialize() or compute().");
203  return *D_block_;
204 }
205 
206 template <class MatrixType>
207 const typename RBILUK<MatrixType>::block_crs_matrix_type&
210  U_block_.is_null(), std::runtime_error,
211  "Ifpack2::RILUK::getU: The U factor "
212  "is null. Please call initialize() (and preferably also compute()) "
213  "before calling this method. If the input matrix has not yet been set, "
214  "you must first call setMatrix() with a nonnull input matrix before you "
215  "may call initialize() or compute().");
216  return *U_block_;
217 }
218 
219 template <class MatrixType>
221  using Teuchos::null;
222  using Teuchos::rcp;
223 
224  if (!this->isAllocated_) {
225  if (!this->isKokkosKernelsStream_) {
226  // Deallocate any existing storage. This avoids storing 2x
227  // memory, since RCP op= does not deallocate until after the
228  // assignment.
229  this->L_ = null;
230  this->U_ = null;
231  this->D_ = null;
232  L_block_ = null;
233  U_block_ = null;
234  D_block_ = null;
235 
236  // Allocate Matrix using ILUK graphs
237  L_block_ = rcp(new block_crs_matrix_type(*this->Graph_->getL_Graph(), blockSize_));
238  U_block_ = rcp(new block_crs_matrix_type(*this->Graph_->getU_Graph(), blockSize_));
239  D_block_ = rcp(new block_crs_matrix_type(*(Ifpack2::Details::computeDiagonalGraph(*(this->Graph_->getOverlapGraph()))),
240  blockSize_));
241  L_block_->setAllToScalar(STM::zero()); // Zero out L and U matrices
242  U_block_->setAllToScalar(STM::zero());
243  D_block_->setAllToScalar(STM::zero());
244 
245  // Allocate temp space for apply
246  if (this->isKokkosKernelsSpiluk_) {
247  const auto numRows = L_block_->getLocalNumRows();
248  tmp_ = view1d("RBILUK::tmp_", numRows * blockSize_);
249  }
250  } else {
251  this->D_ = null;
252  D_block_ = null;
253 
254  L_block_v_ = std::vector<Teuchos::RCP<block_crs_matrix_type> >(this->num_streams_);
255  U_block_v_ = std::vector<Teuchos::RCP<block_crs_matrix_type> >(this->num_streams_);
256  for (int i = 0; i < this->num_streams_; i++) {
257  L_block_v_[i] = rcp(new block_crs_matrix_type(*this->Graph_v_[i]->getL_Graph(), blockSize_));
258  U_block_v_[i] = rcp(new block_crs_matrix_type(*this->Graph_v_[i]->getU_Graph(), blockSize_));
259  L_block_v_[i]->setAllToScalar(STS::zero()); // Zero out L and U matrices
260  U_block_v_[i]->setAllToScalar(STS::zero());
261  }
262 
263  // Allocate temp space for apply
264  auto lclMtx = A_local_bcrs_->getLocalMatrixDevice();
265  const auto numRows = lclMtx.numRows();
266  tmp_ = view1d("RBILUK::tmp_", numRows * blockSize_);
267  }
268  }
269  this->isAllocated_ = true;
270 }
271 
272 namespace {
273 
274 template <class MatrixType>
276 getBlockCrsGraph(const Teuchos::RCP<const typename RBILUK<MatrixType>::row_matrix_type>& A) {
277  using local_ordinal_type = typename MatrixType::local_ordinal_type;
278  using Teuchos::RCP;
279  using Teuchos::rcp;
280  using Teuchos::rcp_const_cast;
281  using Teuchos::rcp_dynamic_cast;
282  using Teuchos::rcpFromRef;
283  using row_matrix_type = typename RBILUK<MatrixType>::row_matrix_type;
284  using crs_graph_type = typename RBILUK<MatrixType>::crs_graph_type;
285  using block_crs_matrix_type = typename RBILUK<MatrixType>::block_crs_matrix_type;
286 
287  auto A_local = RBILUK<MatrixType>::makeLocalFilter(A);
288 
289  {
290  RCP<const LocalFilter<row_matrix_type> > filteredA =
291  rcp_dynamic_cast<const LocalFilter<row_matrix_type> >(A_local);
292  RCP<const OverlappingRowMatrix<row_matrix_type> > overlappedA = Teuchos::null;
293  RCP<const block_crs_matrix_type> A_block = Teuchos::null;
294  if (!filteredA.is_null()) {
295  overlappedA = rcp_dynamic_cast<const OverlappingRowMatrix<row_matrix_type> >(filteredA->getUnderlyingMatrix());
296  }
297 
298  if (!overlappedA.is_null()) {
299  A_block = rcp_dynamic_cast<const block_crs_matrix_type>(overlappedA->getUnderlyingMatrix());
300  } else if (!filteredA.is_null()) {
301  // If there is no overlap, filteredA could be the block CRS matrix
302  A_block = rcp_dynamic_cast<const block_crs_matrix_type>(filteredA->getUnderlyingMatrix());
303  } else {
304  A_block = rcp_dynamic_cast<const block_crs_matrix_type>(A_local);
305  }
306 
307  if (!A_block.is_null()) {
308  return rcpFromRef(A_block->getCrsGraph());
309  }
310  }
311 
312  // Could not extract block crs, make graph manually
313  {
314  local_ordinal_type numRows = A_local->getLocalNumRows();
315  Teuchos::Array<size_t> entriesPerRow(numRows);
316  for (local_ordinal_type i = 0; i < numRows; i++) {
317  entriesPerRow[i] = A_local->getNumEntriesInLocalRow(i);
318  }
319  RCP<crs_graph_type> A_local_crs_nc =
320  rcp(new crs_graph_type(A_local->getRowMap(),
321  A_local->getColMap(),
322  entriesPerRow()));
323 
324  {
325  using LocalRowHandler_t = LocalRowHandler<MatrixType>;
326  LocalRowHandler_t localRowHandler(A_local);
327  typename LocalRowHandler_t::inds_type indices;
328  typename LocalRowHandler_t::vals_type values;
329  for (local_ordinal_type i = 0; i < numRows; i++) {
330  local_ordinal_type numEntries = 0;
331  localRowHandler.getLocalRow(i, indices, values, numEntries);
332  A_local_crs_nc->insertLocalIndices(i, numEntries, indices.data());
333  }
334  }
335 
336  A_local_crs_nc->fillComplete(A_local->getDomainMap(), A_local->getRangeMap());
337  return rcp_const_cast<const crs_graph_type>(A_local_crs_nc);
338  }
339 }
340 
341 } // namespace
342 
343 template <class MatrixType>
345  using Teuchos::Array;
346  using Teuchos::RCP;
347  using Teuchos::rcp;
348  using Teuchos::rcp_dynamic_cast;
349 
350  using crs_map_type = Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type>;
351 
352  const char prefix[] = "Ifpack2::Experimental::RBILUK::initialize: ";
353 
354  TEUCHOS_TEST_FOR_EXCEPTION(this->A_.is_null(), std::runtime_error, prefix << "The matrix (A_, the "
355  "RowMatrix) is null. Please call setMatrix() with a nonnull input "
356  "before calling this method.");
357  TEUCHOS_TEST_FOR_EXCEPTION(!this->A_->isFillComplete(), std::runtime_error, prefix << "The matrix "
358  "(A_, the BlockCrsMatrix) is not fill complete. You may not invoke "
359  "initialize() or compute() with this matrix until the matrix is fill "
360  "complete. Note: BlockCrsMatrix is fill complete if and only if its "
361  "underlying graph is fill complete.");
362 
363  blockSize_ = this->A_->getBlockSize();
364  this->A_local_ = this->makeLocalFilter(this->A_);
365 
366  Teuchos::Time timer("RBILUK::initialize");
367  double startTime = timer.wallTime();
368  { // Start timing
369  Teuchos::TimeMonitor timeMon(timer);
370 
371  // Calling initialize() means that the user asserts that the graph
372  // of the sparse matrix may have changed. We must not just reuse
373  // the previous graph in that case.
374  //
375  // Regarding setting isAllocated_ to false: Eventually, we may want
376  // some kind of clever memory reuse strategy, but it's always
377  // correct just to blow everything away and start over.
378  this->isInitialized_ = false;
379  this->isAllocated_ = false;
380  this->isComputed_ = false;
381  this->Graph_ = Teuchos::null;
382 
383  if (this->isKokkosKernelsSpiluk_) {
384  A_local_bcrs_ = Details::getBcrsMatrix(this->A_local_);
385  RCP<const crs_matrix_type> A_local_crs = Details::getCrsMatrix(this->A_local_);
386  if (A_local_bcrs_.is_null()) {
387  if (A_local_crs.is_null()) {
388  local_ordinal_type numRows = this->A_local_->getLocalNumRows();
389  Array<size_t> entriesPerRow(numRows);
390  for (local_ordinal_type i = 0; i < numRows; i++) {
391  entriesPerRow[i] = this->A_local_->getNumEntriesInLocalRow(i);
392  }
393  A_local_crs_nc_ =
394  rcp(new crs_matrix_type(this->A_local_->getRowMap(),
395  this->A_local_->getColMap(),
396  entriesPerRow()));
397  // copy entries into A_local_crs
398  nonconst_local_inds_host_view_type indices("indices", this->A_local_->getLocalMaxNumRowEntries());
399  nonconst_values_host_view_type values("values", this->A_local_->getLocalMaxNumRowEntries());
400  for (local_ordinal_type i = 0; i < numRows; i++) {
401  size_t numEntries = 0;
402  this->A_local_->getLocalRowCopy(i, indices, values, numEntries);
403  A_local_crs_nc_->insertLocalValues(i, numEntries, reinterpret_cast<scalar_type*>(values.data()), indices.data());
404  }
405  A_local_crs_nc_->fillComplete(this->A_local_->getDomainMap(), this->A_local_->getRangeMap());
406  A_local_crs = Teuchos::rcp_const_cast<const crs_matrix_type>(A_local_crs_nc_);
407  }
408  // Create bcrs from crs
409  // We can skip fillLogicalBlocks if we know the input is already filled
410  if (blockSize_ > 1) {
411  auto crs_matrix_block_filled = Tpetra::fillLogicalBlocks(*A_local_crs, blockSize_);
412  A_local_bcrs_ = Tpetra::convertToBlockCrsMatrix(*crs_matrix_block_filled, blockSize_);
413  } else {
414  A_local_bcrs_ = Tpetra::convertToBlockCrsMatrix(*A_local_crs, blockSize_);
415  }
416  }
417  }
418 
419  if (this->isKokkosKernelsStream_) {
420  std::vector<int> weights(this->num_streams_);
421  std::fill(weights.begin(), weights.end(), 1);
422  this->exec_space_instances_ = Kokkos::Experimental::partition_space(execution_space(), weights);
423 
424  auto lclMtx = A_local_bcrs_->getLocalMatrixDevice();
425  this->Graph_v_ = std::vector<Teuchos::RCP<Ifpack2::IlukGraph<crs_graph_type, kk_handle_type> > >(this->num_streams_);
426  A_block_local_diagblks_v_ = std::vector<local_matrix_device_type>(this->num_streams_);
427  std::vector<local_crs_matrix_device_type> A_crs_local_diagblks_v(this->num_streams_);
428 
429  if (!this->hasStreamReordered_) {
430  auto lclCrsMtx = convertBsrToCrs<local_matrix_device_type, local_crs_matrix_device_type>(lclMtx);
431  KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclCrsMtx, A_crs_local_diagblks_v);
432  } else {
433  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, prefix << "Stream reordering is not currently supported for RBILUK");
434  }
435 
436  for (int i = 0; i < this->num_streams_; i++) {
437  A_block_local_diagblks_v_[i] = local_matrix_device_type(A_crs_local_diagblks_v[i], blockSize_);
438  }
439 
440  A_block_local_diagblks_rowmap_v_ = std::vector<lno_row_view_t>(this->num_streams_);
441  A_block_local_diagblks_entries_v_ = std::vector<lno_nonzero_view_t>(this->num_streams_);
442  A_block_local_diagblks_values_v_ = std::vector<scalar_nonzero_view_t>(this->num_streams_);
443 
444  for (int i = 0; i < this->num_streams_; i++) {
445  A_block_local_diagblks_rowmap_v_[i] = A_block_local_diagblks_v_[i].graph.row_map;
446  A_block_local_diagblks_entries_v_[i] = A_block_local_diagblks_v_[i].graph.entries;
447  A_block_local_diagblks_values_v_[i] = A_block_local_diagblks_v_[i].values;
448 
449  Teuchos::RCP<const crs_map_type> A_local_diagblks_RowMap = rcp(new crs_map_type(A_block_local_diagblks_v_[i].numRows(),
450  A_block_local_diagblks_v_[i].numRows(),
451  A_local_bcrs_->getRowMap()->getComm()));
452  Teuchos::RCP<const crs_map_type> A_local_diagblks_ColMap = rcp(new crs_map_type(A_block_local_diagblks_v_[i].numCols(),
453  A_block_local_diagblks_v_[i].numCols(),
454  A_local_bcrs_->getColMap()->getComm()));
455 
456  Teuchos::RCP<const crs_graph_type> graph = rcp(new crs_graph_type(A_local_diagblks_RowMap, A_local_diagblks_ColMap, A_block_local_diagblks_v_[i].graph));
457  this->Graph_v_[i] = rcp(new Ifpack2::IlukGraph<crs_graph_type, kk_handle_type>(graph,
458  this->LevelOfFill_, 0));
459  }
460  } else {
461  RCP<const crs_graph_type> matrixCrsGraph = getBlockCrsGraph<MatrixType>(this->A_);
462  this->Graph_ = rcp(new Ifpack2::IlukGraph<crs_graph_type, kk_handle_type>(matrixCrsGraph,
463  this->LevelOfFill_, 0));
464  }
465 
466  if (this->isKokkosKernelsSpiluk_) {
467  if (!this->isKokkosKernelsStream_) {
468  KernelHandle_block_ = Teuchos::rcp(new kk_handle_type());
469  const auto numRows = this->A_local_->getLocalNumRows();
470  KernelHandle_block_->create_spiluk_handle(KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
471  numRows,
472  2 * this->A_local_->getLocalNumEntries() * (this->LevelOfFill_ + 1),
473  2 * this->A_local_->getLocalNumEntries() * (this->LevelOfFill_ + 1),
474  blockSize_);
475  this->Graph_->initialize(KernelHandle_block_); // this calls spiluk_symbolic
476 
477  L_Sptrsv_KernelHandle_ = Teuchos::rcp(new kk_handle_type());
478  U_Sptrsv_KernelHandle_ = Teuchos::rcp(new kk_handle_type());
479 
480  KokkosSparse::Experimental::SPTRSVAlgorithm alg = KokkosSparse::Experimental::SPTRSVAlgorithm::SEQLVLSCHD_TP1;
481 
482  L_Sptrsv_KernelHandle_->create_sptrsv_handle(alg, numRows, true /*lower*/, blockSize_);
483  U_Sptrsv_KernelHandle_->create_sptrsv_handle(alg, numRows, false /*upper*/, blockSize_);
484  } else {
485  KokkosSparse::Experimental::SPTRSVAlgorithm alg = KokkosSparse::Experimental::SPTRSVAlgorithm::SEQLVLSCHD_TP1;
486  KernelHandle_block_v_ = std::vector<Teuchos::RCP<kk_handle_type> >(this->num_streams_);
487  L_Sptrsv_KernelHandle_v_ = std::vector<Teuchos::RCP<kk_handle_type> >(this->num_streams_);
488  U_Sptrsv_KernelHandle_v_ = std::vector<Teuchos::RCP<kk_handle_type> >(this->num_streams_);
489  for (int i = 0; i < this->num_streams_; i++) {
490  const auto numRows = A_block_local_diagblks_v_[i].numRows();
491  KernelHandle_block_v_[i] = Teuchos::rcp(new kk_handle_type());
492  KernelHandle_block_v_[i]->create_spiluk_handle(KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
493  numRows,
494  2 * A_block_local_diagblks_v_[i].nnz() * (this->LevelOfFill_ + 1),
495  2 * A_block_local_diagblks_v_[i].nnz() * (this->LevelOfFill_ + 1),
496  blockSize_);
497  this->Graph_v_[i]->initialize(KernelHandle_block_v_[i]); // this calls spiluk_symbolic
498 
499  L_Sptrsv_KernelHandle_v_[i] = Teuchos::rcp(new kk_handle_type());
500  U_Sptrsv_KernelHandle_v_[i] = Teuchos::rcp(new kk_handle_type());
501 
502  L_Sptrsv_KernelHandle_v_[i]->create_sptrsv_handle(alg, numRows, true /*lower*/, blockSize_);
503  U_Sptrsv_KernelHandle_v_[i]->create_sptrsv_handle(alg, numRows, false /*upper*/, blockSize_);
504  }
505  }
506  } else {
507  this->Graph_->initialize();
508  }
509 
510  allocate_L_and_U_blocks();
511 
512 #ifdef IFPACK2_RBILUK_INITIAL
513  initAllValues();
514 #endif
515  } // Stop timing
516 
517  this->isInitialized_ = true;
518  this->numInitialize_ += 1;
519  this->initializeTime_ += (timer.wallTime() - startTime);
520 }
521 
522 template <class MatrixType>
524  initAllValues() {
525  using Teuchos::RCP;
526  typedef Tpetra::Map<LO, GO, node_type> map_type;
527 
528  LO NumIn = 0, NumL = 0, NumU = 0;
529  bool DiagFound = false;
530  size_t NumNonzeroDiags = 0;
531  size_t MaxNumEntries = this->A_->getLocalMaxNumRowEntries();
532  LO blockMatSize = blockSize_ * blockSize_;
533 
534  // First check that the local row map ordering is the same as the local portion of the column map.
535  // The extraction of the strictly lower/upper parts of A, as well as the factorization,
536  // implicitly assume that this is the case.
537  Teuchos::ArrayView<const GO> rowGIDs = this->A_->getRowMap()->getLocalElementList();
538  Teuchos::ArrayView<const GO> colGIDs = this->A_->getColMap()->getLocalElementList();
539  bool gidsAreConsistentlyOrdered = true;
540  GO indexOfInconsistentGID = 0;
541  for (GO i = 0; i < rowGIDs.size(); ++i) {
542  if (rowGIDs[i] != colGIDs[i]) {
543  gidsAreConsistentlyOrdered = false;
544  indexOfInconsistentGID = i;
545  break;
546  }
547  }
548  TEUCHOS_TEST_FOR_EXCEPTION(gidsAreConsistentlyOrdered == false, std::runtime_error,
549  "The ordering of the local GIDs in the row and column maps is not the same"
550  << std::endl
551  << "at index " << indexOfInconsistentGID
552  << ". Consistency is required, as all calculations are done with"
553  << std::endl
554  << "local indexing.");
555 
556  // Allocate temporary space for extracting the strictly
557  // lower and upper parts of the matrix A.
558  Teuchos::Array<LO> LI(MaxNumEntries);
559  Teuchos::Array<LO> UI(MaxNumEntries);
560  Teuchos::Array<scalar_type> LV(MaxNumEntries * blockMatSize);
561  Teuchos::Array<scalar_type> UV(MaxNumEntries * blockMatSize);
562 
563  Teuchos::Array<scalar_type> diagValues(blockMatSize);
564 
565  L_block_->setAllToScalar(STM::zero()); // Zero out L and U matrices
566  U_block_->setAllToScalar(STM::zero());
567  D_block_->setAllToScalar(STM::zero()); // Set diagonal values to zero
568 
569  // NOTE (mfh 27 May 2016) The factorization below occurs entirely on
570  // host, so sync to host first. The const_cast is unfortunate but
571  // is our only option to make this correct.
572 
573  /*
574  const_cast<block_crs_matrix_type&> (A).sync_host ();
575  L_block_->sync_host ();
576  U_block_->sync_host ();
577  D_block_->sync_host ();
578  // NOTE (mfh 27 May 2016) We're modifying L, U, and D on host.
579  L_block_->modify_host ();
580  U_block_->modify_host ();
581  D_block_->modify_host ();
582  */
583 
584  RCP<const map_type> rowMap = L_block_->getRowMap();
585 
586  // First we copy the user's matrix into L and U, regardless of fill level.
587  // It is important to note that L and U are populated using local indices.
588  // This means that if the row map GIDs are not monotonically increasing
589  // (i.e., permuted or gappy), then the strictly lower (upper) part of the
590  // matrix is not the one that you would get if you based L (U) on GIDs.
591  // This is ok, as the *order* of the GIDs in the rowmap is a better
592  // expression of the user's intent than the GIDs themselves.
593 
594  // TODO BMK: Revisit this fence when BlockCrsMatrix is refactored.
595  Kokkos::fence();
596  using LocalRowHandler_t = LocalRowHandler<MatrixType>;
597  LocalRowHandler_t localRowHandler(this->A_);
598  typename LocalRowHandler_t::inds_type InI;
599  typename LocalRowHandler_t::vals_type InV;
600  for (size_t myRow = 0; myRow < this->A_->getLocalNumRows(); ++myRow) {
601  LO local_row = myRow;
602 
603  localRowHandler.getLocalRow(local_row, InI, InV, NumIn);
604 
605  // Split into L and U (we don't assume that indices are ordered).
606  NumL = 0;
607  NumU = 0;
608  DiagFound = false;
609 
610  for (LO j = 0; j < NumIn; ++j) {
611  const LO k = InI[j];
612  const LO blockOffset = blockMatSize * j;
613 
614  if (k == local_row) {
615  DiagFound = true;
616  // Store perturbed diagonal in Tpetra::Vector D_
617  for (LO jj = 0; jj < blockMatSize; ++jj)
618  diagValues[jj] = this->Rthresh_ * InV[blockOffset + jj] + IFPACK2_SGN(InV[blockOffset + jj]) * this->Athresh_;
619  D_block_->replaceLocalValues(local_row, &InI[j], diagValues.getRawPtr(), 1);
620  } else if (k < 0) { // Out of range
622  true, std::runtime_error,
623  "Ifpack2::RILUK::initAllValues: current "
624  "GID k = "
625  << k << " < 0. I'm not sure why this is an error; it is "
626  "probably an artifact of the undocumented assumptions of the "
627  "original implementation (likely copied and pasted from Ifpack). "
628  "Nevertheless, the code I found here insisted on this being an error "
629  "state, so I will throw an exception here.");
630  } else if (k < local_row) {
631  LI[NumL] = k;
632  const LO LBlockOffset = NumL * blockMatSize;
633  for (LO jj = 0; jj < blockMatSize; ++jj)
634  LV[LBlockOffset + jj] = InV[blockOffset + jj];
635  NumL++;
636  } else if (Teuchos::as<size_t>(k) <= rowMap->getLocalNumElements()) {
637  UI[NumU] = k;
638  const LO UBlockOffset = NumU * blockMatSize;
639  for (LO jj = 0; jj < blockMatSize; ++jj)
640  UV[UBlockOffset + jj] = InV[blockOffset + jj];
641  NumU++;
642  }
643  }
644 
645  // Check in things for this row of L and U
646 
647  if (DiagFound) {
648  ++NumNonzeroDiags;
649  } else {
650  for (LO jj = 0; jj < blockSize_; ++jj)
651  diagValues[jj * (blockSize_ + 1)] = this->Athresh_;
652  D_block_->replaceLocalValues(local_row, &local_row, diagValues.getRawPtr(), 1);
653  }
654 
655  if (NumL) {
656  L_block_->replaceLocalValues(local_row, &LI[0], &LV[0], NumL);
657  }
658 
659  if (NumU) {
660  U_block_->replaceLocalValues(local_row, &UI[0], &UV[0], NumU);
661  }
662  }
663 
664  // NOTE (mfh 27 May 2016) Sync back to device, in case compute()
665  // ever gets a device implementation.
666  /*
667  {
668  typedef typename block_crs_matrix_type::device_type device_type;
669  const_cast<block_crs_matrix_type&> (A).template sync<device_type> ();
670  L_block_->template sync<device_type> ();
671  U_block_->template sync<device_type> ();
672  D_block_->template sync<device_type> ();
673  }
674  */
675  this->isInitialized_ = true;
676 }
677 
678 namespace { // (anonymous)
679 
680 // For a given Kokkos::View type, possibly unmanaged, get the
681 // corresponding managed Kokkos::View type. This is handy for
682 // translating from little_block_type or little_host_vec_type (both
683 // possibly unmanaged) to their managed versions.
684 template <class LittleBlockType>
685 struct GetManagedView {
686  static_assert(Kokkos::is_view<LittleBlockType>::value,
687  "LittleBlockType must be a Kokkos::View.");
688  typedef Kokkos::View<typename LittleBlockType::non_const_data_type,
689  typename LittleBlockType::array_layout,
690  typename LittleBlockType::device_type>
691  managed_non_const_type;
692  static_assert(static_cast<int>(managed_non_const_type::rank) ==
693  static_cast<int>(LittleBlockType::rank),
694  "managed_non_const_type::rank != LittleBlock::rank. "
695  "Please report this bug to the Ifpack2 developers.");
696 };
697 
698 } // namespace
699 
700 template <class MatrixType>
702  using Teuchos::Array;
703  using Teuchos::RCP;
704  using Teuchos::rcp;
705 
706  typedef impl_scalar_type IST;
707  const char prefix[] = "Ifpack2::Experimental::RBILUK::compute: ";
708 
709  // initialize() checks this too, but it's easier for users if the
710  // error shows them the name of the method that they actually
711  // called, rather than the name of some internally called method.
712  TEUCHOS_TEST_FOR_EXCEPTION(this->A_.is_null(), std::runtime_error, prefix << "The matrix (A_, "
713  "the BlockCrsMatrix) is null. Please call setMatrix() with a nonnull "
714  "input before calling this method.");
715  TEUCHOS_TEST_FOR_EXCEPTION(!this->A_->isFillComplete(), std::runtime_error, prefix << "The matrix "
716  "(A_, the BlockCrsMatrix) is not fill complete. You may not invoke "
717  "initialize() or compute() with this matrix until the matrix is fill "
718  "complete. Note: BlockCrsMatrix is fill complete if and only if its "
719  "underlying graph is fill complete.");
720 
721  if (!this->isInitialized()) {
722  initialize(); // Don't count this in the compute() time
723  }
724 
725  // // NOTE (mfh 27 May 2016) The factorization below occurs entirely on
726  // // host, so sync to host first. The const_cast is unfortunate but
727  // // is our only option to make this correct.
728  // if (! A_block_.is_null ()) {
729  // Teuchos::RCP<block_crs_matrix_type> A_nc =
730  // Teuchos::rcp_const_cast<block_crs_matrix_type> (A_block_);
731  // // A_nc->sync_host ();
732  // }
733  /*
734  L_block_->sync_host ();
735  U_block_->sync_host ();
736  D_block_->sync_host ();
737  // NOTE (mfh 27 May 2016) We're modifying L, U, and D on host.
738  L_block_->modify_host ();
739  U_block_->modify_host ();
740  D_block_->modify_host ();
741  */
742 
743  Teuchos::Time timer("RBILUK::compute");
744  double startTime = timer.wallTime();
745  { // Start timing
746  Teuchos::TimeMonitor timeMon(timer);
747  this->isComputed_ = false;
748 
749  // MinMachNum should be officially defined, for now pick something a little
750  // bigger than IEEE underflow value
751 
752  // const scalar_type MinDiagonalValue = STS::rmin ();
753  // const scalar_type MaxDiagonalValue = STS::one () / MinDiagonalValue;
754  if (!this->isKokkosKernelsSpiluk_) {
755  initAllValues();
756  size_t NumIn;
757  LO NumL, NumU, NumURead;
758 
760  this->isKokkosKernelsStream_, std::runtime_error,
761  "Ifpack2::RBILUK::compute: "
762  "streams are not yet supported without KokkosKernels.");
763 
764  // Get Maximum Row length
765  const size_t MaxNumEntries =
766  L_block_->getLocalMaxNumRowEntries() + U_block_->getLocalMaxNumRowEntries() + 1;
767 
768  const LO blockMatSize = blockSize_ * blockSize_;
769 
770  // FIXME (mfh 08 Nov 2015, 24 May 2016) We need to move away from
771  // expressing these strides explicitly, in order to finish #177
772  // (complete Kokkos-ization of BlockCrsMatrix) thoroughly.
773  const LO rowStride = blockSize_;
774 
775  Teuchos::Array<int> ipiv_teuchos(blockSize_);
776  Kokkos::View<int*, Kokkos::HostSpace,
777  Kokkos::MemoryUnmanaged>
778  ipiv(ipiv_teuchos.getRawPtr(), blockSize_);
779  Teuchos::Array<IST> work_teuchos(blockSize_);
780  Kokkos::View<IST*, Kokkos::HostSpace,
781  Kokkos::MemoryUnmanaged>
782  work(work_teuchos.getRawPtr(), blockSize_);
783 
784  size_t num_cols = U_block_->getColMap()->getLocalNumElements();
785  Teuchos::Array<int> colflag(num_cols);
786 
787  typename GetManagedView<little_block_host_type>::managed_non_const_type diagModBlock("diagModBlock", blockSize_, blockSize_);
788  typename GetManagedView<little_block_host_type>::managed_non_const_type matTmp("matTmp", blockSize_, blockSize_);
789  typename GetManagedView<little_block_host_type>::managed_non_const_type multiplier("multiplier", blockSize_, blockSize_);
790 
791  // Teuchos::ArrayRCP<scalar_type> DV = D_->get1dViewNonConst(); // Get view of diagonal
792 
793  // Now start the factorization.
794 
795  // Need some integer workspace and pointers
796  LO NumUU;
797  for (size_t j = 0; j < num_cols; ++j) {
798  colflag[j] = -1;
799  }
800  Teuchos::Array<LO> InI(MaxNumEntries, 0);
801  Teuchos::Array<scalar_type> InV(MaxNumEntries * blockMatSize, STM::zero());
802 
803  const LO numLocalRows = L_block_->getLocalNumRows();
804  for (LO local_row = 0; local_row < numLocalRows; ++local_row) {
805  // Fill InV, InI with current row of L, D and U combined
806 
807  NumIn = MaxNumEntries;
808  local_inds_host_view_type colValsL;
809  values_host_view_type valsL;
810 
811  L_block_->getLocalRowView(local_row, colValsL, valsL);
812  NumL = (LO)colValsL.size();
813  for (LO j = 0; j < NumL; ++j) {
814  const LO matOffset = blockMatSize * j;
815  little_block_host_type lmat((typename little_block_host_type::value_type*)&valsL[matOffset], blockSize_, rowStride);
816  little_block_host_type lmatV((typename little_block_host_type::value_type*)&InV[matOffset], blockSize_, rowStride);
817  // lmatV.assign(lmat);
818  Tpetra::COPY(lmat, lmatV);
819  InI[j] = colValsL[j];
820  }
821 
822  little_block_host_type dmat = D_block_->getLocalBlockHostNonConst(local_row, local_row);
823  little_block_host_type dmatV((typename little_block_host_type::value_type*)&InV[NumL * blockMatSize], blockSize_, rowStride);
824  // dmatV.assign(dmat);
825  Tpetra::COPY(dmat, dmatV);
826  InI[NumL] = local_row;
827 
828  local_inds_host_view_type colValsU;
829  values_host_view_type valsU;
830  U_block_->getLocalRowView(local_row, colValsU, valsU);
831  NumURead = (LO)colValsU.size();
832  NumU = 0;
833  for (LO j = 0; j < NumURead; ++j) {
834  if (!(colValsU[j] < numLocalRows)) continue;
835  InI[NumL + 1 + j] = colValsU[j];
836  const LO matOffset = blockMatSize * (NumL + 1 + j);
837  little_block_host_type umat((typename little_block_host_type::value_type*)&valsU[blockMatSize * j], blockSize_, rowStride);
838  little_block_host_type umatV((typename little_block_host_type::value_type*)&InV[matOffset], blockSize_, rowStride);
839  // umatV.assign(umat);
840  Tpetra::COPY(umat, umatV);
841  NumU += 1;
842  }
843  NumIn = NumL + NumU + 1;
844 
845  // Set column flags
846  for (size_t j = 0; j < NumIn; ++j) {
847  colflag[InI[j]] = j;
848  }
849 
850 #ifndef IFPACK2_RBILUK_INITIAL
851  for (LO i = 0; i < blockSize_; ++i)
852  for (LO j = 0; j < blockSize_; ++j) {
853  {
854  diagModBlock(i, j) = 0;
855  }
856  }
857 #else
858  scalar_type diagmod = STM::zero(); // Off-diagonal accumulator
859  Kokkos::deep_copy(diagModBlock, diagmod);
860 #endif
861 
862  for (LO jj = 0; jj < NumL; ++jj) {
863  LO j = InI[jj];
864  little_block_host_type currentVal((typename little_block_host_type::value_type*)&InV[jj * blockMatSize], blockSize_, rowStride); // current_mults++;
865  // multiplier.assign(currentVal);
866  Tpetra::COPY(currentVal, multiplier);
867 
868  const little_block_host_type dmatInverse = D_block_->getLocalBlockHostNonConst(j, j);
869  // alpha = 1, beta = 0
870 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
871  KokkosBatched::SerialGemm<KokkosBatched::Trans::NoTranspose,
872  KokkosBatched::Trans::NoTranspose,
873  KokkosBatched::Algo::Gemm::Blocked>::invoke(STS::one(), currentVal, dmatInverse, STS::zero(), matTmp);
874 #else
875  Tpetra::GEMM("N", "N", STS::one(), currentVal, dmatInverse,
876  STS::zero(), matTmp);
877 #endif
878  // blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*> (currentVal.data ()), reinterpret_cast<impl_scalar_type*> (dmatInverse.data ()), reinterpret_cast<impl_scalar_type*> (matTmp.data ()), blockSize_);
879  // currentVal.assign(matTmp);
880  Tpetra::COPY(matTmp, currentVal);
881  local_inds_host_view_type UUI;
882  values_host_view_type UUV;
883 
884  U_block_->getLocalRowView(j, UUI, UUV);
885  NumUU = (LO)UUI.size();
886 
887  if (this->RelaxValue_ == STM::zero()) {
888  for (LO k = 0; k < NumUU; ++k) {
889  if (!(UUI[k] < numLocalRows)) continue;
890  const int kk = colflag[UUI[k]];
891  if (kk > -1) {
892  little_block_host_type kkval((typename little_block_host_type::value_type*)&InV[kk * blockMatSize], blockSize_, rowStride);
893  little_block_host_type uumat((typename little_block_host_type::value_type*)&UUV[k * blockMatSize], blockSize_, rowStride);
894 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
895  KokkosBatched::SerialGemm<KokkosBatched::Trans::NoTranspose,
896  KokkosBatched::Trans::NoTranspose,
897  KokkosBatched::Algo::Gemm::Blocked>::invoke(magnitude_type(-STM::one()), multiplier, uumat, STM::one(), kkval);
898 #else
899  Tpetra::GEMM("N", "N", magnitude_type(-STM::one()), multiplier, uumat,
900  STM::one(), kkval);
901 #endif
902  // blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*> (multiplier.data ()), reinterpret_cast<impl_scalar_type*> (uumat.data ()), reinterpret_cast<impl_scalar_type*> (kkval.data ()), blockSize_, -STM::one(), STM::one());
903  }
904  }
905  } else {
906  for (LO k = 0; k < NumUU; ++k) {
907  if (!(UUI[k] < numLocalRows)) continue;
908  const int kk = colflag[UUI[k]];
909  little_block_host_type uumat((typename little_block_host_type::value_type*)&UUV[k * blockMatSize], blockSize_, rowStride);
910  if (kk > -1) {
911  little_block_host_type kkval((typename little_block_host_type::value_type*)&InV[kk * blockMatSize], blockSize_, rowStride);
912 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
913  KokkosBatched::SerialGemm<KokkosBatched::Trans::NoTranspose,
914  KokkosBatched::Trans::NoTranspose,
915  KokkosBatched::Algo::Gemm::Blocked>::invoke(magnitude_type(-STM::one()), multiplier, uumat, STM::one(), kkval);
916 #else
917  Tpetra::GEMM("N", "N", magnitude_type(-STM::one()), multiplier, uumat,
918  STM::one(), kkval);
919 #endif
920  // blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*>(multiplier.data ()), reinterpret_cast<impl_scalar_type*>(uumat.data ()), reinterpret_cast<impl_scalar_type*>(kkval.data ()), blockSize_, -STM::one(), STM::one());
921  } else {
922 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
923  KokkosBatched::SerialGemm<KokkosBatched::Trans::NoTranspose,
924  KokkosBatched::Trans::NoTranspose,
925  KokkosBatched::Algo::Gemm::Blocked>::invoke(magnitude_type(-STM::one()), multiplier, uumat, STM::one(), diagModBlock);
926 #else
927  Tpetra::GEMM("N", "N", magnitude_type(-STM::one()), multiplier, uumat,
928  STM::one(), diagModBlock);
929 #endif
930  // blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*>(multiplier.data ()), reinterpret_cast<impl_scalar_type*>(uumat.data ()), reinterpret_cast<impl_scalar_type*>(diagModBlock.data ()), blockSize_, -STM::one(), STM::one());
931  }
932  }
933  }
934  }
935  if (NumL) {
936  // Replace current row of L
937  L_block_->replaceLocalValues(local_row, InI.getRawPtr(), InV.getRawPtr(), NumL);
938  }
939 
940  // dmat.assign(dmatV);
941  Tpetra::COPY(dmatV, dmat);
942 
943  if (this->RelaxValue_ != STM::zero()) {
944  // dmat.update(this->RelaxValue_, diagModBlock);
945  Tpetra::AXPY(this->RelaxValue_, diagModBlock, dmat);
946  }
947 
948  // if (STS::magnitude (DV[i]) > STS::magnitude (MaxDiagonalValue)) {
949  // if (STS::real (DV[i]) < STM::zero ()) {
950  // DV[i] = -MinDiagonalValue;
951  // }
952  // else {
953  // DV[i] = MinDiagonalValue;
954  // }
955  // }
956  // else
957  {
958  int lapackInfo = 0;
959  for (int k = 0; k < blockSize_; ++k) {
960  ipiv[k] = 0;
961  }
962 
963  Tpetra::GETF2(dmat, ipiv, lapackInfo);
964  // lapack.GETRF(blockSize_, blockSize_, d_raw, blockSize_, ipiv.getRawPtr(), &lapackInfo);
966  lapackInfo != 0, std::runtime_error,
967  "Ifpack2::Experimental::RBILUK::compute: "
968  "lapackInfo = "
969  << lapackInfo << " which indicates an error in the factorization GETRF.");
970 
971  Tpetra::GETRI(dmat, ipiv, work, lapackInfo);
972  // lapack.GETRI(blockSize_, d_raw, blockSize_, ipiv.getRawPtr(), work.getRawPtr(), lwork, &lapackInfo);
974  lapackInfo != 0, std::runtime_error,
975  "Ifpack2::Experimental::RBILUK::compute: "
976  "lapackInfo = "
977  << lapackInfo << " which indicates an error in the matrix inverse GETRI.");
978  }
979 
980  for (LO j = 0; j < NumU; ++j) {
981  little_block_host_type currentVal((typename little_block_host_type::value_type*)&InV[(NumL + 1 + j) * blockMatSize], blockSize_, rowStride); // current_mults++;
982  // scale U by the diagonal inverse
983 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
984  KokkosBatched::SerialGemm<KokkosBatched::Trans::NoTranspose,
985  KokkosBatched::Trans::NoTranspose,
986  KokkosBatched::Algo::Gemm::Blocked>::invoke(STS::one(), dmat, currentVal, STS::zero(), matTmp);
987 #else
988  Tpetra::GEMM("N", "N", STS::one(), dmat, currentVal,
989  STS::zero(), matTmp);
990 #endif
991  // blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*>(dmat.data ()), reinterpret_cast<impl_scalar_type*>(currentVal.data ()), reinterpret_cast<impl_scalar_type*>(matTmp.data ()), blockSize_);
992  // currentVal.assign(matTmp);
993  Tpetra::COPY(matTmp, currentVal);
994  }
995 
996  if (NumU) {
997  // Replace current row of L and U
998  U_block_->replaceLocalValues(local_row, &InI[NumL + 1], &InV[blockMatSize * (NumL + 1)], NumU);
999  }
1000 
1001 #ifndef IFPACK2_RBILUK_INITIAL
1002  // Reset column flags
1003  for (size_t j = 0; j < NumIn; ++j) {
1004  colflag[InI[j]] = -1;
1005  }
1006 #else
1007  // Reset column flags
1008  for (size_t j = 0; j < num_cols; ++j) {
1009  colflag[j] = -1;
1010  }
1011 #endif
1012  }
1013  } // !this->isKokkosKernelsSpiluk_
1014  else {
1015  // If we are not using A_local_ directly, we must copy values in case of pattern reuse. Due
1016  // to having to deal with filling logical blocks and converting back and forth between CRS and
1017  // BSR, this is not very performant and a lot of computation has to be repeated. If you are going
1018  // to do patten reuse, I recommend having A_local be a BSR.
1019  auto A_local_bcrs_temp = Details::getBcrsMatrix(this->A_local_);
1020  auto A_local_crs_temp = Details::getCrsMatrix(this->A_local_);
1021  if (A_local_bcrs_temp.is_null()) {
1022  if (A_local_crs_temp.is_null()) {
1023  // copy entries into A_local_crs_nc
1024  local_ordinal_type numRows = this->A_local_->getLocalNumRows();
1025  A_local_crs_nc_->resumeFill();
1026  nonconst_local_inds_host_view_type indices("indices", this->A_local_->getLocalMaxNumRowEntries());
1027  nonconst_values_host_view_type values("values", this->A_local_->getLocalMaxNumRowEntries());
1028  for (local_ordinal_type i = 0; i < numRows; i++) {
1029  size_t numEntries = 0;
1030  this->A_local_->getLocalRowCopy(i, indices, values, numEntries);
1031  A_local_crs_nc_->insertLocalValues(i, numEntries, reinterpret_cast<scalar_type*>(values.data()), indices.data());
1032  }
1033  A_local_crs_nc_->fillComplete(this->A_local_->getDomainMap(), this->A_local_->getRangeMap());
1034  }
1035 
1036  if (blockSize_ > 1) {
1037  auto crs_matrix_block_filled = Tpetra::fillLogicalBlocks(*A_local_crs_nc_, blockSize_);
1038  Kokkos::deep_copy(A_local_bcrs_->getLocalMatrixDevice().values,
1039  crs_matrix_block_filled->getLocalMatrixDevice().values);
1040  } else {
1041  Kokkos::deep_copy(A_local_bcrs_->getLocalMatrixDevice().values,
1042  A_local_crs_nc_->getLocalMatrixDevice().values);
1043  }
1044  }
1045 
1046  if (!this->isKokkosKernelsStream_) {
1047  auto lclMtx = A_local_bcrs_->getLocalMatrixDevice();
1048  auto A_local_rowmap = lclMtx.graph.row_map;
1049  auto A_local_entries = lclMtx.graph.entries;
1050  auto A_local_values = lclMtx.values;
1051 
1052  // L_block_->resumeFill ();
1053  // U_block_->resumeFill ();
1054 
1055  if (L_block_->isLocallyIndexed()) {
1056  L_block_->setAllToScalar(STS::zero()); // Zero out L and U matrices
1057  U_block_->setAllToScalar(STS::zero());
1058  }
1059 
1060  using row_map_type = typename local_matrix_device_type::row_map_type;
1061 
1062  auto lclL = L_block_->getLocalMatrixDevice();
1063  row_map_type L_rowmap = lclL.graph.row_map;
1064  auto L_entries = lclL.graph.entries;
1065  auto L_values = lclL.values;
1066 
1067  auto lclU = U_block_->getLocalMatrixDevice();
1068  row_map_type U_rowmap = lclU.graph.row_map;
1069  auto U_entries = lclU.graph.entries;
1070  auto U_values = lclU.values;
1071 
1072  KokkosSparse::spiluk_numeric(KernelHandle_block_.getRawPtr(), this->LevelOfFill_,
1073  A_local_rowmap, A_local_entries, A_local_values,
1074  L_rowmap, L_entries, L_values, U_rowmap, U_entries, U_values);
1075 
1076  // Now call symbolic for sptrsvs
1077  KokkosSparse::sptrsv_symbolic(L_Sptrsv_KernelHandle_.getRawPtr(), L_rowmap, L_entries, L_values);
1078  KokkosSparse::sptrsv_symbolic(U_Sptrsv_KernelHandle_.getRawPtr(), U_rowmap, U_entries, U_values);
1079  } else {
1080  // Due to A_block_local_diagblks_values_v_, we must always refresh when streams are on
1081  assert(!this->hasStreamReordered_);
1082  auto lclMtx = A_local_bcrs_->getLocalMatrixDevice();
1083  auto lclCrsMtx = convertBsrToCrs<local_matrix_device_type, local_crs_matrix_device_type>(lclMtx);
1084  std::vector<local_crs_matrix_device_type> A_crs_local_diagblks_v(this->num_streams_);
1085  KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclCrsMtx, A_crs_local_diagblks_v);
1086 
1087  for (int i = 0; i < this->num_streams_; i++) {
1088  A_block_local_diagblks_v_[i] = local_matrix_device_type(A_crs_local_diagblks_v[i], blockSize_);
1089  // rowmap and entries should be the same, but not values
1090  A_block_local_diagblks_values_v_[i] = A_block_local_diagblks_v_[i].values;
1091  }
1092 
1093  std::vector<lno_row_view_t> L_rowmap_v(this->num_streams_);
1094  std::vector<lno_nonzero_view_t> L_entries_v(this->num_streams_);
1095  std::vector<scalar_nonzero_view_t> L_values_v(this->num_streams_);
1096  std::vector<lno_row_view_t> U_rowmap_v(this->num_streams_);
1097  std::vector<lno_nonzero_view_t> U_entries_v(this->num_streams_);
1098  std::vector<scalar_nonzero_view_t> U_values_v(this->num_streams_);
1099  std::vector<kk_handle_type*> KernelHandle_rawptr_v_(this->num_streams_);
1100 
1101  for (int i = 0; i < this->num_streams_; i++) {
1102  L_block_v_[i]->setAllToScalar(STS::zero()); // Zero out L and U matrices
1103  U_block_v_[i]->setAllToScalar(STS::zero());
1104 
1105  auto lclL = L_block_v_[i]->getLocalMatrixDevice();
1106  L_rowmap_v[i] = lclL.graph.row_map;
1107  L_entries_v[i] = lclL.graph.entries;
1108  L_values_v[i] = lclL.values;
1109 
1110  auto lclU = U_block_v_[i]->getLocalMatrixDevice();
1111  U_rowmap_v[i] = lclU.graph.row_map;
1112  U_entries_v[i] = lclU.graph.entries;
1113  U_values_v[i] = lclU.values;
1114  KernelHandle_rawptr_v_[i] = KernelHandle_block_v_[i].getRawPtr();
1115  }
1116 
1117  // L_block_->resumeFill ();
1118  // U_block_->resumeFill ();
1119 
1120  KokkosSparse::Experimental::spiluk_numeric_streams(
1121  this->exec_space_instances_, KernelHandle_rawptr_v_, this->LevelOfFill_,
1122  A_block_local_diagblks_rowmap_v_, A_block_local_diagblks_entries_v_, A_block_local_diagblks_values_v_,
1123  L_rowmap_v, L_entries_v, L_values_v,
1124  U_rowmap_v, U_entries_v, U_values_v);
1125 
1126  // Now call symbolic for sptrsvs
1127  for (int i = 0; i < this->num_streams_; i++) {
1128  KokkosSparse::sptrsv_symbolic(L_Sptrsv_KernelHandle_v_[i].getRawPtr(), L_rowmap_v[i], L_entries_v[i], L_values_v[i]);
1129  KokkosSparse::sptrsv_symbolic(U_Sptrsv_KernelHandle_v_[i].getRawPtr(), U_rowmap_v[i], U_entries_v[i], U_values_v[i]);
1130  }
1131  }
1132  }
1133  } // Stop timing
1134 
1135  // Sync everything back to device, for efficient solves.
1136  /*
1137  {
1138  typedef typename block_crs_matrix_type::device_type device_type;
1139  if (! A_block_.is_null ()) {
1140  Teuchos::RCP<block_crs_matrix_type> A_nc =
1141  Teuchos::rcp_const_cast<block_crs_matrix_type> (A_block_);
1142  A_nc->template sync<device_type> ();
1143  }
1144  L_block_->template sync<device_type> ();
1145  U_block_->template sync<device_type> ();
1146  D_block_->template sync<device_type> ();
1147  }
1148  */
1149 
1150  this->isComputed_ = true;
1151  this->numCompute_ += 1;
1152  this->computeTime_ += (timer.wallTime() - startTime);
1153 }
1154 
1155 template <class MatrixType>
1156 template <typename X, typename Y>
1158  stream_apply_impl(const X& X_views, Y& Y_views, const bool use_temp_x, const bool use_temp_y, const std::vector<Teuchos::RCP<block_crs_matrix_type> >& LU_block_v, const std::vector<Teuchos::RCP<kk_handle_type> >& LU_sptrsv_handle_v, const LO numVecs) const {
1159  std::vector<lno_row_view_t> ptr_v(this->num_streams_);
1160  std::vector<lno_nonzero_view_t> ind_v(this->num_streams_);
1161  std::vector<scalar_nonzero_view_t> val_v(this->num_streams_);
1162  std::vector<kk_handle_type*> KernelHandle_rawptr_v(this->num_streams_);
1163 
1164  for (int j = 0; j < numVecs; ++j) {
1165  auto X_view = Kokkos::subview(X_views, Kokkos::ALL(), j);
1166  auto Y_view = Kokkos::subview(Y_views, Kokkos::ALL(), j);
1167  std::vector<decltype(X_view)> x_v(this->num_streams_);
1168  std::vector<decltype(Y_view)> y_v(this->num_streams_);
1169  local_ordinal_type stream_begin = 0;
1170  local_ordinal_type stream_end;
1171  for (int i = 0; i < this->num_streams_; i++) {
1172  auto LU_bcrs_i = LU_block_v[i];
1173  auto LUlocal_i = LU_bcrs_i->getLocalMatrixDevice();
1174  ptr_v[i] = LUlocal_i.graph.row_map;
1175  ind_v[i] = LUlocal_i.graph.entries;
1176  val_v[i] = LUlocal_i.values;
1177  stream_end = stream_begin + LUlocal_i.numRows() * blockSize_;
1178  if (use_temp_x) {
1179  x_v[i] = Kokkos::subview(tmp_, Kokkos::make_pair(stream_begin, stream_end));
1180  } else {
1181  x_v[i] = Kokkos::subview(X_view, Kokkos::make_pair(stream_begin, stream_end));
1182  }
1183  if (use_temp_y) {
1184  y_v[i] = Kokkos::subview(tmp_, Kokkos::make_pair(stream_begin, stream_end));
1185  } else {
1186  y_v[i] = Kokkos::subview(Y_view, Kokkos::make_pair(stream_begin, stream_end));
1187  }
1188  KernelHandle_rawptr_v[i] = LU_sptrsv_handle_v[i].getRawPtr();
1189  stream_begin = stream_end;
1190  }
1191 
1192  Kokkos::fence();
1193  KokkosSparse::Experimental::sptrsv_solve_streams(this->exec_space_instances_, KernelHandle_rawptr_v, ptr_v, ind_v, val_v, x_v, y_v);
1194  Kokkos::fence();
1195  }
1196 }
1197 
1198 template <class MatrixType>
1200  apply(const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
1201  Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
1202  Teuchos::ETransp mode,
1203  scalar_type alpha,
1204  scalar_type beta) const {
1205  using Teuchos::RCP;
1206  typedef Tpetra::BlockMultiVector<scalar_type,
1208  BMV;
1209 
1211  this->A_.is_null(), std::runtime_error,
1212  "Ifpack2::Experimental::RBILUK::apply: The matrix is "
1213  "null. Please call setMatrix() with a nonnull input, then initialize() "
1214  "and compute(), before calling this method.");
1216  !this->isComputed(), std::runtime_error,
1217  "Ifpack2::Experimental::RBILUK::apply: If you have not yet called compute(), "
1218  "you must call compute() before calling this method.");
1219  TEUCHOS_TEST_FOR_EXCEPTION(
1220  X.getNumVectors() != Y.getNumVectors(), std::invalid_argument,
1221  "Ifpack2::Experimental::RBILUK::apply: X and Y do not have the same number of columns. "
1222  "X.getNumVectors() = "
1223  << X.getNumVectors()
1224  << " != Y.getNumVectors() = " << Y.getNumVectors() << ".");
1225  TEUCHOS_TEST_FOR_EXCEPTION(
1226  STS::isComplex && mode == Teuchos::CONJ_TRANS, std::logic_error,
1227  "Ifpack2::Experimental::RBILUK::apply: mode = Teuchos::CONJ_TRANS is not implemented for "
1228  "complex Scalar type. Please talk to the Ifpack2 developers to get this "
1229  "fixed. There is a FIXME in this file about this very issue.");
1230 
1231  const LO blockMatSize = blockSize_ * blockSize_;
1232 
1233  const LO rowStride = blockSize_;
1234 
1235  Teuchos::Array<scalar_type> lclarray(blockSize_);
1236  little_host_vec_type lclvec((typename little_host_vec_type::value_type*)&lclarray[0], blockSize_);
1237  const scalar_type one = STM::one();
1238  const scalar_type zero = STM::zero();
1239 
1240  Teuchos::Time timer("RBILUK::apply");
1241  double startTime = timer.wallTime();
1242  { // Start timing
1243  Teuchos::TimeMonitor timeMon(timer);
1244  if (!this->isKokkosKernelsSpiluk_) {
1245  BMV yBlock(Y, *(this->Graph_->getA_Graph()->getDomainMap()), blockSize_);
1246  const BMV xBlock(X, *(this->A_->getColMap()), blockSize_);
1247 
1248  if (alpha == one && beta == zero) {
1249  if (mode == Teuchos::NO_TRANS) { // Solve L (D (U Y)) = X for Y.
1250  // Start by solving L C = X for C. C must have the same Map
1251  // as D. We have to use a temp multivector, since our
1252  // implementation of triangular solves does not allow its
1253  // input and output to alias one another.
1254  //
1255  // FIXME (mfh 24 Jan 2014) Cache this temp multivector.
1256  const LO numVectors = xBlock.getNumVectors();
1257  BMV cBlock(*(this->Graph_->getA_Graph()->getDomainMap()), blockSize_, numVectors);
1258  BMV rBlock(*(this->Graph_->getA_Graph()->getDomainMap()), blockSize_, numVectors);
1259  for (LO imv = 0; imv < numVectors; ++imv) {
1260  for (size_t i = 0; i < D_block_->getLocalNumRows(); ++i) {
1261  LO local_row = i;
1262  const_host_little_vec_type xval =
1263  xBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::ReadOnly);
1264  little_host_vec_type cval =
1265  cBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::OverwriteAll);
1266  // cval.assign(xval);
1267  Tpetra::COPY(xval, cval);
1268 
1269  local_inds_host_view_type colValsL;
1270  values_host_view_type valsL;
1271  L_block_->getLocalRowView(local_row, colValsL, valsL);
1272  LO NumL = (LO)colValsL.size();
1273 
1274  for (LO j = 0; j < NumL; ++j) {
1275  LO col = colValsL[j];
1276  const_host_little_vec_type prevVal =
1277  cBlock.getLocalBlockHost(col, imv, Tpetra::Access::ReadOnly);
1278 
1279  const LO matOffset = blockMatSize * j;
1280  little_block_host_type lij((typename little_block_host_type::value_type*)&valsL[matOffset], blockSize_, rowStride);
1281 
1282  // cval.matvecUpdate(-one, lij, prevVal);
1283  Tpetra::GEMV(-one, lij, prevVal, cval);
1284  }
1285  }
1286  }
1287 
1288  // Solve D R = C. Note that D has been replaced by D^{-1} at this point.
1289  D_block_->applyBlock(cBlock, rBlock);
1290 
1291  // Solve U Y = R.
1292  for (LO imv = 0; imv < numVectors; ++imv) {
1293  const LO numRows = D_block_->getLocalNumRows();
1294  for (LO i = 0; i < numRows; ++i) {
1295  LO local_row = (numRows - 1) - i;
1296  const_host_little_vec_type rval =
1297  rBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::ReadOnly);
1298  little_host_vec_type yval =
1299  yBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::OverwriteAll);
1300  // yval.assign(rval);
1301  Tpetra::COPY(rval, yval);
1302 
1303  local_inds_host_view_type colValsU;
1304  values_host_view_type valsU;
1305  U_block_->getLocalRowView(local_row, colValsU, valsU);
1306  LO NumU = (LO)colValsU.size();
1307 
1308  for (LO j = 0; j < NumU; ++j) {
1309  LO col = colValsU[NumU - 1 - j];
1310  const_host_little_vec_type prevVal =
1311  yBlock.getLocalBlockHost(col, imv, Tpetra::Access::ReadOnly);
1312 
1313  const LO matOffset = blockMatSize * (NumU - 1 - j);
1314  little_block_host_type uij((typename little_block_host_type::value_type*)&valsU[matOffset], blockSize_, rowStride);
1315 
1316  // yval.matvecUpdate(-one, uij, prevVal);
1317  Tpetra::GEMV(-one, uij, prevVal, yval);
1318  }
1319  }
1320  }
1321  } else { // Solve U^P (D^P (L^P Y)) = X for Y (where P is * or T).
1322  TEUCHOS_TEST_FOR_EXCEPTION(
1323  true, std::runtime_error,
1324  "Ifpack2::Experimental::RBILUK::apply: transpose apply is not implemented for the block algorithm");
1325  }
1326  } else { // alpha != 1 or beta != 0
1327  if (alpha == zero) {
1328  if (beta == zero) {
1329  Y.putScalar(zero);
1330  } else {
1331  Y.scale(beta);
1332  }
1333  } else { // alpha != zero
1334  MV Y_tmp(Y.getMap(), Y.getNumVectors());
1335  apply(X, Y_tmp, mode);
1336  Y.update(alpha, Y_tmp, beta);
1337  }
1338  }
1339  } else {
1340  // Kokkos kernels impl.
1341  auto X_views = X.getLocalViewDevice(Tpetra::Access::ReadOnly);
1342  auto Y_views = Y.getLocalViewDevice(Tpetra::Access::ReadWrite);
1343  const LO numVecs = X.getNumVectors();
1344 
1345  TEUCHOS_TEST_FOR_EXCEPTION(mode != Teuchos::NO_TRANS, std::runtime_error,
1346  "Ifpack2::Experimental::RBILUK::apply: transpose apply is not implemented for the block algorithm");
1347 
1348  if (!this->isKokkosKernelsStream_) {
1349  auto lclL = L_block_->getLocalMatrixDevice();
1350  auto L_rowmap = lclL.graph.row_map;
1351  auto L_entries = lclL.graph.entries;
1352  auto L_values = lclL.values;
1353 
1354  auto lclU = U_block_->getLocalMatrixDevice();
1355  auto U_rowmap = lclU.graph.row_map;
1356  auto U_entries = lclU.graph.entries;
1357  auto U_values = lclU.values;
1358 
1359  for (LO vec = 0; vec < numVecs; ++vec) {
1360  auto X_view = Kokkos::subview(X_views, Kokkos::ALL(), vec);
1361  KokkosSparse::sptrsv_solve(L_Sptrsv_KernelHandle_.getRawPtr(), L_rowmap, L_entries, L_values, X_view, tmp_);
1362  }
1363 
1364  for (LO vec = 0; vec < numVecs; ++vec) {
1365  auto Y_view = Kokkos::subview(Y_views, Kokkos::ALL(), vec);
1366  KokkosSparse::sptrsv_solve(U_Sptrsv_KernelHandle_.getRawPtr(), U_rowmap, U_entries, U_values, tmp_, Y_view);
1367  }
1368  } else {
1369  stream_apply_impl(X_views, Y_views, false, true, L_block_v_, L_Sptrsv_KernelHandle_v_, numVecs);
1370  stream_apply_impl(X_views, Y_views, true, false, U_block_v_, U_Sptrsv_KernelHandle_v_, numVecs);
1371  }
1372 
1373  KokkosBlas::axpby(alpha, Y_views, beta, Y_views);
1374  // Y.getWrappedDualView().sync();
1375  }
1376  } // Stop timing
1377 
1378  this->numApply_ += 1;
1379  this->applyTime_ += (timer.wallTime() - startTime);
1380 }
1381 
1382 template <class MatrixType>
1383 std::string RBILUK<MatrixType>::description() const {
1384  std::ostringstream os;
1385 
1386  // Output is a valid YAML dictionary in flow style. If you don't
1387  // like everything on a single line, you should call describe()
1388  // instead.
1389  os << "\"Ifpack2::Experimental::RBILUK\": {";
1390  os << "Initialized: " << (this->isInitialized() ? "true" : "false") << ", "
1391  << "Computed: " << (this->isComputed() ? "true" : "false") << ", ";
1392 
1393  os << "Level-of-fill: " << this->getLevelOfFill() << ", ";
1394 
1395  if (this->A_.is_null()) {
1396  os << "Matrix: null";
1397  } else {
1398  os << "Global matrix dimensions: ["
1399  << this->A_->getGlobalNumRows() << ", " << this->A_->getGlobalNumCols() << "]"
1400  << ", Global nnz: " << this->A_->getGlobalNumEntries();
1401  }
1402 
1403  os << "}";
1404  return os.str();
1405 }
1406 
1407 } // namespace Experimental
1408 
1409 } // namespace Ifpack2
1410 
1411 // FIXME (mfh 26 Aug 2015) We only need to do instantiation for
1412 // MatrixType = Tpetra::RowMatrix. Conversions to BlockCrsMatrix are
1413 // handled internally via dynamic cast.
1414 
1415 #define IFPACK2_EXPERIMENTAL_RBILUK_INSTANT(S, LO, GO, N) \
1416  template class Ifpack2::Experimental::RBILUK<Tpetra::BlockCrsMatrix<S, LO, GO, N> >; \
1417  template class Ifpack2::Experimental::RBILUK<Tpetra::RowMatrix<S, LO, GO, N> >;
1418 
1419 #endif
void initialize()
Initialize by computing the symbolic incomplete factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:344
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:111
virtual ~RBILUK()
Destructor (declared virtual for memory safety).
Definition: Ifpack2_Experimental_RBILUK_def.hpp:159
static Teuchos::RCP< const row_matrix_type > makeLocalFilter(const Teuchos::RCP< const row_matrix_type > &A)
Return A, wrapped in a LocalFilter, if necessary.
const block_crs_matrix_type & getUBlock() const
Return the U factor of the ILU factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:208
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:101
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the (inverse of the) incomplete factorization to X, resulting in Y.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:1200
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
size_type size() const
Teuchos::ScalarTraits< scalar_type >::magnitudeType magnitude_type
The type of the magnitude (absolute value) of a matrix entry.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:120
ILU(k) factorization of a given Tpetra::RowMatrix.
Definition: Ifpack2_RILUK_decl.hpp:213
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:115
Tpetra::CrsMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > crs_matrix_type
Tpetra::CrsMatrix specialization used by this class for representing L and U.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:134
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:107
T * getRawPtr() const
const block_crs_matrix_type & getDBlock() const
Return the diagonal entries of the ILU factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:195
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
void compute()
Compute the (numeric) incomplete factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:701
File for utility functions.
Construct a level filled graph for use in computing an ILU(k) incomplete factorization.
Definition: Ifpack2_IlukGraph.hpp:67
void setMatrix(const Teuchos::RCP< const block_crs_matrix_type > &A)
Change the matrix to be preconditioned.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:162
const block_crs_matrix_type & getLBlock() const
Return the L factor of the ILU factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:182
size_type size() const
static double wallTime()
Tpetra::RowMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > row_matrix_type
Tpetra::RowMatrix specialization used by this class.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:127
std::string description() const
A one-line description of this object.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:1383
ILU(k) factorization of a given Tpetra::BlockCrsMatrix.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:95
bool is_null() const