Ifpack2 Templated Preconditioning Package Version 1.0
Ifpack2_Experimental_RBILUK_def.hpp
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
43 #ifndef IFPACK2_EXPERIMENTAL_CRSRBILUK_DEF_HPP
44 #define IFPACK2_EXPERIMENTAL_CRSRBILUK_DEF_HPP
45 
46 #include "Tpetra_BlockMultiVector.hpp"
47 #include "Tpetra_BlockView.hpp"
48 #include "Ifpack2_OverlappingRowMatrix.hpp"
49 #include "Ifpack2_LocalFilter.hpp"
50 #include "Ifpack2_Utilities.hpp"
51 #include "Ifpack2_RILUK.hpp"
52 
53 //#define IFPACK2_RBILUK_INITIAL
54 #define IFPACK2_RBILUK_INITIAL_NOKK
55 
56 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
57 #include "KokkosBatched_Gemm_Decl.hpp"
58 #include "KokkosBatched_Gemm_Serial_Impl.hpp"
59 #include "KokkosBatched_Util.hpp"
60 #endif
61 
62 namespace Ifpack2 {
63 
64 namespace Experimental {
65 
66 namespace
67 {
68 template<class MatrixType>
69 struct LocalRowHandler
70 {
71  using LocalOrdinal = typename MatrixType::local_ordinal_type;
72  using row_matrix_type = Tpetra::RowMatrix<
73  typename MatrixType::scalar_type,
74  LocalOrdinal,
75  typename MatrixType::global_ordinal_type,
76  typename MatrixType::node_type>;
77  using inds_type = typename row_matrix_type::local_inds_host_view_type;
78  using vals_type = typename row_matrix_type::values_host_view_type;
79 
80  LocalRowHandler(Teuchos::RCP<const row_matrix_type> A)
81  : A_(std::move(A))
82  {
83  if (!A_->supportsRowViews())
84  {
85  const auto maxNumRowEntr = A_->getLocalMaxNumRowEntries();
86  const auto blockSize = A_->getBlockSize();
87  ind_nc_ = inds_type_nc("Ifpack2::RBILUK::LocalRowHandler::indices",maxNumRowEntr);
88  val_nc_ = vals_type_nc("Ifpack2::RBILUK::LocalRowHandler::values",maxNumRowEntr*blockSize*blockSize);
89  }
90  }
91 
92  void getLocalRow(LocalOrdinal local_row, inds_type & InI, vals_type & InV, LocalOrdinal & NumIn)
93  {
94  if (A_->supportsRowViews())
95  {
96  A_->getLocalRowView(local_row,InI,InV);
97  NumIn = (LocalOrdinal)InI.size();
98  }
99  else
100  {
101  size_t cnt;
102  A_->getLocalRowCopy(local_row,ind_nc_,val_nc_,cnt);
103  InI = ind_nc_;
104  InV = val_nc_;
105  NumIn = (LocalOrdinal)cnt;
106  }
107  }
108 
109 private:
110 
111  using inds_type_nc = typename row_matrix_type::nonconst_local_inds_host_view_type;
112  using vals_type_nc = typename row_matrix_type::nonconst_values_host_view_type;
113 
114  Teuchos::RCP<const row_matrix_type> A_;
115  inds_type_nc ind_nc_;
116  vals_type_nc val_nc_;
117 };
118 
119 } // namespace
120 
121 template<class MatrixType>
122 RBILUK<MatrixType>::RBILUK (const Teuchos::RCP<const row_matrix_type>& Matrix_in)
123  : RILUK<row_matrix_type>(Teuchos::rcp_dynamic_cast<const row_matrix_type>(Matrix_in) )
124 {}
125 
126 template<class MatrixType>
127 RBILUK<MatrixType>::RBILUK (const Teuchos::RCP<const block_crs_matrix_type>& Matrix_in)
128  : RILUK<row_matrix_type>(Teuchos::rcp_dynamic_cast<const row_matrix_type>(Matrix_in) )
129 {}
130 
131 
132 template<class MatrixType>
133 RBILUK<MatrixType>::~RBILUK() {}
134 
135 
136 template<class MatrixType>
137 void
138 RBILUK<MatrixType>::setMatrix (const Teuchos::RCP<const block_crs_matrix_type>& A)
139 {
140  // FIXME (mfh 04 Nov 2015) What about A_? When does that get (re)set?
141 
142  // It's legal for A to be null; in that case, you may not call
143  // initialize() until calling setMatrix() with a nonnull input.
144  // Regardless, setting the matrix invalidates any previous
145  // factorization.
146  if (A.getRawPtr () != this->A_.getRawPtr ())
147  {
148  this->isAllocated_ = false;
149  this->isInitialized_ = false;
150  this->isComputed_ = false;
151  this->Graph_ = Teuchos::null;
152  L_block_ = Teuchos::null;
153  U_block_ = Teuchos::null;
154  D_block_ = Teuchos::null;
155  }
156 }
157 
158 
159 
160 template<class MatrixType>
161 const typename RBILUK<MatrixType>::block_crs_matrix_type&
162 RBILUK<MatrixType>::getLBlock () const
163 {
164  TEUCHOS_TEST_FOR_EXCEPTION(
165  L_block_.is_null (), std::runtime_error, "Ifpack2::RILUK::getL: The L factor "
166  "is null. Please call initialize() (and preferably also compute()) "
167  "before calling this method. If the input matrix has not yet been set, "
168  "you must first call setMatrix() with a nonnull input matrix before you "
169  "may call initialize() or compute().");
170  return *L_block_;
171 }
172 
173 
174 template<class MatrixType>
175 const typename RBILUK<MatrixType>::block_crs_matrix_type&
176 RBILUK<MatrixType>::getDBlock () const
177 {
178  TEUCHOS_TEST_FOR_EXCEPTION(
179  D_block_.is_null (), std::runtime_error, "Ifpack2::RILUK::getD: The D factor "
180  "(of diagonal entries) is null. Please call initialize() (and "
181  "preferably also compute()) before calling this method. If the input "
182  "matrix has not yet been set, you must first call setMatrix() with a "
183  "nonnull input matrix before you may call initialize() or compute().");
184  return *D_block_;
185 }
186 
187 
188 template<class MatrixType>
189 const typename RBILUK<MatrixType>::block_crs_matrix_type&
190 RBILUK<MatrixType>::getUBlock () const
191 {
192  TEUCHOS_TEST_FOR_EXCEPTION(
193  U_block_.is_null (), std::runtime_error, "Ifpack2::RILUK::getU: The U factor "
194  "is null. Please call initialize() (and preferably also compute()) "
195  "before calling this method. If the input matrix has not yet been set, "
196  "you must first call setMatrix() with a nonnull input matrix before you "
197  "may call initialize() or compute().");
198  return *U_block_;
199 }
200 
201 template<class MatrixType>
202 void RBILUK<MatrixType>::allocate_L_and_U_blocks ()
203 {
204  using Teuchos::null;
205  using Teuchos::rcp;
206 
207  if (! this->isAllocated_) {
208  // Deallocate any existing storage. This avoids storing 2x
209  // memory, since RCP op= does not deallocate until after the
210  // assignment.
211  this->L_ = null;
212  this->U_ = null;
213  this->D_ = null;
214  L_block_ = null;
215  U_block_ = null;
216  D_block_ = null;
217 
218  // Allocate Matrix using ILUK graphs
219  L_block_ = rcp(new block_crs_matrix_type(*this->Graph_->getL_Graph (), blockSize_) );
220  U_block_ = rcp(new block_crs_matrix_type(*this->Graph_->getU_Graph (), blockSize_) );
221  D_block_ = rcp(new block_crs_matrix_type(*(Ifpack2::Details::computeDiagonalGraph(*(this->Graph_->getOverlapGraph()))),
222  blockSize_) );
223  L_block_->setAllToScalar (STM::zero ()); // Zero out L and U matrices
224  U_block_->setAllToScalar (STM::zero ());
225  D_block_->setAllToScalar (STM::zero ());
226 
227  }
228  this->isAllocated_ = true;
229 }
230 
231 namespace
232 {
233 template<class MatrixType>
234 Teuchos::RCP<const typename RBILUK<MatrixType>::row_matrix_type>
235 makeLocalFilter (const Teuchos::RCP<const typename RBILUK<MatrixType>::row_matrix_type>& A)
236 {
237  using row_matrix_type = typename RBILUK<MatrixType>::row_matrix_type;
238  using Teuchos::RCP;
239  using Teuchos::rcp;
240  using Teuchos::rcp_dynamic_cast;
241  using Teuchos::rcp_implicit_cast;
242 
243  // If A_'s communicator only has one process, or if its column and
244  // row Maps are the same, then it is already local, so use it
245  // directly.
246  if (A->getRowMap ()->getComm ()->getSize () == 1 ||
247  A->getRowMap ()->isSameAs (* (A->getColMap ()))) {
248  return A;
249  }
250 
251  // If A_ is already a LocalFilter, then use it directly. This
252  // should be the case if RILUK is being used through
253  // AdditiveSchwarz, for example.
254  RCP<const LocalFilter<row_matrix_type> > A_lf_r =
255  rcp_dynamic_cast<const LocalFilter<row_matrix_type> > (A);
256  if (! A_lf_r.is_null ()) {
257  return rcp_implicit_cast<const row_matrix_type> (A_lf_r);
258  }
259  else {
260  // A_'s communicator has more than one process, its row Map and
261  // its column Map differ, and A_ is not a LocalFilter. Thus, we
262  // have to wrap it in a LocalFilter.
263  return rcp (new LocalFilter<row_matrix_type> (A));
264  }
265 }
266 
267 template<class MatrixType>
268 Teuchos::RCP<const typename RBILUK<MatrixType>::crs_graph_type>
269 getBlockCrsGraph(const Teuchos::RCP<const typename RBILUK<MatrixType>::row_matrix_type>& A,const typename MatrixType::local_ordinal_type blockSize)
270 {
271  using local_ordinal_type = typename MatrixType::local_ordinal_type;
272  using Teuchos::RCP;
273  using Teuchos::rcp;
274  using Teuchos::rcp_dynamic_cast;
275  using Teuchos::rcp_const_cast;
276  using Teuchos::rcpFromRef;
277  using row_matrix_type = typename RBILUK<MatrixType>::row_matrix_type;
278  using crs_graph_type = typename RBILUK<MatrixType>::crs_graph_type;
279  using block_crs_matrix_type = typename RBILUK<MatrixType>::block_crs_matrix_type;
280 
281  auto A_local = makeLocalFilter<MatrixType>(A);
282 
283  {
284  RCP<const LocalFilter<row_matrix_type> > filteredA =
285  rcp_dynamic_cast<const LocalFilter<row_matrix_type> >(A_local);
286  RCP<const OverlappingRowMatrix<row_matrix_type> > overlappedA = Teuchos::null;
287  RCP<const block_crs_matrix_type > A_block = Teuchos::null;
288  if (!filteredA.is_null ())
289  {
290  overlappedA = rcp_dynamic_cast<const OverlappingRowMatrix<row_matrix_type> > (filteredA->getUnderlyingMatrix ());
291  }
292 
293  if (! overlappedA.is_null ()) {
294  A_block = rcp_dynamic_cast<const block_crs_matrix_type>(overlappedA->getUnderlyingMatrix());
295  }
296  else if (!filteredA.is_null ()){
297  //If there is no overlap, filteredA could be the block CRS matrix
298  A_block = rcp_dynamic_cast<const block_crs_matrix_type>(filteredA->getUnderlyingMatrix());
299  }
300  else
301  {
302  A_block = rcp_dynamic_cast<const block_crs_matrix_type>(A_local);
303  }
304 
305  if (!A_block.is_null()){
306  return rcpFromRef(A_block->getCrsGraph());
307  }
308  }
309 
310  // Could not extract block crs, make graph manually
311  {
312  local_ordinal_type numRows = A_local->getLocalNumRows();
313  Teuchos::Array<size_t> entriesPerRow(numRows);
314  for(local_ordinal_type i = 0; i < numRows; i++) {
315  entriesPerRow[i] = A_local->getNumEntriesInLocalRow(i);
316  }
317  RCP<crs_graph_type> A_local_crs_nc =
318  rcp (new crs_graph_type (A_local->getRowMap (),
319  A_local->getColMap (),
320  entriesPerRow()));
321 
322  {
323  using LocalRowHandler_t = LocalRowHandler<MatrixType>;
324  LocalRowHandler_t localRowHandler(A_local);
325  typename LocalRowHandler_t::inds_type indices;
326  typename LocalRowHandler_t::vals_type values;
327  for(local_ordinal_type i = 0; i < numRows; i++) {
328  local_ordinal_type numEntries = 0;
329  localRowHandler.getLocalRow(i, indices, values, numEntries);
330  A_local_crs_nc->insertLocalIndices(i, numEntries,indices.data());
331  }
332  }
333 
334  A_local_crs_nc->fillComplete (A_local->getDomainMap (), A_local->getRangeMap ());
335  return rcp_const_cast<const crs_graph_type> (A_local_crs_nc);
336  }
337 
338 }
339 
340 
341 } // namespace
342 
343 template<class MatrixType>
344 void RBILUK<MatrixType>::initialize ()
345 {
346  using Teuchos::RCP;
347  using Teuchos::rcp;
348  using Teuchos::rcp_dynamic_cast;
349  const char prefix[] = "Ifpack2::Experimental::RBILUK::initialize: ";
350 
351  TEUCHOS_TEST_FOR_EXCEPTION
352  (this->A_.is_null (), std::runtime_error, prefix << "The matrix (A_, the "
353  "RowMatrix) is null. Please call setMatrix() with a nonnull input "
354  "before calling this method.");
355  TEUCHOS_TEST_FOR_EXCEPTION
356  (! this->A_->isFillComplete (), std::runtime_error, prefix << "The matrix "
357  "(A_, the BlockCrsMatrix) is not fill complete. You may not invoke "
358  "initialize() or compute() with this matrix until the matrix is fill "
359  "complete. Note: BlockCrsMatrix is fill complete if and only if its "
360  "underlying graph is fill complete.");
361 
362  blockSize_ = this->A_->getBlockSize();
363 
364  Teuchos::Time timer ("RBILUK::initialize");
365  double startTime = timer.wallTime();
366  { // Start timing
367  Teuchos::TimeMonitor timeMon (timer);
368 
369  // Calling initialize() means that the user asserts that the graph
370  // of the sparse matrix may have changed. We must not just reuse
371  // the previous graph in that case.
372  //
373  // Regarding setting isAllocated_ to false: Eventually, we may want
374  // some kind of clever memory reuse strategy, but it's always
375  // correct just to blow everything away and start over.
376  this->isInitialized_ = false;
377  this->isAllocated_ = false;
378  this->isComputed_ = false;
379  this->Graph_ = Teuchos::null;
380 
381  RCP<const crs_graph_type> matrixCrsGraph = getBlockCrsGraph<MatrixType>(this->A_,blockSize_);
382  this->Graph_ = rcp (new Ifpack2::IlukGraph<crs_graph_type,kk_handle_type> (matrixCrsGraph,
383  this->LevelOfFill_, 0));
384 
385  this->Graph_->initialize ();
386  allocate_L_and_U_blocks ();
387 #ifdef IFPACK2_RBILUK_INITIAL
388  initAllValues ();
389 #endif
390  } // Stop timing
391 
392  this->isInitialized_ = true;
393  this->numInitialize_ += 1;
394  this->initializeTime_ += (timer.wallTime() - startTime);
395 }
396 
397 
398 template<class MatrixType>
399 void
400 RBILUK<MatrixType>::
401 initAllValues ()
402 {
403  using Teuchos::RCP;
404  typedef Tpetra::Map<LO,GO,node_type> map_type;
405 
406  LO NumIn = 0, NumL = 0, NumU = 0;
407  bool DiagFound = false;
408  size_t NumNonzeroDiags = 0;
409  size_t MaxNumEntries = this->A_->getLocalMaxNumRowEntries();
410  LO blockMatSize = blockSize_*blockSize_;
411 
412  // First check that the local row map ordering is the same as the local portion of the column map.
413  // The extraction of the strictly lower/upper parts of A, as well as the factorization,
414  // implicitly assume that this is the case.
415  Teuchos::ArrayView<const GO> rowGIDs = this->A_->getRowMap()->getLocalElementList();
416  Teuchos::ArrayView<const GO> colGIDs = this->A_->getColMap()->getLocalElementList();
417  bool gidsAreConsistentlyOrdered=true;
418  GO indexOfInconsistentGID=0;
419  for (GO i=0; i<rowGIDs.size(); ++i) {
420  if (rowGIDs[i] != colGIDs[i]) {
421  gidsAreConsistentlyOrdered=false;
422  indexOfInconsistentGID=i;
423  break;
424  }
425  }
426  TEUCHOS_TEST_FOR_EXCEPTION(gidsAreConsistentlyOrdered==false, std::runtime_error,
427  "The ordering of the local GIDs in the row and column maps is not the same"
428  << std::endl << "at index " << indexOfInconsistentGID
429  << ". Consistency is required, as all calculations are done with"
430  << std::endl << "local indexing.");
431 
432  // Allocate temporary space for extracting the strictly
433  // lower and upper parts of the matrix A.
434  Teuchos::Array<LO> LI(MaxNumEntries);
435  Teuchos::Array<LO> UI(MaxNumEntries);
436  Teuchos::Array<scalar_type> LV(MaxNumEntries*blockMatSize);
437  Teuchos::Array<scalar_type> UV(MaxNumEntries*blockMatSize);
438 
439  Teuchos::Array<scalar_type> diagValues(blockMatSize);
440 
441  L_block_->setAllToScalar (STM::zero ()); // Zero out L and U matrices
442  U_block_->setAllToScalar (STM::zero ());
443  D_block_->setAllToScalar (STM::zero ()); // Set diagonal values to zero
444 
445  // NOTE (mfh 27 May 2016) The factorization below occurs entirely on
446  // host, so sync to host first. The const_cast is unfortunate but
447  // is our only option to make this correct.
448 
449  /*
450  const_cast<block_crs_matrix_type&> (A).sync_host ();
451  L_block_->sync_host ();
452  U_block_->sync_host ();
453  D_block_->sync_host ();
454  // NOTE (mfh 27 May 2016) We're modifying L, U, and D on host.
455  L_block_->modify_host ();
456  U_block_->modify_host ();
457  D_block_->modify_host ();
458  */
459 
460  RCP<const map_type> rowMap = L_block_->getRowMap ();
461 
462  // First we copy the user's matrix into L and U, regardless of fill level.
463  // It is important to note that L and U are populated using local indices.
464  // This means that if the row map GIDs are not monotonically increasing
465  // (i.e., permuted or gappy), then the strictly lower (upper) part of the
466  // matrix is not the one that you would get if you based L (U) on GIDs.
467  // This is ok, as the *order* of the GIDs in the rowmap is a better
468  // expression of the user's intent than the GIDs themselves.
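 // Each structurally present diagonal block is also perturbed entrywise as
 // a := Rthresh_ * a + sgn(a) * Athresh_ while it is copied (see the loop
 // below); a row with no diagonal entry in A instead gets Athresh_ placed
 // on the diagonal of its block in D.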
469 
470  //TODO BMK: Revisit this fence when BlockCrsMatrix is refactored.
471  Kokkos::fence();
472  using LocalRowHandler_t = LocalRowHandler<MatrixType>;
473  LocalRowHandler_t localRowHandler(this->A_);
474  typename LocalRowHandler_t::inds_type InI;
475  typename LocalRowHandler_t::vals_type InV;
476  for (size_t myRow=0; myRow<this->A_->getLocalNumRows(); ++myRow) {
477  LO local_row = myRow;
478 
479  localRowHandler.getLocalRow(local_row, InI, InV, NumIn);
480 
481  // Split into L and U (we don't assume that indices are ordered).
482  NumL = 0;
483  NumU = 0;
484  DiagFound = false;
485 
486  for (LO j = 0; j < NumIn; ++j) {
487  const LO k = InI[j];
488  const LO blockOffset = blockMatSize*j;
489 
490  if (k == local_row) {
491  DiagFound = true;
492  // Store perturbed diagonal in Tpetra::Vector D_
493  for (LO jj = 0; jj < blockMatSize; ++jj)
494  diagValues[jj] = this->Rthresh_ * InV[blockOffset+jj] + IFPACK2_SGN(InV[blockOffset+jj]) * this->Athresh_;
495  D_block_->replaceLocalValues(local_row, &InI[j], diagValues.getRawPtr(), 1);
496  }
497  else if (k < 0) { // Out of range
498  TEUCHOS_TEST_FOR_EXCEPTION(
499  true, std::runtime_error, "Ifpack2::RILUK::initAllValues: current "
500  "GID k = " << k << " < 0. I'm not sure why this is an error; it is "
501  "probably an artifact of the undocumented assumptions of the "
502  "original implementation (likely copied and pasted from Ifpack). "
503  "Nevertheless, the code I found here insisted on this being an error "
504  "state, so I will throw an exception here.");
505  }
506  else if (k < local_row) {
507  LI[NumL] = k;
508  const LO LBlockOffset = NumL*blockMatSize;
509  for (LO jj = 0; jj < blockMatSize; ++jj)
510  LV[LBlockOffset+jj] = InV[blockOffset+jj];
511  NumL++;
512  }
513  else if (Teuchos::as<size_t>(k) <= rowMap->getLocalNumElements()) {
514  UI[NumU] = k;
515  const LO UBlockOffset = NumU*blockMatSize;
516  for (LO jj = 0; jj < blockMatSize; ++jj)
517  UV[UBlockOffset+jj] = InV[blockOffset+jj];
518  NumU++;
519  }
520  }
521 
522  // Check in things for this row of L and U
523 
524  if (DiagFound) {
525  ++NumNonzeroDiags;
526  } else
527  {
528  for (LO jj = 0; jj < blockSize_; ++jj)
529  diagValues[jj*(blockSize_+1)] = this->Athresh_;
530  D_block_->replaceLocalValues(local_row, &local_row, diagValues.getRawPtr(), 1);
531  }
532 
533  if (NumL) {
534  L_block_->replaceLocalValues(local_row, &LI[0], &LV[0], NumL);
535  }
536 
537  if (NumU) {
538  U_block_->replaceLocalValues(local_row, &UI[0], &UV[0], NumU);
539  }
540  }
541 
542  // NOTE (mfh 27 May 2016) Sync back to device, in case compute()
543  // ever gets a device implementation.
544  /*
545  {
546  typedef typename block_crs_matrix_type::device_type device_type;
547  const_cast<block_crs_matrix_type&> (A).template sync<device_type> ();
548  L_block_->template sync<device_type> ();
549  U_block_->template sync<device_type> ();
550  D_block_->template sync<device_type> ();
551  }
552  */
553  this->isInitialized_ = true;
554 }
555 
556 namespace { // (anonymous)
557 
558 // For a given Kokkos::View type, possibly unmanaged, get the
559 // corresponding managed Kokkos::View type. This is handy for
560 // translating from little_block_type or little_host_vec_type (both
561 // possibly unmanaged) to their managed versions.
562 template<class LittleBlockType>
563 struct GetManagedView {
564  static_assert (Kokkos::is_view<LittleBlockType>::value,
565  "LittleBlockType must be a Kokkos::View.");
566  typedef Kokkos::View<typename LittleBlockType::non_const_data_type,
567  typename LittleBlockType::array_layout,
568  typename LittleBlockType::device_type> managed_non_const_type;
569  static_assert (static_cast<int> (managed_non_const_type::rank) ==
570  static_cast<int> (LittleBlockType::rank),
571  "managed_non_const_type::rank != LittleBlock::rank. "
572  "Please report this bug to the Ifpack2 developers.");
573 };
574 
575 } // namespace (anonymous)
576 
577 template<class MatrixType>
578 void RBILUK<MatrixType>::compute ()
579 {
580  typedef impl_scalar_type IST;
581  const char prefix[] = "Ifpack2::Experimental::RBILUK::compute: ";
582 
583  // initialize() checks this too, but it's easier for users if the
584  // error shows them the name of the method that they actually
585  // called, rather than the name of some internally called method.
586  TEUCHOS_TEST_FOR_EXCEPTION
587  (this->A_.is_null (), std::runtime_error, prefix << "The matrix (A_, "
588  "the BlockCrsMatrix) is null. Please call setMatrix() with a nonnull "
589  "input before calling this method.");
590  TEUCHOS_TEST_FOR_EXCEPTION
591  (! this->A_->isFillComplete (), std::runtime_error, prefix << "The matrix "
592  "(A_, the BlockCrsMatrix) is not fill complete. You may not invoke "
593  "initialize() or compute() with this matrix until the matrix is fill "
594  "complete. Note: BlockCrsMatrix is fill complete if and only if its "
595  "underlying graph is fill complete.");
596 
597  if (! this->isInitialized ()) {
598  initialize (); // Don't count this in the compute() time
599  }
600 
601  // // NOTE (mfh 27 May 2016) The factorization below occurs entirely on
602  // // host, so sync to host first. The const_cast is unfortunate but
603  // // is our only option to make this correct.
604  // if (! A_block_.is_null ()) {
605  // Teuchos::RCP<block_crs_matrix_type> A_nc =
606  // Teuchos::rcp_const_cast<block_crs_matrix_type> (A_block_);
607  // // A_nc->sync_host ();
608  // }
609  /*
610  L_block_->sync_host ();
611  U_block_->sync_host ();
612  D_block_->sync_host ();
613  // NOTE (mfh 27 May 2016) We're modifying L, U, and D on host.
614  L_block_->modify_host ();
615  U_block_->modify_host ();
616  D_block_->modify_host ();
617  */
618 
619  Teuchos::Time timer ("RBILUK::compute");
620  double startTime = timer.wallTime();
621  { // Start timing
622  Teuchos::TimeMonitor timeMon (timer);
623  this->isComputed_ = false;
624 
625  // MinMachNum should be officially defined, for now pick something a little
626  // bigger than IEEE underflow value
627 
628 // const scalar_type MinDiagonalValue = STS::rmin ();
629 // const scalar_type MaxDiagonalValue = STS::one () / MinDiagonalValue;
630  initAllValues ();
631 
632  size_t NumIn;
633  LO NumL, NumU, NumURead;
634 
635  // Get Maximum Row length
636  const size_t MaxNumEntries =
637  L_block_->getLocalMaxNumRowEntries () + U_block_->getLocalMaxNumRowEntries () + 1;
638 
639  const LO blockMatSize = blockSize_*blockSize_;
640 
641  // FIXME (mfh 08 Nov 2015, 24 May 2016) We need to move away from
642  // expressing these strides explicitly, in order to finish #177
643  // (complete Kokkos-ization of BlockCrsMatrix) thoroughly.
644  const LO rowStride = blockSize_;
645 
646  Teuchos::Array<int> ipiv_teuchos(blockSize_);
647  Kokkos::View<int*, Kokkos::HostSpace,
648  Kokkos::MemoryUnmanaged> ipiv (ipiv_teuchos.getRawPtr (), blockSize_);
649  Teuchos::Array<IST> work_teuchos(blockSize_);
650  Kokkos::View<IST*, Kokkos::HostSpace,
651  Kokkos::MemoryUnmanaged> work (work_teuchos.getRawPtr (), blockSize_);
652 
653  size_t num_cols = U_block_->getColMap()->getLocalNumElements();
654  Teuchos::Array<int> colflag(num_cols);
655 
656  typename GetManagedView<little_block_host_type>::managed_non_const_type diagModBlock ("diagModBlock", blockSize_, blockSize_);
657  typename GetManagedView<little_block_host_type>::managed_non_const_type matTmp ("matTmp", blockSize_, blockSize_);
658  typename GetManagedView<little_block_host_type>::managed_non_const_type multiplier ("multiplier", blockSize_, blockSize_);
659 
660 // Teuchos::ArrayRCP<scalar_type> DV = D_->get1dViewNonConst(); // Get view of diagonal
661 
662  // Now start the factorization.
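 // For each local row i, the loop below performs the usual IKJ-variant block
 // ILU update: the row's L, D, and U blocks are gathered into the InI/InV
 // workspace; for each lower block (i,j), the current block value is saved
 // in "multiplier", the stored block is multiplied on the right by D(j,j)
 // (which at this point already holds the inverted pivot block of row j),
 // and multiplier times the blocks of row j of U is subtracted from the
 // matching workspace entries, with fill outside the pattern accumulated
 // into diagModBlock when RelaxValue_ != 0. Afterwards the diagonal block
 // is factored and inverted in place via GETF2/GETRI, and the upper blocks
 // of the row are scaled by that inverse.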
663 
664  // Need some integer workspace and pointers
665  LO NumUU;
666  for (size_t j = 0; j < num_cols; ++j) {
667  colflag[j] = -1;
668  }
669  Teuchos::Array<LO> InI(MaxNumEntries, 0);
670  Teuchos::Array<scalar_type> InV(MaxNumEntries*blockMatSize,STM::zero());
671 
672  const LO numLocalRows = L_block_->getLocalNumRows ();
673  for (LO local_row = 0; local_row < numLocalRows; ++local_row) {
674 
675  // Fill InV, InI with current row of L, D and U combined
676 
677  NumIn = MaxNumEntries;
678  local_inds_host_view_type colValsL;
679  values_host_view_type valsL;
680 
681  L_block_->getLocalRowView(local_row, colValsL, valsL);
682  NumL = (LO) colValsL.size();
683  for (LO j = 0; j < NumL; ++j)
684  {
685  const LO matOffset = blockMatSize*j;
686  little_block_host_type lmat((typename little_block_host_type::value_type*) &valsL[matOffset],blockSize_,rowStride);
687  little_block_host_type lmatV((typename little_block_host_type::value_type*) &InV[matOffset],blockSize_,rowStride);
688  //lmatV.assign(lmat);
689  Tpetra::COPY (lmat, lmatV);
690  InI[j] = colValsL[j];
691  }
692 
693  little_block_host_type dmat = D_block_->getLocalBlockHostNonConst(local_row, local_row);
694  little_block_host_type dmatV((typename little_block_host_type::value_type*) &InV[NumL*blockMatSize], blockSize_, rowStride);
695  //dmatV.assign(dmat);
696  Tpetra::COPY (dmat, dmatV);
697  InI[NumL] = local_row;
698 
699  local_inds_host_view_type colValsU;
700  values_host_view_type valsU;
701  U_block_->getLocalRowView(local_row, colValsU, valsU);
702  NumURead = (LO) colValsU.size();
703  NumU = 0;
704  for (LO j = 0; j < NumURead; ++j)
705  {
706  if (!(colValsU[j] < numLocalRows)) continue;
707  InI[NumL+1+j] = colValsU[j];
708  const LO matOffset = blockMatSize*(NumL+1+j);
709  little_block_host_type umat((typename little_block_host_type::value_type*) &valsU[blockMatSize*j], blockSize_, rowStride);
710  little_block_host_type umatV((typename little_block_host_type::value_type*) &InV[matOffset], blockSize_, rowStride);
711  //umatV.assign(umat);
712  Tpetra::COPY (umat, umatV);
713  NumU += 1;
714  }
715  NumIn = NumL+NumU+1;
716 
717  // Set column flags
718  for (size_t j = 0; j < NumIn; ++j) {
719  colflag[InI[j]] = j;
720  }
721 
722 #ifndef IFPACK2_RBILUK_INITIAL
723  for (LO i = 0; i < blockSize_; ++i)
724  for (LO j = 0; j < blockSize_; ++j){
725  {
726  diagModBlock(i,j) = 0;
727  }
728  }
729 #else
730  scalar_type diagmod = STM::zero (); // Off-diagonal accumulator
731  Kokkos::deep_copy (diagModBlock, diagmod);
732 #endif
733 
734  for (LO jj = 0; jj < NumL; ++jj) {
735  LO j = InI[jj];
736  little_block_host_type currentVal((typename little_block_host_type::value_type*) &InV[jj*blockMatSize], blockSize_, rowStride); // current_mults++;
737  //multiplier.assign(currentVal);
738  Tpetra::COPY (currentVal, multiplier);
739 
740  const little_block_host_type dmatInverse = D_block_->getLocalBlockHostNonConst(j,j);
741  // alpha = 1, beta = 0
742 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
743  KokkosBatched::Experimental::SerialGemm
744  <KokkosBatched::Experimental::Trans::NoTranspose,
745  KokkosBatched::Experimental::Trans::NoTranspose,
746  KokkosBatched::Experimental::Algo::Gemm::Blocked>::invoke
747  (STS::one (), currentVal, dmatInverse, STS::zero (), matTmp);
748 #else
749  Tpetra::GEMM ("N", "N", STS::one (), currentVal, dmatInverse,
750  STS::zero (), matTmp);
751 #endif
752  //blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*> (currentVal.data ()), reinterpret_cast<impl_scalar_type*> (dmatInverse.data ()), reinterpret_cast<impl_scalar_type*> (matTmp.data ()), blockSize_);
753  //currentVal.assign(matTmp);
754  Tpetra::COPY (matTmp, currentVal);
755  local_inds_host_view_type UUI;
756  values_host_view_type UUV;
757 
758  U_block_->getLocalRowView(j, UUI, UUV);
759  NumUU = (LO) UUI.size();
760 
761  if (this->RelaxValue_ == STM::zero ()) {
762  for (LO k = 0; k < NumUU; ++k) {
763  if (!(UUI[k] < numLocalRows)) continue;
764  const int kk = colflag[UUI[k]];
765  if (kk > -1) {
766  little_block_host_type kkval((typename little_block_host_type::value_type*) &InV[kk*blockMatSize], blockSize_, rowStride);
767  little_block_host_type uumat((typename little_block_host_type::value_type*) &UUV[k*blockMatSize], blockSize_, rowStride);
768 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
769  KokkosBatched::Experimental::SerialGemm
770  <KokkosBatched::Experimental::Trans::NoTranspose,
771  KokkosBatched::Experimental::Trans::NoTranspose,
772  KokkosBatched::Experimental::Algo::Gemm::Blocked>::invoke
773  ( magnitude_type(-STM::one ()), multiplier, uumat, STM::one (), kkval);
774 #else
775  Tpetra::GEMM ("N", "N", magnitude_type(-STM::one ()), multiplier, uumat,
776  STM::one (), kkval);
777 #endif
778  //blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*> (multiplier.data ()), reinterpret_cast<impl_scalar_type*> (uumat.data ()), reinterpret_cast<impl_scalar_type*> (kkval.data ()), blockSize_, -STM::one(), STM::one());
779  }
780  }
781  }
782  else {
783  for (LO k = 0; k < NumUU; ++k) {
784  if (!(UUI[k] < numLocalRows)) continue;
785  const int kk = colflag[UUI[k]];
786  little_block_host_type uumat((typename little_block_host_type::value_type*) &UUV[k*blockMatSize], blockSize_, rowStride);
787  if (kk > -1) {
788  little_block_host_type kkval((typename little_block_host_type::value_type*) &InV[kk*blockMatSize], blockSize_, rowStride);
789 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
790  KokkosBatched::Experimental::SerialGemm
791  <KokkosBatched::Experimental::Trans::NoTranspose,
792  KokkosBatched::Experimental::Trans::NoTranspose,
793  KokkosBatched::Experimental::Algo::Gemm::Blocked>::invoke
794  (magnitude_type(-STM::one ()), multiplier, uumat, STM::one (), kkval);
795 #else
796  Tpetra::GEMM ("N", "N", magnitude_type(-STM::one ()), multiplier, uumat,
797  STM::one (), kkval);
798 #endif
799  //blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*>(multiplier.data ()), reinterpret_cast<impl_scalar_type*>(uumat.data ()), reinterpret_cast<impl_scalar_type*>(kkval.data ()), blockSize_, -STM::one(), STM::one());
800  }
801  else {
802 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
803  KokkosBatched::Experimental::SerialGemm
804  <KokkosBatched::Experimental::Trans::NoTranspose,
805  KokkosBatched::Experimental::Trans::NoTranspose,
806  KokkosBatched::Experimental::Algo::Gemm::Blocked>::invoke
807  (magnitude_type(-STM::one ()), multiplier, uumat, STM::one (), diagModBlock);
808 #else
809  Tpetra::GEMM ("N", "N", magnitude_type(-STM::one ()), multiplier, uumat,
810  STM::one (), diagModBlock);
811 #endif
812  //blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*>(multiplier.data ()), reinterpret_cast<impl_scalar_type*>(uumat.data ()), reinterpret_cast<impl_scalar_type*>(diagModBlock.data ()), blockSize_, -STM::one(), STM::one());
813  }
814  }
815  }
816  }
817  if (NumL) {
818  // Replace current row of L
819  L_block_->replaceLocalValues (local_row, InI.getRawPtr (), InV.getRawPtr (), NumL);
820  }
821 
822  // dmat.assign(dmatV);
823  Tpetra::COPY (dmatV, dmat);
824 
825  if (this->RelaxValue_ != STM::zero ()) {
826  //dmat.update(this->RelaxValue_, diagModBlock);
827  Tpetra::AXPY (this->RelaxValue_, diagModBlock, dmat);
828  }
829 
830 // if (STS::magnitude (DV[i]) > STS::magnitude (MaxDiagonalValue)) {
831 // if (STS::real (DV[i]) < STM::zero ()) {
832 // DV[i] = -MinDiagonalValue;
833 // }
834 // else {
835 // DV[i] = MinDiagonalValue;
836 // }
837 // }
838 // else
839  {
840  int lapackInfo = 0;
841  for (int k = 0; k < blockSize_; ++k) {
842  ipiv[k] = 0;
843  }
844 
845  Tpetra::GETF2 (dmat, ipiv, lapackInfo);
846  //lapack.GETRF(blockSize_, blockSize_, d_raw, blockSize_, ipiv.getRawPtr(), &lapackInfo);
847  TEUCHOS_TEST_FOR_EXCEPTION(
848  lapackInfo != 0, std::runtime_error, "Ifpack2::Experimental::RBILUK::compute: "
849  "lapackInfo = " << lapackInfo << " which indicates an error in the factorization GETRF.");
850 
851  Tpetra::GETRI (dmat, ipiv, work, lapackInfo);
852  //lapack.GETRI(blockSize_, d_raw, blockSize_, ipiv.getRawPtr(), work.getRawPtr(), lwork, &lapackInfo);
853  TEUCHOS_TEST_FOR_EXCEPTION(
854  lapackInfo != 0, std::runtime_error, "Ifpack2::Experimental::RBILUK::compute: "
855  "lapackInfo = " << lapackInfo << " which indicates an error in the matrix inverse GETRI.");
856  }
857 
858  for (LO j = 0; j < NumU; ++j) {
859  little_block_host_type currentVal((typename little_block_host_type::value_type*) &InV[(NumL+1+j)*blockMatSize], blockSize_, rowStride); // current_mults++;
860  // scale U by the diagonal inverse
861 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
862  KokkosBatched::Experimental::SerialGemm
863  <KokkosBatched::Experimental::Trans::NoTranspose,
864  KokkosBatched::Experimental::Trans::NoTranspose,
865  KokkosBatched::Experimental::Algo::Gemm::Blocked>::invoke
866  (STS::one (), dmat, currentVal, STS::zero (), matTmp);
867 #else
868  Tpetra::GEMM ("N", "N", STS::one (), dmat, currentVal,
869  STS::zero (), matTmp);
870 #endif
871  //blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*>(dmat.data ()), reinterpret_cast<impl_scalar_type*>(currentVal.data ()), reinterpret_cast<impl_scalar_type*>(matTmp.data ()), blockSize_);
872  //currentVal.assign(matTmp);
873  Tpetra::COPY (matTmp, currentVal);
874  }
875 
876  if (NumU) {
877  // Replace current row of L and U
878  U_block_->replaceLocalValues (local_row, &InI[NumL+1], &InV[blockMatSize*(NumL+1)], NumU);
879  }
880 
881 #ifndef IFPACK2_RBILUK_INITIAL
882  // Reset column flags
883  for (size_t j = 0; j < NumIn; ++j) {
884  colflag[InI[j]] = -1;
885  }
886 #else
887  // Reset column flags
888  for (size_t j = 0; j < num_cols; ++j) {
889  colflag[j] = -1;
890  }
891 #endif
892  }
893 
894  } // Stop timing
895 
896  // Sync everything back to device, for efficient solves.
897  /*
898  {
899  typedef typename block_crs_matrix_type::device_type device_type;
900  if (! A_block_.is_null ()) {
901  Teuchos::RCP<block_crs_matrix_type> A_nc =
902  Teuchos::rcp_const_cast<block_crs_matrix_type> (A_block_);
903  A_nc->template sync<device_type> ();
904  }
905  L_block_->template sync<device_type> ();
906  U_block_->template sync<device_type> ();
907  D_block_->template sync<device_type> ();
908  }
909  */
910 
911  this->isComputed_ = true;
912  this->numCompute_ += 1;
913  this->computeTime_ += (timer.wallTime() - startTime);
914 }
915 
916 
917 template<class MatrixType>
918 void
919 RBILUK<MatrixType>::
920 apply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
921  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
922  Teuchos::ETransp mode,
923  scalar_type alpha,
924  scalar_type beta) const
925 {
926  using Teuchos::RCP;
927  typedef Tpetra::BlockMultiVector<scalar_type,
928  local_ordinal_type, global_ordinal_type, node_type> BMV;
929 
930  TEUCHOS_TEST_FOR_EXCEPTION(
931  this->A_.is_null (), std::runtime_error, "Ifpack2::Experimental::RBILUK::apply: The matrix is "
932  "null. Please call setMatrix() with a nonnull input, then initialize() "
933  "and compute(), before calling this method.");
934  TEUCHOS_TEST_FOR_EXCEPTION(
935  ! this->isComputed (), std::runtime_error,
936  "Ifpack2::Experimental::RBILUK::apply: If you have not yet called compute(), "
937  "you must call compute() before calling this method.");
938  TEUCHOS_TEST_FOR_EXCEPTION(
939  X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
940  "Ifpack2::Experimental::RBILUK::apply: X and Y do not have the same number of columns. "
941  "X.getNumVectors() = " << X.getNumVectors ()
942  << " != Y.getNumVectors() = " << Y.getNumVectors () << ".");
943  TEUCHOS_TEST_FOR_EXCEPTION(
944  STS::isComplex && mode == Teuchos::CONJ_TRANS, std::logic_error,
945  "Ifpack2::Experimental::RBILUK::apply: mode = Teuchos::CONJ_TRANS is not implemented for "
946  "complex Scalar type. Please talk to the Ifpack2 developers to get this "
947  "fixed. There is a FIXME in this file about this very issue.");
948 
949  const LO blockMatSize = blockSize_*blockSize_;
950 
951  const LO rowStride = blockSize_;
952 
953  BMV yBlock (Y, * (this->Graph_->getA_Graph()->getDomainMap ()), blockSize_);
954  const BMV xBlock (X, * (this->A_->getColMap ()), blockSize_);
955 
956  Teuchos::Array<scalar_type> lclarray(blockSize_);
957  little_host_vec_type lclvec((typename little_host_vec_type::value_type*)&lclarray[0], blockSize_);
958  const scalar_type one = STM::one ();
959  const scalar_type zero = STM::zero ();
960 
961  Teuchos::Time timer ("RBILUK::apply");
962  double startTime = timer.wallTime();
963  { // Start timing
964  Teuchos::TimeMonitor timeMon (timer);
965  if (alpha == one && beta == zero) {
966  if (mode == Teuchos::NO_TRANS) { // Solve L (D (U Y)) = X for Y.
967  // Start by solving L C = X for C. C must have the same Map
968  // as D. We have to use a temp multivector, since our
969  // implementation of triangular solves does not allow its
970  // input and output to alias one another.
971  //
972  // FIXME (mfh 24 Jan 2014) Cache this temp multivector.
973  const LO numVectors = xBlock.getNumVectors();
974  BMV cBlock (* (this->Graph_->getA_Graph()->getDomainMap ()), blockSize_, numVectors);
975  BMV rBlock (* (this->Graph_->getA_Graph()->getDomainMap ()), blockSize_, numVectors);
976  for (LO imv = 0; imv < numVectors; ++imv)
977  {
978  for (size_t i = 0; i < D_block_->getLocalNumRows(); ++i)
979  {
980  LO local_row = i;
981  const_host_little_vec_type xval =
982  xBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::ReadOnly);
983  little_host_vec_type cval =
984  cBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::OverwriteAll);
985  //cval.assign(xval);
986  Tpetra::COPY (xval, cval);
987 
988  local_inds_host_view_type colValsL;
989  values_host_view_type valsL;
990  L_block_->getLocalRowView(local_row, colValsL, valsL);
991  LO NumL = (LO) colValsL.size();
992 
993  for (LO j = 0; j < NumL; ++j)
994  {
995  LO col = colValsL[j];
996  const_host_little_vec_type prevVal =
997  cBlock.getLocalBlockHost(col, imv, Tpetra::Access::ReadOnly);
998 
999  const LO matOffset = blockMatSize*j;
1000  little_block_host_type lij((typename little_block_host_type::value_type*) &valsL[matOffset],blockSize_,rowStride);
1001 
1002  //cval.matvecUpdate(-one, lij, prevVal);
1003  Tpetra::GEMV (-one, lij, prevVal, cval);
1004  }
1005  }
1006  }
1007 
1008  // Solve D R = C. Note that D has been replaced by D^{-1} at this point.
1009  D_block_->applyBlock(cBlock, rBlock);
1010 
1011  // Solve U Y = R.
1012  for (LO imv = 0; imv < numVectors; ++imv)
1013  {
1014  const LO numRows = D_block_->getLocalNumRows();
1015  for (LO i = 0; i < numRows; ++i)
1016  {
1017  LO local_row = (numRows-1)-i;
1018  const_host_little_vec_type rval =
1019  rBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::ReadOnly);
1020  little_host_vec_type yval =
1021  yBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::OverwriteAll);
1022  //yval.assign(rval);
1023  Tpetra::COPY (rval, yval);
1024 
1025  local_inds_host_view_type colValsU;
1026  values_host_view_type valsU;
1027  U_block_->getLocalRowView(local_row, colValsU, valsU);
1028  LO NumU = (LO) colValsU.size();
1029 
1030  for (LO j = 0; j < NumU; ++j)
1031  {
1032  LO col = colValsU[NumU-1-j];
1033  const_host_little_vec_type prevVal =
1034  yBlock.getLocalBlockHost(col, imv, Tpetra::Access::ReadOnly);
1035 
1036  const LO matOffset = blockMatSize*(NumU-1-j);
1037  little_block_host_type uij((typename little_block_host_type::value_type*) &valsU[matOffset], blockSize_, rowStride);
1038 
1039  //yval.matvecUpdate(-one, uij, prevVal);
1040  Tpetra::GEMV (-one, uij, prevVal, yval);
1041  }
1042  }
1043  }
1044  }
1045  else { // Solve U^P (D^P (L^P Y)) = X for Y (where P is * or T).
1046  TEUCHOS_TEST_FOR_EXCEPTION(
1047  true, std::runtime_error,
1048  "Ifpack2::Experimental::RBILUK::apply: transpose apply is not implemented for the block algorithm. ");
1049  }
1050  }
1051  else { // alpha != 1 or beta != 0
1052  if (alpha == zero) {
1053  if (beta == zero) {
1054  Y.putScalar (zero);
1055  } else {
1056  Y.scale (beta);
1057  }
1058  } else { // alpha != zero
1059  MV Y_tmp (Y.getMap (), Y.getNumVectors ());
1060  apply (X, Y_tmp, mode);
1061  Y.update (alpha, Y_tmp, beta);
1062  }
1063  }
1064  } // Stop timing
1065 
1066  this->numApply_ += 1;
1067  this->applyTime_ += (timer.wallTime() - startTime);
1068 }
1069 
1070 
1071 template<class MatrixType>
1072 std::string RBILUK<MatrixType>::description () const
1073 {
1074  std::ostringstream os;
1075 
1076  // Output is a valid YAML dictionary in flow style. If you don't
1077  // like everything on a single line, you should call describe()
1078  // instead.
1079  os << "\"Ifpack2::Experimental::RBILUK\": {";
1080  os << "Initialized: " << (this->isInitialized () ? "true" : "false") << ", "
1081  << "Computed: " << (this->isComputed () ? "true" : "false") << ", ";
1082 
1083  os << "Level-of-fill: " << this->getLevelOfFill() << ", ";
1084 
1085  if (this->A_.is_null ()) {
1086  os << "Matrix: null";
1087  }
1088  else {
1089  os << "Global matrix dimensions: ["
1090  << this->A_->getGlobalNumRows () << ", " << this->A_->getGlobalNumCols () << "]"
1091  << ", Global nnz: " << this->A_->getGlobalNumEntries();
1092  }
1093 
1094  os << "}";
1095  return os.str ();
1096 }
1097 
1098 } // namespace Experimental
1099 
1100 } // namespace Ifpack2
1101 
1102 // FIXME (mfh 26 Aug 2015) We only need to do instantiation for
1103 // MatrixType = Tpetra::RowMatrix. Conversions to BlockCrsMatrix are
1104 // handled internally via dynamic cast.
1105 
1106 #define IFPACK2_EXPERIMENTAL_RBILUK_INSTANT(S,LO,GO,N) \
1107  template class Ifpack2::Experimental::RBILUK< Tpetra::BlockCrsMatrix<S, LO, GO, N> >; \
1108  template class Ifpack2::Experimental::RBILUK< Tpetra::RowMatrix<S, LO, GO, N> >;
1109 
1110 #endif
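
For reference, a minimal usage sketch (not part of the header above; the helper name preconditionOnce and the convenience include "Ifpack2_Experimental_RBILUK.hpp" are illustrative assumptions). It factors a fill-complete Tpetra::BlockCrsMatrix with RBILUK, setting the fill level through the "fact: iluk level-of-fill" parameter inherited from RILUK, and applies the resulting preconditioner once.

// Hypothetical helper, for illustration only (not part of Ifpack2).
#include "Ifpack2_Experimental_RBILUK.hpp"
#include "Tpetra_BlockCrsMatrix.hpp"
#include "Tpetra_MultiVector.hpp"
#include "Teuchos_ParameterList.hpp"

template<class Scalar, class LO, class GO, class Node>
void preconditionOnce (const Teuchos::RCP<const Tpetra::BlockCrsMatrix<Scalar, LO, GO, Node> >& A,
                       const Tpetra::MultiVector<Scalar, LO, GO, Node>& x,
                       Tpetra::MultiVector<Scalar, LO, GO, Node>& y,
                       const int levelOfFill = 1)
{
  using block_matrix_type = Tpetra::BlockCrsMatrix<Scalar, LO, GO, Node>;

  // A must be fill complete before the preconditioner may be set up.
  Ifpack2::Experimental::RBILUK<block_matrix_type> prec (A);

  Teuchos::ParameterList params;
  params.set ("fact: iluk level-of-fill", levelOfFill); // parameter inherited from RILUK
  prec.setParameters (params);

  prec.initialize (); // symbolic factorization: builds the level-filled graph
  prec.compute ();    // numeric factorization: fills the L, D^{-1}, and U blocks
  prec.apply (x, y);  // y := M^{-1} * x (alpha = 1, beta = 0 by default)
}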