Ifpack2 Templated Preconditioning Package, Version 1.0
Ifpack2_Experimental_RBILUK_def.hpp
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
43 #ifndef IFPACK2_EXPERIMENTAL_CRSRBILUK_DEF_HPP
44 #define IFPACK2_EXPERIMENTAL_CRSRBILUK_DEF_HPP
45 
46 #include "Tpetra_BlockMultiVector.hpp"
47 #include "Tpetra_BlockView.hpp"
48 #include "Ifpack2_OverlappingRowMatrix.hpp"
49 #include "Ifpack2_LocalFilter.hpp"
50 #include "Ifpack2_Utilities.hpp"
51 #include "Ifpack2_RILUK.hpp"
52 
53 //#define IFPACK2_RBILUK_INITIAL
54 #define IFPACK2_RBILUK_INITIAL_NOKK
55 
56 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
57 #include "KokkosBatched_Gemm_Decl.hpp"
58 #include "KokkosBatched_Gemm_Serial_Impl.hpp"
59 #include "KokkosBatched_Util.hpp"
60 #endif
61 
62 namespace Ifpack2 {
63 
64 namespace Experimental {
65 
66 template<class MatrixType>
67 RBILUK<MatrixType>::RBILUK (const Teuchos::RCP<const row_matrix_type>& Matrix_in)
68  : RILUK<row_matrix_type>(Teuchos::rcp_dynamic_cast<const row_matrix_type>(Matrix_in) ),
69  A_(Matrix_in),
70  A_block_(Teuchos::rcp_dynamic_cast<const block_crs_matrix_type>(Matrix_in))
71 {}
72 
73 template<class MatrixType>
74 RBILUK<MatrixType>::RBILUK (const Teuchos::RCP<const block_crs_matrix_type>& Matrix_in)
75  : RILUK<row_matrix_type>(Teuchos::rcp_dynamic_cast<const row_matrix_type>(Matrix_in) ),
76  A_block_(Matrix_in)
77 {}
78 
79 
80 template<class MatrixType>
81 RBILUK<MatrixType>::~RBILUK() {}
82 
83 
84 template<class MatrixType>
85 void
86 RBILUK<MatrixType>::setMatrix (const Teuchos::RCP<const block_crs_matrix_type>& A)
87 {
88  // FIXME (mfh 04 Nov 2015) What about A_? When does that get (re)set?
89 
90  // It's legal for A to be null; in that case, you may not call
91  // initialize() until calling setMatrix() with a nonnull input.
92  // Regardless, setting the matrix invalidates any previous
93  // factorization.
94  if (A.getRawPtr () != A_block_.getRawPtr ())
95  {
96  this->isAllocated_ = false;
97  this->isInitialized_ = false;
98  this->isComputed_ = false;
99  this->Graph_ = Teuchos::null;
100  L_block_ = Teuchos::null;
101  U_block_ = Teuchos::null;
102  D_block_ = Teuchos::null;
103  A_block_ = A;
104  }
105 }
106 
107 
108 
109 template<class MatrixType>
110 const typename RBILUK<MatrixType>::block_crs_matrix_type&
111 RBILUK<MatrixType>::getLBlock () const
112 {
113  TEUCHOS_TEST_FOR_EXCEPTION(
114  L_block_.is_null (), std::runtime_error, "Ifpack2::RILUK::getL: The L factor "
115  "is null. Please call initialize() (and preferably also compute()) "
116  "before calling this method. If the input matrix has not yet been set, "
117  "you must first call setMatrix() with a nonnull input matrix before you "
118  "may call initialize() or compute().");
119  return *L_block_;
120 }
121 
122 
123 template<class MatrixType>
124 const typename RBILUK<MatrixType>::block_crs_matrix_type&
125 RBILUK<MatrixType>::getDBlock () const
126 {
127  TEUCHOS_TEST_FOR_EXCEPTION(
128  D_block_.is_null (), std::runtime_error, "Ifpack2::RILUK::getD: The D factor "
129  "(of diagonal entries) is null. Please call initialize() (and "
130  "preferably also compute()) before calling this method. If the input "
131  "matrix has not yet been set, you must first call setMatrix() with a "
132  "nonnull input matrix before you may call initialize() or compute().");
133  return *D_block_;
134 }
135 
136 
137 template<class MatrixType>
138 const typename RBILUK<MatrixType>::block_crs_matrix_type&
139 RBILUK<MatrixType>::getUBlock () const
140 {
141  TEUCHOS_TEST_FOR_EXCEPTION(
142  U_block_.is_null (), std::runtime_error, "Ifpack2::RILUK::getU: The U factor "
143  "is null. Please call initialize() (and preferably also compute()) "
144  "before calling this method. If the input matrix has not yet been set, "
145  "you must first call setMatrix() with a nonnull input matrix before you "
146  "may call initialize() or compute().");
147  return *U_block_;
148 }
149 
150 template<class MatrixType>
151 void RBILUK<MatrixType>::allocate_L_and_U_blocks ()
152 {
153  using Teuchos::null;
154  using Teuchos::rcp;
155 
156  if (! this->isAllocated_) {
157  // Deallocate any existing storage. This avoids storing 2x
158  // memory, since RCP op= does not deallocate until after the
159  // assignment.
160  this->L_ = null;
161  this->U_ = null;
162  this->D_ = null;
163  L_block_ = null;
164  U_block_ = null;
165  D_block_ = null;
166 
167  // Allocate Matrix using ILUK graphs
168  L_block_ = rcp(new block_crs_matrix_type(*this->Graph_->getL_Graph (), blockSize_) );
169  U_block_ = rcp(new block_crs_matrix_type(*this->Graph_->getU_Graph (), blockSize_) );
170  D_block_ = rcp(new block_crs_matrix_type(*(Ifpack2::Details::computeDiagonalGraph(*(this->Graph_->getOverlapGraph()))),
171  blockSize_) );
172  L_block_->setAllToScalar (STM::zero ()); // Zero out L and U matrices
173  U_block_->setAllToScalar (STM::zero ());
174  D_block_->setAllToScalar (STM::zero ());
175 
176  }
177  this->isAllocated_ = true;
178 }
179 
180 template<class MatrixType>
181 Teuchos::RCP<const typename RBILUK<MatrixType>::block_crs_matrix_type>
182 RBILUK<MatrixType>::getBlockMatrix () const {
183  return A_block_;
184 }
185 
186 template<class MatrixType>
187 void RBILUK<MatrixType>::initialize ()
188 {
189  using Teuchos::RCP;
190  using Teuchos::rcp;
191  using Teuchos::rcp_dynamic_cast;
192  const char prefix[] = "Ifpack2::Experimental::RBILUK::initialize: ";
193 
194  // FIXME (mfh 04 Nov 2015) Apparently it's OK for A_ to be null.
195  // That probably means that this preconditioner was created with a
196  // BlockCrsMatrix directly, so it doesn't need the LocalFilter.
197 
198  // TEUCHOS_TEST_FOR_EXCEPTION
199  // (A_.is_null (), std::runtime_error, prefix << "The matrix (A_, the "
200  // "RowMatrix) is null. Please call setMatrix() with a nonnull input "
201  // "before calling this method.");
202 
203  if (A_block_.is_null ()) {
204  // FIXME (mfh 04 Nov 2015) Why does the input have to be a
205  // LocalFilter? Why can't we just take a regular matrix, and
206  // apply a LocalFilter only if necessary, like other "local"
207  // Ifpack2 preconditioners already do?
208  RCP<const LocalFilter<row_matrix_type> > filteredA =
209  rcp_dynamic_cast<const LocalFilter<row_matrix_type> >(A_);
210  TEUCHOS_TEST_FOR_EXCEPTION
211  (filteredA.is_null (), std::runtime_error, prefix <<
212  "Cannot cast to filtered matrix.");
213  RCP<const OverlappingRowMatrix<row_matrix_type> > overlappedA =
214  rcp_dynamic_cast<const OverlappingRowMatrix<row_matrix_type> > (filteredA->getUnderlyingMatrix ());
215  if (! overlappedA.is_null ()) {
216  A_block_ = rcp_dynamic_cast<const block_crs_matrix_type>(overlappedA->getUnderlyingMatrix());
217  } else {
218  //If there is no overlap, filteredA could be the block CRS matrix
219  A_block_ = rcp_dynamic_cast<const block_crs_matrix_type>(filteredA->getUnderlyingMatrix());
220  }
221  }
222 
223  TEUCHOS_TEST_FOR_EXCEPTION
224  (A_block_.is_null (), std::runtime_error, prefix << "The matrix (A_block_, "
225  "the BlockCrsMatrix) is null. Please call setMatrix() with a nonnull "
226  "input before calling this method.");
227  TEUCHOS_TEST_FOR_EXCEPTION
228  (! A_block_->isFillComplete (), std::runtime_error, prefix << "The matrix "
229  "(A_block_, the BlockCrsMatrix) is not fill complete. You may not invoke "
230  "initialize() or compute() with this matrix until the matrix is fill "
231  "complete. Note: BlockCrsMatrix is fill complete if and only if its "
232  "underlying graph is fill complete.");
233 
234  blockSize_ = A_block_->getBlockSize();
235 
236  Teuchos::Time timer ("RBILUK::initialize");
237  { // Start timing
238  Teuchos::TimeMonitor timeMon (timer);
239 
240  // Calling initialize() means that the user asserts that the graph
241  // of the sparse matrix may have changed. We must not just reuse
242  // the previous graph in that case.
243  //
244  // Regarding setting isAllocated_ to false: Eventually, we may want
245  // some kind of clever memory reuse strategy, but it's always
246  // correct just to blow everything away and start over.
247  this->isInitialized_ = false;
248  this->isAllocated_ = false;
249  this->isComputed_ = false;
250  this->Graph_ = Teuchos::null;
251 
252  typedef Tpetra::CrsGraph<local_ordinal_type,
253  global_ordinal_type,
254  node_type> crs_graph_type;
255 
256  RCP<const crs_graph_type> matrixCrsGraph = Teuchos::rcpFromRef(A_block_->getCrsGraph() );
257  this->Graph_ = rcp (new Ifpack2::IlukGraph<crs_graph_type> (matrixCrsGraph,
258  this->LevelOfFill_, 0));
259 
260  this->Graph_->initialize ();
261  allocate_L_and_U_blocks ();
262 #ifdef IFPACK2_RBILUK_INITIAL
263  initAllValues (*A_block_);
264 #endif
265  } // Stop timing
266 
267  this->isInitialized_ = true;
268  this->numInitialize_ += 1;
269  this->initializeTime_ += timer.totalElapsedTime ();
270 }
271 
272 
273 template<class MatrixType>
274 void
275 RBILUK<MatrixType>::
276 initAllValues (const block_crs_matrix_type& A)
277 {
278  using Teuchos::RCP;
279  typedef Tpetra::Map<local_ordinal_type,global_ordinal_type,node_type> map_type;
280 
281  local_ordinal_type NumIn = 0, NumL = 0, NumU = 0;
282  bool DiagFound = false;
283  size_t NumNonzeroDiags = 0;
284  size_t MaxNumEntries = A.getNodeMaxNumRowEntries();
285  local_ordinal_type blockMatSize = blockSize_*blockSize_;
286 
287  // First check that the local row map ordering is the same as the local portion of the column map.
288  // The extraction of the strictly lower/upper parts of A, as well as the factorization,
289  // implicitly assume that this is the case.
290  Teuchos::ArrayView<const global_ordinal_type> rowGIDs = A.getRowMap()->getNodeElementList();
291  Teuchos::ArrayView<const global_ordinal_type> colGIDs = A.getColMap()->getNodeElementList();
292  bool gidsAreConsistentlyOrdered=true;
293  global_ordinal_type indexOfInconsistentGID=0;
294  for (global_ordinal_type i=0; i<rowGIDs.size(); ++i) {
295  if (rowGIDs[i] != colGIDs[i]) {
296  gidsAreConsistentlyOrdered=false;
297  indexOfInconsistentGID=i;
298  break;
299  }
300  }
301  TEUCHOS_TEST_FOR_EXCEPTION(gidsAreConsistentlyOrdered==false, std::runtime_error,
302  "The ordering of the local GIDs in the row and column maps is not the same"
303  << std::endl << "at index " << indexOfInconsistentGID
304  << ". Consistency is required, as all calculations are done with"
305  << std::endl << "local indexing.");
306 
307  // Allocate temporary space for extracting the strictly
308  // lower and upper parts of the matrix A.
309  Teuchos::Array<local_ordinal_type> LI(MaxNumEntries);
310  Teuchos::Array<local_ordinal_type> UI(MaxNumEntries);
311  Teuchos::Array<scalar_type> LV(MaxNumEntries*blockMatSize);
312  Teuchos::Array<scalar_type> UV(MaxNumEntries*blockMatSize);
313 
314  Teuchos::Array<scalar_type> diagValues(blockMatSize);
315 
316  L_block_->setAllToScalar (STM::zero ()); // Zero out L and U matrices
317  U_block_->setAllToScalar (STM::zero ());
318  D_block_->setAllToScalar (STM::zero ()); // Set diagonal values to zero
319 
320  // NOTE (mfh 27 May 2016) The factorization below occurs entirely on
321  // host, so sync to host first. The const_cast is unfortunate but
322  // is our only option to make this correct.
323 
324  const_cast<block_crs_matrix_type&> (A).sync_host ();
325  L_block_->sync_host ();
326  U_block_->sync_host ();
327  D_block_->sync_host ();
328  // NOTE (mfh 27 May 2016) We're modifying L, U, and D on host.
329  L_block_->modify_host ();
330  U_block_->modify_host ();
331  D_block_->modify_host ();
332 
333  RCP<const map_type> rowMap = L_block_->getRowMap ();
334 
335  // First we copy the user's matrix into L and U, regardless of fill level.
336  // It is important to note that L and U are populated using local indices.
337  // This means that if the row map GIDs are not monotonically increasing
338  // (i.e., permuted or gappy), then the strictly lower (upper) part of the
339  // matrix is not the one that you would get if you based L (U) on GIDs.
340  // This is ok, as the *order* of the GIDs in the rowmap is a better
341  // expression of the user's intent than the GIDs themselves.
342 
343  for (size_t myRow=0; myRow<A.getNodeNumRows(); ++myRow) {
344  local_ordinal_type local_row = myRow;
345 
346  //TODO JJH 4April2014 An optimization is to use getLocalRowView. Not all matrices support this,
347  // we'd need to check via the Tpetra::RowMatrix method supportsRowViews().
348  const local_ordinal_type * InI = 0;
349  scalar_type * InV = 0;
350  A.getLocalRowView(local_row, InI, InV, NumIn);
351 
352  // Split into L and U (we don't assume that indices are ordered).
353 
354  NumL = 0;
355  NumU = 0;
356  DiagFound = false;
357 
358  for (local_ordinal_type j = 0; j < NumIn; ++j) {
359  const local_ordinal_type k = InI[j];
360  const local_ordinal_type blockOffset = blockMatSize*j;
361 
362  if (k == local_row) {
363  DiagFound = true;
364  // Store perturbed diagonal in Tpetra::Vector D_
365  for (local_ordinal_type jj = 0; jj < blockMatSize; ++jj)
366  diagValues[jj] = this->Rthresh_ * InV[blockOffset+jj] + IFPACK2_SGN(InV[blockOffset+jj]) * this->Athresh_;
367  D_block_->replaceLocalValues(local_row, &InI[j], diagValues.getRawPtr(), 1);
368  }
369  else if (k < 0) { // Out of range
370  TEUCHOS_TEST_FOR_EXCEPTION(
371  true, std::runtime_error, "Ifpack2::RILUK::initAllValues: current "
372  "GID k = " << k << " < 0. I'm not sure why this is an error; it is "
373  "probably an artifact of the undocumented assumptions of the "
374  "original implementation (likely copied and pasted from Ifpack). "
375  "Nevertheless, the code I found here insisted on this being an error "
376  "state, so I will throw an exception here.");
377  }
378  else if (k < local_row) {
379  LI[NumL] = k;
380  const local_ordinal_type LBlockOffset = NumL*blockMatSize;
381  for (local_ordinal_type jj = 0; jj < blockMatSize; ++jj)
382  LV[LBlockOffset+jj] = InV[blockOffset+jj];
383  NumL++;
384  }
385  else if (Teuchos::as<size_t>(k) <= rowMap->getNodeNumElements()) {
386  UI[NumU] = k;
387  const local_ordinal_type UBlockOffset = NumU*blockMatSize;
388  for (local_ordinal_type jj = 0; jj < blockMatSize; ++jj)
389  UV[UBlockOffset+jj] = InV[blockOffset+jj];
390  NumU++;
391  }
392  }
393 
394  // Check in things for this row of L and U
395 
396  if (DiagFound) {
397  ++NumNonzeroDiags;
398  } else
399  {
400  for (local_ordinal_type jj = 0; jj < blockSize_; ++jj)
401  diagValues[jj*(blockSize_+1)] = this->Athresh_;
402  D_block_->replaceLocalValues(local_row, &local_row, diagValues.getRawPtr(), 1);
403  }
404 
405  if (NumL) {
406  L_block_->replaceLocalValues(local_row, &LI[0], &LV[0], NumL);
407  }
408 
409  if (NumU) {
410  U_block_->replaceLocalValues(local_row, &UI[0], &UV[0], NumU);
411  }
412  }
413 
414  // NOTE (mfh 27 May 2016) Sync back to device, in case compute()
415  // ever gets a device implementation.
416  {
417  typedef typename block_crs_matrix_type::device_type device_type;
418  const_cast<block_crs_matrix_type&> (A).template sync<device_type> ();
419  L_block_->template sync<device_type> ();
420  U_block_->template sync<device_type> ();
421  D_block_->template sync<device_type> ();
422  }
423  this->isInitialized_ = true;
424 }
425 
426 namespace { // (anonymous)
427 
428 // For a given Kokkos::View type, possibly unmanaged, get the
429 // corresponding managed Kokkos::View type. This is handy for
430 // translating from little_block_type or little_vec_type (both
431 // possibly unmanaged) to their managed versions.
432 template<class LittleBlockType>
433 struct GetManagedView {
434  static_assert (Kokkos::Impl::is_view<LittleBlockType>::value,
435  "LittleBlockType must be a Kokkos::View.");
436  typedef Kokkos::View<typename LittleBlockType::non_const_data_type,
437  typename LittleBlockType::array_layout,
438  typename LittleBlockType::device_type> managed_non_const_type;
439  static_assert (static_cast<int> (managed_non_const_type::rank) ==
440  static_cast<int> (LittleBlockType::rank),
441  "managed_non_const_type::rank != LittleBlock::rank. "
442  "Please report this bug to the Ifpack2 developers.");
443 };
444 
445 } // namespace (anonymous)
446 
447 template<class MatrixType>
448 void RBILUK<MatrixType>::compute ()
449 {
450  typedef impl_scalar_type IST;
451  const char prefix[] = "Ifpack2::Experimental::RBILUK::compute: ";
452 
453  // initialize() checks this too, but it's easier for users if the
454  // error shows them the name of the method that they actually
455  // called, rather than the name of some internally called method.
456  TEUCHOS_TEST_FOR_EXCEPTION
457  (A_block_.is_null (), std::runtime_error, prefix << "The matrix (A_block_, "
458  "the BlockCrsMatrix) is null. Please call setMatrix() with a nonnull "
459  "input before calling this method.");
460  TEUCHOS_TEST_FOR_EXCEPTION
461  (! A_block_->isFillComplete (), std::runtime_error, prefix << "The matrix "
462  "(A_block_, the BlockCrsMatrix) is not fill complete. You may not invoke "
463  "initialize() or compute() with this matrix until the matrix is fill "
464  "complete. Note: BlockCrsMatrix is fill complete if and only if its "
465  "underlying graph is fill complete.");
466 
467  if (! this->isInitialized ()) {
468  initialize (); // Don't count this in the compute() time
469  }
470 
471  // NOTE (mfh 27 May 2016) The factorization below occurs entirely on
472  // host, so sync to host first. The const_cast is unfortunate but
473  // is our only option to make this correct.
474  if (! A_block_.is_null ()) {
475  Teuchos::RCP<block_crs_matrix_type> A_nc =
476  Teuchos::rcp_const_cast<block_crs_matrix_type> (A_block_);
477  A_nc->sync_host ();
478  }
479  L_block_->sync_host ();
480  U_block_->sync_host ();
481  D_block_->sync_host ();
482  // NOTE (mfh 27 May 2016) We're modifying L, U, and D on host.
483  L_block_->modify_host ();
484  U_block_->modify_host ();
485  D_block_->modify_host ();
486 
487  Teuchos::Time timer ("RBILUK::compute");
488  { // Start timing
489  Teuchos::TimeMonitor timeMon (timer);
490  this->isComputed_ = false;
491 
492  // MinMachNum should be officially defined, for now pick something a little
493  // bigger than IEEE underflow value
494 
495 // const scalar_type MinDiagonalValue = STS::rmin ();
496 // const scalar_type MaxDiagonalValue = STS::one () / MinDiagonalValue;
497  initAllValues (*A_block_);
498 
499  size_t NumIn;
500  local_ordinal_type NumL, NumU, NumURead;
501 
502  // Get Maximum Row length
503  const size_t MaxNumEntries =
504  L_block_->getNodeMaxNumRowEntries () + U_block_->getNodeMaxNumRowEntries () + 1;
505 
506  const local_ordinal_type blockMatSize = blockSize_*blockSize_;
507 
508  // FIXME (mfh 08 Nov 2015, 24 May 2016) We need to move away from
509  // expressing these strides explicitly, in order to finish #177
510  // (complete Kokkos-ization of BlockCrsMatrix) thoroughly.
511  const local_ordinal_type rowStride = blockSize_;
512 
513  Teuchos::Array<int> ipiv_teuchos(blockSize_);
514  Kokkos::View<int*, Kokkos::HostSpace,
515  Kokkos::MemoryUnmanaged> ipiv (ipiv_teuchos.getRawPtr (), blockSize_);
516  Teuchos::Array<IST> work_teuchos(blockSize_);
517  Kokkos::View<IST*, Kokkos::HostSpace,
518  Kokkos::MemoryUnmanaged> work (work_teuchos.getRawPtr (), blockSize_);
519 
520  size_t num_cols = U_block_->getColMap()->getNodeNumElements();
521  Teuchos::Array<int> colflag(num_cols);
522 
523  typename GetManagedView<little_block_type>::managed_non_const_type diagModBlock ("diagModBlock", blockSize_, blockSize_);
524  typename GetManagedView<little_block_type>::managed_non_const_type matTmp ("matTmp", blockSize_, blockSize_);
525  typename GetManagedView<little_block_type>::managed_non_const_type multiplier ("multiplier", blockSize_, blockSize_);
526 
527 // Teuchos::ArrayRCP<scalar_type> DV = D_->get1dViewNonConst(); // Get view of diagonal
528 
529  // Now start the factorization.
530 
531  // Need some integer workspace and pointers
532  local_ordinal_type NumUU;
533  for (size_t j = 0; j < num_cols; ++j) {
534  colflag[j] = -1;
535  }
536  Teuchos::Array<local_ordinal_type> InI(MaxNumEntries, 0);
537  Teuchos::Array<scalar_type> InV(MaxNumEntries*blockMatSize,STM::zero());
538 
539  const local_ordinal_type numLocalRows = L_block_->getNodeNumRows ();
540  for (local_ordinal_type local_row = 0; local_row < numLocalRows; ++local_row) {
541 
542  // Fill InV, InI with current row of L, D and U combined
543 
544  NumIn = MaxNumEntries;
545  const local_ordinal_type * colValsL;
546  scalar_type * valsL;
547 
548  L_block_->getLocalRowView(local_row, colValsL, valsL, NumL);
549  for (local_ordinal_type j = 0; j < NumL; ++j)
550  {
551  const local_ordinal_type matOffset = blockMatSize*j;
552  little_block_type lmat((typename little_block_type::value_type*) &valsL[matOffset],blockSize_,rowStride);
553  little_block_type lmatV((typename little_block_type::value_type*) &InV[matOffset],blockSize_,rowStride);
554  //lmatV.assign(lmat);
555  Tpetra::COPY (lmat, lmatV);
556  InI[j] = colValsL[j];
557  }
558 
559  little_block_type dmat = D_block_->getLocalBlock(local_row, local_row);
560  little_block_type dmatV((typename little_block_type::value_type*) &InV[NumL*blockMatSize], blockSize_, rowStride);
561  //dmatV.assign(dmat);
562  Tpetra::COPY (dmat, dmatV);
563  InI[NumL] = local_row;
564 
565  const local_ordinal_type * colValsU;
566  scalar_type * valsU;
567  U_block_->getLocalRowView(local_row, colValsU, valsU, NumURead);
568  NumU = 0;
569  for (local_ordinal_type j = 0; j < NumURead; ++j)
570  {
571  if (!(colValsU[j] < numLocalRows)) continue;
572  InI[NumL+1+j] = colValsU[j];
573  const local_ordinal_type matOffset = blockMatSize*(NumL+1+j);
574  little_block_type umat((typename little_block_type::value_type*) &valsU[blockMatSize*j], blockSize_, rowStride);
575  little_block_type umatV((typename little_block_type::value_type*) &InV[matOffset], blockSize_, rowStride);
576  //umatV.assign(umat);
577  Tpetra::COPY (umat, umatV);
578  NumU += 1;
579  }
580  NumIn = NumL+NumU+1;
581 
582  // Set column flags
583  for (size_t j = 0; j < NumIn; ++j) {
584  colflag[InI[j]] = j;
585  }
586 
587 #ifndef IFPACK2_RBILUK_INITIAL
588  for (local_ordinal_type i = 0; i < blockSize_; ++i)
589  for (local_ordinal_type j = 0; j < blockSize_; ++j){
590  {
591  diagModBlock(i,j) = 0;
592  }
593  }
594 #else
595  scalar_type diagmod = STM::zero (); // Off-diagonal accumulator
596  Kokkos::deep_copy (diagModBlock, diagmod);
597 #endif
598 
599  for (local_ordinal_type jj = 0; jj < NumL; ++jj) {
600  local_ordinal_type j = InI[jj];
601  little_block_type currentVal((typename little_block_type::value_type*) &InV[jj*blockMatSize], blockSize_, rowStride); // current_mults++;
602  //multiplier.assign(currentVal);
603  Tpetra::COPY (currentVal, multiplier);
604 
605  const little_block_type dmatInverse = D_block_->getLocalBlock(j,j);
606  // alpha = 1, beta = 0
607 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
608  KokkosBatched::Experimental::SerialGemm
609  <KokkosBatched::Experimental::Trans::NoTranspose,
610  KokkosBatched::Experimental::Trans::NoTranspose,
611  KokkosBatched::Experimental::Algo::Gemm::Blocked>::invoke
612  (STS::one (), currentVal, dmatInverse, STS::zero (), matTmp);
613 #else
614  Tpetra::GEMM ("N", "N", STS::one (), currentVal, dmatInverse,
615  STS::zero (), matTmp);
616 #endif
617  //blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*> (currentVal.data ()), reinterpret_cast<impl_scalar_type*> (dmatInverse.data ()), reinterpret_cast<impl_scalar_type*> (matTmp.data ()), blockSize_);
618  //currentVal.assign(matTmp);
619  Tpetra::COPY (matTmp, currentVal);
620 
621  const local_ordinal_type * UUI;
622  scalar_type * UUV;
623  U_block_->getLocalRowView(j, UUI, UUV, NumUU);
624 
625  if (this->RelaxValue_ == STM::zero ()) {
626  for (local_ordinal_type k = 0; k < NumUU; ++k) {
627  if (!(UUI[k] < numLocalRows)) continue;
628  const int kk = colflag[UUI[k]];
629  if (kk > -1) {
630  little_block_type kkval((typename little_block_type::value_type*) &InV[kk*blockMatSize], blockSize_, rowStride);
631  little_block_type uumat((typename little_block_type::value_type*) &UUV[k*blockMatSize], blockSize_, rowStride);
632 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
633  KokkosBatched::Experimental::SerialGemm
634  <KokkosBatched::Experimental::Trans::NoTranspose,
635  KokkosBatched::Experimental::Trans::NoTranspose,
636  KokkosBatched::Experimental::Algo::Gemm::Blocked>::invoke
637  ( magnitude_type(-STM::one ()), multiplier, uumat, STM::one (), kkval);
638 #else
639  Tpetra::GEMM ("N", "N", magnitude_type(-STM::one ()), multiplier, uumat,
640  STM::one (), kkval);
641 #endif
642  //blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*> (multiplier.data ()), reinterpret_cast<impl_scalar_type*> (uumat.data ()), reinterpret_cast<impl_scalar_type*> (kkval.data ()), blockSize_, -STM::one(), STM::one());
643  }
644  }
645  }
646  else {
647  for (local_ordinal_type k = 0; k < NumUU; ++k) {
648  if (!(UUI[k] < numLocalRows)) continue;
649  const int kk = colflag[UUI[k]];
650  little_block_type uumat((typename little_block_type::value_type*) &UUV[k*blockMatSize], blockSize_, rowStride);
651  if (kk > -1) {
652  little_block_type kkval((typename little_block_type::value_type*) &InV[kk*blockMatSize], blockSize_, rowStride);
653 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
654  KokkosBatched::Experimental::SerialGemm
655  <KokkosBatched::Experimental::Trans::NoTranspose,
656  KokkosBatched::Experimental::Trans::NoTranspose,
657  KokkosBatched::Experimental::Algo::Gemm::Blocked>::invoke
658  (magnitude_type(-STM::one ()), multiplier, uumat, STM::one (), kkval);
659 #else
660  Tpetra::GEMM ("N", "N", magnitude_type(-STM::one ()), multiplier, uumat,
661  STM::one (), kkval);
662 #endif
663  //blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*>(multiplier.data ()), reinterpret_cast<impl_scalar_type*>(uumat.data ()), reinterpret_cast<impl_scalar_type*>(kkval.data ()), blockSize_, -STM::one(), STM::one());
664  }
665  else {
666 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
667  KokkosBatched::Experimental::SerialGemm
668  <KokkosBatched::Experimental::Trans::NoTranspose,
669  KokkosBatched::Experimental::Trans::NoTranspose,
670  KokkosBatched::Experimental::Algo::Gemm::Blocked>::invoke
671  (magnitude_type(-STM::one ()), multiplier, uumat, STM::one (), diagModBlock);
672 #else
673  Tpetra::GEMM ("N", "N", magnitude_type(-STM::one ()), multiplier, uumat,
674  STM::one (), diagModBlock);
675 #endif
676  //blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*>(multiplier.data ()), reinterpret_cast<impl_scalar_type*>(uumat.data ()), reinterpret_cast<impl_scalar_type*>(diagModBlock.data ()), blockSize_, -STM::one(), STM::one());
677  }
678  }
679  }
680  }
681  if (NumL) {
682  // Replace current row of L
683  L_block_->replaceLocalValues (local_row, InI.getRawPtr (), InV.getRawPtr (), NumL);
684  }
685 
686  // dmat.assign(dmatV);
687  Tpetra::COPY (dmatV, dmat);
688 
689  if (this->RelaxValue_ != STM::zero ()) {
690  //dmat.update(this->RelaxValue_, diagModBlock);
691  Tpetra::AXPY (this->RelaxValue_, diagModBlock, dmat);
692  }
693 
694 // if (STS::magnitude (DV[i]) > STS::magnitude (MaxDiagonalValue)) {
695 // if (STS::real (DV[i]) < STM::zero ()) {
696 // DV[i] = -MinDiagonalValue;
697 // }
698 // else {
699 // DV[i] = MinDiagonalValue;
700 // }
701 // }
702 // else
703  {
704  int lapackInfo = 0;
705  for (int k = 0; k < blockSize_; ++k) {
706  ipiv[k] = 0;
707  }
708 
709  Tpetra::GETF2 (dmat, ipiv, lapackInfo);
710  //lapack.GETRF(blockSize_, blockSize_, d_raw, blockSize_, ipiv.getRawPtr(), &lapackInfo);
711  TEUCHOS_TEST_FOR_EXCEPTION(
712  lapackInfo != 0, std::runtime_error, "Ifpack2::Experimental::RBILUK::compute: "
713  "lapackInfo = " << lapackInfo << " which indicates an error in the factorization GETRF.");
714 
715  Tpetra::GETRI (dmat, ipiv, work, lapackInfo);
716  //lapack.GETRI(blockSize_, d_raw, blockSize_, ipiv.getRawPtr(), work.getRawPtr(), lwork, &lapackInfo);
717  TEUCHOS_TEST_FOR_EXCEPTION(
718  lapackInfo != 0, std::runtime_error, "Ifpack2::Experimental::RBILUK::compute: "
719  "lapackInfo = " << lapackInfo << " which indicates an error in the matrix inverse GETRI.");
720  }
721 
722  for (local_ordinal_type j = 0; j < NumU; ++j) {
723  little_block_type currentVal((typename little_block_type::value_type*) &InV[(NumL+1+j)*blockMatSize], blockSize_, rowStride); // current_mults++;
724  // scale U by the diagonal inverse
725 #ifndef IFPACK2_RBILUK_INITIAL_NOKK
726  KokkosBatched::Experimental::SerialGemm
727  <KokkosBatched::Experimental::Trans::NoTranspose,
728  KokkosBatched::Experimental::Trans::NoTranspose,
729  KokkosBatched::Experimental::Algo::Gemm::Blocked>::invoke
730  (STS::one (), dmat, currentVal, STS::zero (), matTmp);
731 #else
732  Tpetra::GEMM ("N", "N", STS::one (), dmat, currentVal,
733  STS::zero (), matTmp);
734 #endif
735  //blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*>(dmat.data ()), reinterpret_cast<impl_scalar_type*>(currentVal.data ()), reinterpret_cast<impl_scalar_type*>(matTmp.data ()), blockSize_);
736  //currentVal.assign(matTmp);
737  Tpetra::COPY (matTmp, currentVal);
738  }
739 
740  if (NumU) {
741  // Replace current row of L and U
742  U_block_->replaceLocalValues (local_row, &InI[NumL+1], &InV[blockMatSize*(NumL+1)], NumU);
743  }
744 
745 #ifndef IFPACK2_RBILUK_INITIAL
746  // Reset column flags
747  for (size_t j = 0; j < NumIn; ++j) {
748  colflag[InI[j]] = -1;
749  }
750 #else
751  // Reset column flags
752  for (size_t j = 0; j < num_cols; ++j) {
753  colflag[j] = -1;
754  }
755 #endif
756  }
757 
758  } // Stop timing
759 
760  // Sync everything back to device, for efficient solves.
761  {
762  typedef typename block_crs_matrix_type::device_type device_type;
763  if (! A_block_.is_null ()) {
764  Teuchos::RCP<block_crs_matrix_type> A_nc =
765  Teuchos::rcp_const_cast<block_crs_matrix_type> (A_block_);
766  A_nc->template sync<device_type> ();
767  }
768  L_block_->template sync<device_type> ();
769  U_block_->template sync<device_type> ();
770  D_block_->template sync<device_type> ();
771  }
772 
773  this->isComputed_ = true;
774  this->numCompute_ += 1;
775  this->computeTime_ += timer.totalElapsedTime ();
776 }
777 
778 
779 template<class MatrixType>
780 void
781 RBILUK<MatrixType>::
782 apply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
783  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
784  Teuchos::ETransp mode,
785  scalar_type alpha,
786  scalar_type beta) const
787 {
788  using Teuchos::RCP;
789  typedef Tpetra::BlockMultiVector<scalar_type,
790  local_ordinal_type, global_ordinal_type, node_type> BMV;
791  typedef Tpetra::MultiVector<scalar_type,
792  local_ordinal_type, global_ordinal_type, node_type> MV;
793 
794  TEUCHOS_TEST_FOR_EXCEPTION(
795  A_block_.is_null (), std::runtime_error, "Ifpack2::Experimental::RBILUK::apply: The matrix is "
796  "null. Please call setMatrix() with a nonnull input, then initialize() "
797  "and compute(), before calling this method.");
798  TEUCHOS_TEST_FOR_EXCEPTION(
799  ! this->isComputed (), std::runtime_error,
800  "Ifpack2::Experimental::RBILUK::apply: If you have not yet called compute(), "
801  "you must call compute() before calling this method.");
802  TEUCHOS_TEST_FOR_EXCEPTION(
803  X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
804  "Ifpack2::Experimental::RBILUK::apply: X and Y do not have the same number of columns. "
805  "X.getNumVectors() = " << X.getNumVectors ()
806  << " != Y.getNumVectors() = " << Y.getNumVectors () << ".");
807  TEUCHOS_TEST_FOR_EXCEPTION(
808  STS::isComplex && mode == Teuchos::CONJ_TRANS, std::logic_error,
809  "Ifpack2::Experimental::RBILUK::apply: mode = Teuchos::CONJ_TRANS is not implemented for "
810  "complex Scalar type. Please talk to the Ifpack2 developers to get this "
811  "fixed. There is a FIXME in this file about this very issue.");
812 
813  const local_ordinal_type blockMatSize = blockSize_*blockSize_;
814 
815  const local_ordinal_type rowStride = blockSize_;
816 
817  BMV yBlock (Y, * (A_block_->getGraph ()->getDomainMap ()), blockSize_);
818  const BMV xBlock (X, * (A_block_->getColMap ()), blockSize_);
819 
820  Teuchos::Array<scalar_type> lclarray(blockSize_);
821  little_vec_type lclvec((typename little_vec_type::value_type*)&lclarray[0], blockSize_);
822  const scalar_type one = STM::one ();
823  const scalar_type zero = STM::zero ();
824 
825  Teuchos::Time timer ("RBILUK::apply");
826  { // Start timing
827  Teuchos::TimeMonitor timeMon (timer);
828  if (alpha == one && beta == zero) {
829  if (mode == Teuchos::NO_TRANS) { // Solve L (D (U Y)) = X for Y.
830  // Start by solving L C = X for C. C must have the same Map
831  // as D. We have to use a temp multivector, since our
832  // implementation of triangular solves does not allow its
833  // input and output to alias one another.
834  //
835  // FIXME (mfh 24 Jan 2014) Cache this temp multivector.
836  const local_ordinal_type numVectors = xBlock.getNumVectors();
837  BMV cBlock (* (A_block_->getGraph ()->getDomainMap ()), blockSize_, numVectors);
838  BMV rBlock (* (A_block_->getGraph ()->getDomainMap ()), blockSize_, numVectors);
839  for (local_ordinal_type imv = 0; imv < numVectors; ++imv)
840  {
841  for (size_t i = 0; i < D_block_->getNodeNumRows(); ++i)
842  {
843  local_ordinal_type local_row = i;
844  little_vec_type xval = xBlock.getLocalBlock(local_row,imv);
845  little_vec_type cval = cBlock.getLocalBlock(local_row,imv);
846  //cval.assign(xval);
847  Tpetra::COPY (xval, cval);
848 
849  local_ordinal_type NumL;
850  const local_ordinal_type * colValsL;
851  scalar_type * valsL;
852 
853  L_block_->getLocalRowView(local_row, colValsL, valsL, NumL);
854 
855  for (local_ordinal_type j = 0; j < NumL; ++j)
856  {
857  local_ordinal_type col = colValsL[j];
858  little_vec_type prevVal = cBlock.getLocalBlock(col, imv);
859 
860  const local_ordinal_type matOffset = blockMatSize*j;
861  little_block_type lij((typename little_block_type::value_type*) &valsL[matOffset],blockSize_,rowStride);
862 
863  //cval.matvecUpdate(-one, lij, prevVal);
864  Tpetra::GEMV (-one, lij, prevVal, cval);
865  }
866  }
867  }
868 
869  // Solve D R = C. Note that D has been replaced by D^{-1} at this point.
870  D_block_->applyBlock(cBlock, rBlock);
871 
872  // Solve U Y = R.
873  for (local_ordinal_type imv = 0; imv < numVectors; ++imv)
874  {
875  const local_ordinal_type numRows = D_block_->getNodeNumRows();
876  for (local_ordinal_type i = 0; i < numRows; ++i)
877  {
878  local_ordinal_type local_row = (numRows-1)-i;
879  little_vec_type rval = rBlock.getLocalBlock(local_row,imv);
880  little_vec_type yval = yBlock.getLocalBlock(local_row,imv);
881  //yval.assign(rval);
882  Tpetra::COPY (rval, yval);
883 
884  local_ordinal_type NumU;
885  const local_ordinal_type * colValsU;
886  scalar_type * valsU;
887 
888  U_block_->getLocalRowView(local_row, colValsU, valsU, NumU);
889 
890  for (local_ordinal_type j = 0; j < NumU; ++j)
891  {
892  local_ordinal_type col = colValsU[NumU-1-j];
893  little_vec_type prevVal = yBlock.getLocalBlock(col, imv);
894 
895  const local_ordinal_type matOffset = blockMatSize*(NumU-1-j);
896  little_block_type uij((typename little_block_type::value_type*) &valsU[matOffset], blockSize_, rowStride);
897 
898  //yval.matvecUpdate(-one, uij, prevVal);
899  Tpetra::GEMV (-one, uij, prevVal, yval);
900  }
901  }
902  }
903  }
904  else { // Solve U^P (D^P (L^P Y)) = X for Y (where P is * or T).
905  TEUCHOS_TEST_FOR_EXCEPTION(
906  true, std::runtime_error,
907  "Ifpack2::Experimental::RBILUK::apply: transpose apply is not implemented for the block algorithm. ");
908  }
909  }
910  else { // alpha != 1 or beta != 0
911  if (alpha == zero) {
912  if (beta == zero) {
913  Y.putScalar (zero);
914  } else {
915  Y.scale (beta);
916  }
917  } else { // alpha != zero
918  MV Y_tmp (Y.getMap (), Y.getNumVectors ());
919  apply (X, Y_tmp, mode);
920  Y.update (alpha, Y_tmp, beta);
921  }
922  }
923  } // Stop timing
924 
925  this->numApply_ += 1;
926  this->applyTime_ = timer.totalElapsedTime ();
927 }
928 
929 
930 template<class MatrixType>
931 std::string RBILUK<MatrixType>::description () const
932 {
933  std::ostringstream os;
934 
935  // Output is a valid YAML dictionary in flow style. If you don't
936  // like everything on a single line, you should call describe()
937  // instead.
938  os << "\"Ifpack2::Experimental::RBILUK\": {";
939  os << "Initialized: " << (this->isInitialized () ? "true" : "false") << ", "
940  << "Computed: " << (this->isComputed () ? "true" : "false") << ", ";
941 
942  os << "Level-of-fill: " << this->getLevelOfFill() << ", ";
943 
944  if (A_block_.is_null ()) {
945  os << "Matrix: null";
946  }
947  else {
948  os << "Global matrix dimensions: ["
949  << A_block_->getGlobalNumRows () << ", " << A_block_->getGlobalNumCols () << "]"
950  << ", Global nnz: " << A_block_->getGlobalNumEntries();
951  }
952 
953  os << "}";
954  return os.str ();
955 }
956 
957 } // namespace Experimental
958 
959 } // namespace Ifpack2
960 
961 // FIXME (mfh 26 Aug 2015) We only need to do instantiation for
962 // MatrixType = Tpetra::RowMatrix. Conversions to BlockCrsMatrix are
963 // handled internally via dynamic cast.
964 
965 #define IFPACK2_EXPERIMENTAL_RBILUK_INSTANT(S,LO,GO,N) \
966  template class Ifpack2::Experimental::RBILUK< Tpetra::BlockCrsMatrix<S, LO, GO, N> >; \
967  template class Ifpack2::Experimental::RBILUK< Tpetra::RowMatrix<S, LO, GO, N> >;
968 
969 #endif
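
For context, the sketch below shows one way this preconditioner is typically driven from user code: construct it on a fill-complete Tpetra::BlockCrsMatrix, then call initialize(), compute(), and apply() as defined in the listing above. This is a minimal illustration and not part of the file; the aggregate header name, the use of Tpetra's default template parameters, and the function name applyRBILUK are assumptions made for the example.

#include "Ifpack2_Experimental_RBILUK.hpp"   // assumed aggregate header (decl + def)
#include "Tpetra_BlockCrsMatrix.hpp"
#include "Tpetra_MultiVector.hpp"
#include "Teuchos_RCP.hpp"

// Use Tpetra's default Scalar/LocalOrdinal/GlobalOrdinal/Node types.
typedef Tpetra::BlockCrsMatrix<> block_matrix_type;
typedef Tpetra::MultiVector<>    mv_type;

// Apply z := M^{-1} r, where M is the block ILU(k) factorization of A.
// A must be fill complete, as initialize() and compute() above require.
void applyRBILUK (const Teuchos::RCP<const block_matrix_type>& A,
                  const mv_type& r, mv_type& z)
{
  // The second constructor in the listing takes the BlockCrsMatrix directly.
  Ifpack2::Experimental::RBILUK<block_matrix_type> prec (A);

  // Level of fill and relaxation/threshold options come from the
  // setParameters() interface inherited from RILUK (not shown here).
  prec.initialize (); // symbolic factorization: builds the level-k graphs for L and U
  prec.compute ();    // numeric factorization: fills the L, U, and inverted D blocks
  prec.apply (r, z);  // block triangular solves: L (D (U z)) = r
}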
Cross-references for this listing (full member documentation is in Ifpack2_Experimental_RBILUK_decl.hpp):

Ifpack2::Experimental::RBILUK -- ILU(k) factorization of a given Tpetra::BlockCrsMatrix. (Ifpack2_Experimental_RBILUK_decl.hpp:128)
Ifpack2::RILUK -- ILU(k) factorization of a given Tpetra::RowMatrix. (Ifpack2_RILUK_decl.hpp:243)
Ifpack2::IlukGraph -- Construct a level filled graph for use in computing an ILU(k) incomplete factorization. (Ifpack2_IlukGraph.hpp:97)
Ifpack2::LocalFilter -- Access only local rows and columns of a sparse matrix. (Ifpack2_LocalFilter_decl.hpp:160)
Ifpack2::OverlappingRowMatrix -- Sparse matrix (Tpetra::RowMatrix subclass) with ghost rows. (Ifpack2_OverlappingRowMatrix_decl.hpp:58)
Ifpack2_Utilities.hpp -- File for utility functions.

virtual ~RBILUK() -- Destructor (declared virtual for memory safety). (Ifpack2_Experimental_RBILUK_def.hpp:81)
void setMatrix(const Teuchos::RCP<const block_crs_matrix_type>& A) -- Change the matrix to be preconditioned. (Ifpack2_Experimental_RBILUK_def.hpp:86)
void initialize() -- Initialize by computing the symbolic incomplete factorization. (Ifpack2_Experimental_RBILUK_def.hpp:187)
void compute() -- Compute the (numeric) incomplete factorization. (Ifpack2_Experimental_RBILUK_def.hpp:448)
void apply(const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X, Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y, Teuchos::ETransp mode = Teuchos::NO_TRANS, scalar_type alpha = Teuchos::ScalarTraits<scalar_type>::one(), scalar_type beta = Teuchos::ScalarTraits<scalar_type>::zero()) const -- Apply the (inverse of the) incomplete factorization to X, resulting in Y. (Ifpack2_Experimental_RBILUK_def.hpp:782)
std::string description() const -- A one-line description of this object. (Ifpack2_Experimental_RBILUK_def.hpp:931)
Teuchos::RCP<const block_crs_matrix_type> getBlockMatrix() const -- Get the input matrix. (Ifpack2_Experimental_RBILUK_def.hpp:182)
const block_crs_matrix_type& getLBlock() const -- Return the L factor of the ILU factorization. (Ifpack2_Experimental_RBILUK_def.hpp:111)
const block_crs_matrix_type& getDBlock() const -- Return the diagonal entries of the ILU factorization. (Ifpack2_Experimental_RBILUK_def.hpp:125)
const block_crs_matrix_type& getUBlock() const -- Return the U factor of the ILU factorization. (Ifpack2_Experimental_RBILUK_def.hpp:139)

MatrixType::scalar_type scalar_type -- The type of the entries of the input MatrixType. (Ifpack2_Experimental_RBILUK_decl.hpp:136)
MatrixType::local_ordinal_type local_ordinal_type -- The type of local indices in the input MatrixType. (Ifpack2_Experimental_RBILUK_decl.hpp:142)
MatrixType::global_ordinal_type global_ordinal_type -- The type of global indices in the input MatrixType. (Ifpack2_Experimental_RBILUK_decl.hpp:145)
MatrixType::node_type node_type -- The Node type used by the input MatrixType. (Ifpack2_Experimental_RBILUK_decl.hpp:148)
Teuchos::ScalarTraits<scalar_type>::magnitudeType magnitude_type -- The type of the magnitude (absolute value) of a matrix entry. (Ifpack2_Experimental_RBILUK_decl.hpp:151)
Tpetra::RowMatrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type> row_matrix_type -- Tpetra::RowMatrix specialization used by this class. (Ifpack2_Experimental_RBILUK_decl.hpp:157)