Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_ILUT_def.hpp
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
43 #ifndef IFPACK2_ILUT_DEF_HPP
44 #define IFPACK2_ILUT_DEF_HPP
45 
46 // disable clang warnings
47 #if defined (__clang__) && !defined (__INTEL_COMPILER)
48 #pragma clang system_header
49 #endif
50 
51 #include "Ifpack2_Heap.hpp"
52 #include "Ifpack2_LocalFilter.hpp"
53 #include "Ifpack2_LocalSparseTriangularSolver.hpp"
54 #include "Ifpack2_Parameters.hpp"
55 #include "Ifpack2_Details_getParamTryingTypes.hpp"
56 #include "Tpetra_CrsMatrix.hpp"
57 #include "Teuchos_Time.hpp"
59 #include <type_traits>
60 
61 namespace Ifpack2 {
62 
63  namespace {
64 
89  template<class ScalarType>
91  ilutDefaultDropTolerance () {
93  typedef typename STS::magnitudeType magnitude_type;
95 
96  // 1/2. Hopefully this can be represented in magnitude_type.
97  const magnitude_type oneHalf = STM::one() / (STM::one() + STM::one());
98 
99  // The min ensures that in case magnitude_type has very low
100  // precision, we'll at least get some value strictly less than
101  // one.
102  return std::min (static_cast<magnitude_type> (1000) * STS::magnitude (STS::eps ()), oneHalf);
103  }
104 
105  // Full specialization for ScalarType = double.
106  // This specialization preserves ILUT's previous default behavior.
107  template<>
109  ilutDefaultDropTolerance<double> () {
110  return 1e-12;
111  }
112 
113  } // namespace (anonymous)
114 
115 
116 template <class MatrixType>
118  A_ (A),
119  Athresh_ (Teuchos::ScalarTraits<magnitude_type>::zero ()),
120  Rthresh_ (Teuchos::ScalarTraits<magnitude_type>::one ()),
121  RelaxValue_ (Teuchos::ScalarTraits<magnitude_type>::zero ()),
122  LevelOfFill_ (1.0),
123  DropTolerance_ (ilutDefaultDropTolerance<scalar_type> ()),
124  InitializeTime_ (0.0),
125  ComputeTime_ (0.0),
126  ApplyTime_ (0.0),
127  NumInitialize_ (0),
128  NumCompute_ (0),
129  NumApply_ (0),
130  IsInitialized_ (false),
131  IsComputed_ (false)
132 {
133  allocateSolvers();
134 }
135 
136 template<class MatrixType>
138 {
141 }
142 
143 template <class MatrixType>
145 {
146  using Details::getParamTryingTypes;
147  const char prefix[] = "Ifpack2::ILUT: ";
148 
149  // Don't actually change the instance variables until we've checked
150  // all parameters. This ensures that setParameters satisfies the
151  // strong exception guarantee (i.e., is transactional).
152 
153  // Fill level in ILUT is a double, not a magnitude_type, because it
154  // depends on LO and GO, not on Scalar. Also, you can't cast
155  // arbitrary magnitude_type (e.g., Sacado::MP::Vector) to double.
156  double fillLevel = LevelOfFill_;
157  {
158  const std::string paramName ("fact: ilut level-of-fill");
159  getParamTryingTypes<double, double, float>
160  (fillLevel, params, paramName, prefix);
162  (fillLevel < 1.0, std::runtime_error,
163  "Ifpack2::ILUT: The \"fact: ilut level-of-fill\" parameter must be >= "
164  "1.0, but you set it to " << fillLevel << ". For ILUT, the fill level "
165  "means something different than it does for ILU(k). ILU(0) produces "
166  "factors with the same sparsity structure as the input matrix A. For "
167  "ILUT, level-of-fill = 1.0 will produce factors with nonzeros matching "
168  "the sparsity structure of A. level-of-fill > 1.0 allows for additional "
169  "fill-in.");
170  }
171 
172  magnitude_type absThresh = Athresh_;
173  {
174  const std::string paramName ("fact: absolute threshold");
175  getParamTryingTypes<magnitude_type, magnitude_type, double>
176  (absThresh, params, paramName, prefix);
177  }
178 
179  magnitude_type relThresh = Rthresh_;
180  {
181  const std::string paramName ("fact: relative threshold");
182  getParamTryingTypes<magnitude_type, magnitude_type, double>
183  (relThresh, params, paramName, prefix);
184  }
185 
186  magnitude_type relaxValue = RelaxValue_;
187  {
188  const std::string paramName ("fact: relax value");
189  getParamTryingTypes<magnitude_type, magnitude_type, double>
190  (relaxValue, params, paramName, prefix);
191  }
192 
193  magnitude_type dropTol = DropTolerance_;
194  {
195  const std::string paramName ("fact: drop tolerance");
196  getParamTryingTypes<magnitude_type, magnitude_type, double>
197  (dropTol, params, paramName, prefix);
198  }
199 
200  // Forward to trisolvers.
201  L_solver_->setParameters(params);
202  U_solver_->setParameters(params);
203 
204  LevelOfFill_ = fillLevel;
205  Athresh_ = absThresh;
206  Rthresh_ = relThresh;
207  RelaxValue_ = relaxValue;
208  DropTolerance_ = dropTol;
209 }
210 
211 
212 template <class MatrixType>
216  A_.is_null (), std::runtime_error, "Ifpack2::ILUT::getComm: "
217  "The matrix is null. Please call setMatrix() with a nonnull input "
218  "before calling this method.");
219  return A_->getComm ();
220 }
221 
222 
223 template <class MatrixType>
226  return A_;
227 }
228 
229 
230 template <class MatrixType>
233 {
235  A_.is_null (), std::runtime_error, "Ifpack2::ILUT::getDomainMap: "
236  "The matrix is null. Please call setMatrix() with a nonnull input "
237  "before calling this method.");
238  return A_->getDomainMap ();
239 }
240 
241 
242 template <class MatrixType>
245 {
247  A_.is_null (), std::runtime_error, "Ifpack2::ILUT::getRangeMap: "
248  "The matrix is null. Please call setMatrix() with a nonnull input "
249  "before calling this method.");
250  return A_->getRangeMap ();
251 }
252 
253 
254 template <class MatrixType>
256  return true;
257 }
258 
259 
260 template <class MatrixType>
262  return NumInitialize_;
263 }
264 
265 
266 template <class MatrixType>
268  return NumCompute_;
269 }
270 
271 
272 template <class MatrixType>
274  return NumApply_;
275 }
276 
277 
278 template <class MatrixType>
280  return InitializeTime_;
281 }
282 
283 
284 template<class MatrixType>
286  return ComputeTime_;
287 }
288 
289 
290 template<class MatrixType>
292  return ApplyTime_;
293 }
294 
295 
296 template<class MatrixType>
299  A_.is_null (), std::runtime_error, "Ifpack2::ILUT::getNodeSmootherComplexity: "
300  "The input matrix A is null. Please call setMatrix() with a nonnull "
301  "input matrix, then call compute(), before calling this method.");
302  // ILUT methods cost roughly one apply + the nnz in the upper+lower triangles
303  return A_->getNodeNumEntries() + getNodeNumEntries();
304 }
305 
306 
307 template<class MatrixType>
309  return L_->getGlobalNumEntries () + U_->getGlobalNumEntries ();
310 }
311 
312 
313 template<class MatrixType>
315  return L_->getNodeNumEntries () + U_->getNodeNumEntries ();
316 }
317 
318 
319 template<class MatrixType>
321 {
322  if (A.getRawPtr () != A_.getRawPtr ()) {
323  // Check in serial or one-process mode if the matrix is square.
325  ! A.is_null () && A->getComm ()->getSize () == 1 &&
326  A->getNodeNumRows () != A->getNodeNumCols (),
327  std::runtime_error, "Ifpack2::ILUT::setMatrix: If A's communicator only "
328  "contains one process, then A must be square. Instead, you provided a "
329  "matrix A with " << A->getNodeNumRows () << " rows and "
330  << A->getNodeNumCols () << " columns.");
331 
332  // It's legal for A to be null; in that case, you may not call
333  // initialize() until calling setMatrix() with a nonnull input.
334  // Regardless, setting the matrix invalidates any previous
335  // factorization.
336  IsInitialized_ = false;
337  IsComputed_ = false;
338  A_local_ = Teuchos::null;
339 
340  // The sparse triangular solvers get a triangular factor as their
341  // input matrix. The triangular factors L_ and U_ are getting
342  // reset, so we reset the solvers' matrices to null. Do that
343  // before setting L_ and U_ to null, so that latter step actually
344  // frees the factors.
345  if (! L_solver_.is_null ()) {
346  L_solver_->setMatrix (Teuchos::null);
347  }
348  if (! U_solver_.is_null ()) {
349  U_solver_->setMatrix (Teuchos::null);
350  }
351 
352  L_ = Teuchos::null;
353  U_ = Teuchos::null;
354  A_ = A;
355  }
356 }
357 
358 
359 template<class MatrixType>
361 {
362  Teuchos::Time timer ("ILUT::initialize");
363  {
364  Teuchos::TimeMonitor timeMon (timer);
365 
366  // Check that the matrix is nonnull.
368  A_.is_null (), std::runtime_error, "Ifpack2::ILUT::initialize: "
369  "The matrix to precondition is null. Please call setMatrix() with a "
370  "nonnull input before calling this method.");
371 
372  // Clear any previous computations.
373  IsInitialized_ = false;
374  IsComputed_ = false;
375  A_local_ = Teuchos::null;
376  L_ = Teuchos::null;
377  U_ = Teuchos::null;
378 
379  A_local_ = makeLocalFilter (A_); // Compute the local filter.
380 
381  IsInitialized_ = true;
382  ++NumInitialize_;
383  }
384  InitializeTime_ += timer.totalElapsedTime ();
385 }
386 
387 
388 template<typename ScalarType>
390 scalar_mag (const ScalarType& s)
391 {
393 }
394 
395 
396 template<class MatrixType>
398 {
399  using Teuchos::Array;
400  using Teuchos::ArrayRCP;
401  using Teuchos::ArrayView;
402  using Teuchos::as;
403  using Teuchos::rcp;
404  using Teuchos::reduceAll;
405 
406  //--------------------------------------------------------------------------
407  // Ifpack2::ILUT is a translation of the Aztec ILUT implementation. The Aztec
408  // ILUT implementation was written by Ray Tuminaro.
409  //
410  // This isn't an exact translation of the Aztec ILUT algorithm, for the
411  // following reasons:
412  // 1. Minor differences result from the fact that Aztec factors a MSR format
413  // matrix in place, while the code below factors an input CrsMatrix which
414  // remains untouched and stores the resulting factors in separate L and U
415  // CrsMatrix objects.
416  // Also, the Aztec code begins by shifting the matrix pointers back
417  // by one, and the pointer contents back by one, and then using 1-based
418  // Fortran-style indexing in the algorithm. This Ifpack2 code uses C-style
419  // 0-based indexing throughout.
420  // 2. Aztec stores the inverse of the diagonal of U. This Ifpack2 code
421  // stores the non-inverted diagonal in U.
422  // The triangular solves (in Ifpack2::ILUT::apply()) are performed by
423  // calling the Tpetra::CrsMatrix::solve method on the L and U objects, and
424  // this requires U to contain the non-inverted diagonal.
425  //
426  // ABW.
427  //--------------------------------------------------------------------------
428 
429  // Don't count initialization in the compute() time.
430  if (! isInitialized ()) {
431  initialize ();
432  }
433 
434  Teuchos::Time timer ("ILUT::compute");
435  { // Timer scope for timing compute()
436  Teuchos::TimeMonitor timeMon (timer, true);
437  const scalar_type zero = STS::zero ();
438  const scalar_type one = STS::one ();
439 
440  const local_ordinal_type myNumRows = A_local_->getNodeNumRows ();
441 
442  // If this macro is defined, files containing the L and U factors
443  // will be written. DON'T CHECK IN THE CODE WITH THIS MACRO ENABLED!!!
444  // #define IFPACK2_WRITE_FACTORS
445 #ifdef IFPACK2_WRITE_FACTORS
446  std::ofstream ofsL("L.tif.mtx", std::ios::out);
447  std::ofstream ofsU("U.tif.mtx", std::ios::out);
448 #endif
449 
450  // Calculate how much fill will be allowed in addition to the
451  // space that corresponds to the input matrix entries.
452  double local_nnz = static_cast<double> (A_local_->getNodeNumEntries ());
453  double fill = ((getLevelOfFill () - 1.0) * local_nnz) / (2 * myNumRows);
454 
455  // std::ceil gives the smallest integer larger than the argument.
456  // this may give a slightly different result than Aztec's fill value in
457  // some cases.
458  double fill_ceil=std::ceil(fill);
459 
460  // Similarly to Aztec, we will allow the same amount of fill for each
461  // row, half in L and half in U.
462  size_type fillL = static_cast<size_type>(fill_ceil);
463  size_type fillU = static_cast<size_type>(fill_ceil);
464 
465  Array<scalar_type> InvDiagU (myNumRows, zero);
466 
467  Array<Array<local_ordinal_type> > L_tmp_idx(myNumRows);
468  Array<Array<scalar_type> > L_tmpv(myNumRows);
469  Array<Array<local_ordinal_type> > U_tmp_idx(myNumRows);
470  Array<Array<scalar_type> > U_tmpv(myNumRows);
471 
472  enum { UNUSED, ORIG, FILL };
473  local_ordinal_type max_col = myNumRows;
474 
475  Array<int> pattern(max_col, UNUSED);
476  Array<scalar_type> cur_row(max_col, zero);
477  Array<magnitude_type> unorm(max_col);
478  magnitude_type rownorm;
479  Array<local_ordinal_type> L_cols_heap;
480  Array<local_ordinal_type> U_cols;
481  Array<local_ordinal_type> L_vals_heap;
482  Array<local_ordinal_type> U_vals_heap;
483 
484  // A comparison object which will be used to create 'heaps' of indices
485  // that are ordered according to the corresponding values in the
486  // 'cur_row' array.
487  greater_indirect<scalar_type,local_ordinal_type> vals_comp(cur_row);
488 
489  // =================== //
490  // start factorization //
491  // =================== //
492 
493  ArrayRCP<local_ordinal_type> ColIndicesARCP;
494  ArrayRCP<scalar_type> ColValuesARCP;
495  if (! A_local_->supportsRowViews ()) {
496  const size_t maxnz = A_local_->getNodeMaxNumRowEntries ();
497  ColIndicesARCP.resize (maxnz);
498  ColValuesARCP.resize (maxnz);
499  }
500 
501  for (local_ordinal_type row_i = 0 ; row_i < myNumRows ; ++row_i) {
502  ArrayView<const local_ordinal_type> ColIndicesA;
503  ArrayView<const scalar_type> ColValuesA;
504  size_t RowNnz;
505 
506  if (A_local_->supportsRowViews ()) {
507  A_local_->getLocalRowView (row_i, ColIndicesA, ColValuesA);
508  RowNnz = ColIndicesA.size ();
509  }
510  else {
511  A_local_->getLocalRowCopy (row_i, ColIndicesARCP (), ColValuesARCP (), RowNnz);
512  ColIndicesA = ColIndicesARCP (0, RowNnz);
513  ColValuesA = ColValuesARCP (0, RowNnz);
514  }
515 
516  // Always include the diagonal in the U factor. The value should get
517  // set in the next loop below.
518  U_cols.push_back(row_i);
519  cur_row[row_i] = zero;
520  pattern[row_i] = ORIG;
521 
522  size_type L_cols_heaplen = 0;
523  rownorm = STM::zero ();
524  for (size_t i = 0; i < RowNnz; ++i) {
525  if (ColIndicesA[i] < myNumRows) {
526  if (ColIndicesA[i] < row_i) {
527  add_to_heap(ColIndicesA[i], L_cols_heap, L_cols_heaplen);
528  }
529  else if (ColIndicesA[i] > row_i) {
530  U_cols.push_back(ColIndicesA[i]);
531  }
532 
533  cur_row[ColIndicesA[i]] = ColValuesA[i];
534  pattern[ColIndicesA[i]] = ORIG;
535  rownorm += scalar_mag(ColValuesA[i]);
536  }
537  }
538 
539  // Alter the diagonal according to the absolute-threshold and
540  // relative-threshold values. If not set, those values default
541  // to zero and one respectively.
542  const magnitude_type rthresh = getRelativeThreshold();
543  const scalar_type& v = cur_row[row_i];
544  cur_row[row_i] = as<scalar_type> (getAbsoluteThreshold() * IFPACK2_SGN(v)) + rthresh*v;
545 
546  size_type orig_U_len = U_cols.size();
547  RowNnz = L_cols_heap.size() + orig_U_len;
548  rownorm = getDropTolerance() * rownorm/RowNnz;
549 
550  // The following while loop corresponds to the 'L30' goto's in Aztec.
551  size_type L_vals_heaplen = 0;
552  while (L_cols_heaplen > 0) {
553  local_ordinal_type row_k = L_cols_heap.front();
554 
555  scalar_type multiplier = cur_row[row_k] * InvDiagU[row_k];
556  cur_row[row_k] = multiplier;
557  magnitude_type mag_mult = scalar_mag(multiplier);
558  if (mag_mult*unorm[row_k] < rownorm) {
559  pattern[row_k] = UNUSED;
560  rm_heap_root(L_cols_heap, L_cols_heaplen);
561  continue;
562  }
563  if (pattern[row_k] != ORIG) {
564  if (L_vals_heaplen < fillL) {
565  add_to_heap(row_k, L_vals_heap, L_vals_heaplen, vals_comp);
566  }
567  else if (L_vals_heaplen==0 ||
568  mag_mult < scalar_mag(cur_row[L_vals_heap.front()])) {
569  pattern[row_k] = UNUSED;
570  rm_heap_root(L_cols_heap, L_cols_heaplen);
571  continue;
572  }
573  else {
574  pattern[L_vals_heap.front()] = UNUSED;
575  rm_heap_root(L_vals_heap, L_vals_heaplen, vals_comp);
576  add_to_heap(row_k, L_vals_heap, L_vals_heaplen, vals_comp);
577  }
578  }
579 
580  /* Reduce current row */
581 
582  ArrayView<local_ordinal_type> ColIndicesU = U_tmp_idx[row_k]();
583  ArrayView<scalar_type> ColValuesU = U_tmpv[row_k]();
584  size_type ColNnzU = ColIndicesU.size();
585 
586  for(size_type j=0; j<ColNnzU; ++j) {
587  if (ColIndicesU[j] > row_k) {
588  scalar_type tmp = multiplier * ColValuesU[j];
589  local_ordinal_type col_j = ColIndicesU[j];
590  if (pattern[col_j] != UNUSED) {
591  cur_row[col_j] -= tmp;
592  }
593  else if (scalar_mag(tmp) > rownorm) {
594  cur_row[col_j] = -tmp;
595  pattern[col_j] = FILL;
596  if (col_j > row_i) {
597  U_cols.push_back(col_j);
598  }
599  else {
600  add_to_heap(col_j, L_cols_heap, L_cols_heaplen);
601  }
602  }
603  }
604  }
605 
606  rm_heap_root(L_cols_heap, L_cols_heaplen);
607  }//end of while(L_cols_heaplen) loop
608 
609 
610  // Put indices and values for L into arrays and then into the L_ matrix.
611 
612  // first, the original entries from the L section of A:
613  for (size_type i = 0; i < ColIndicesA.size (); ++i) {
614  if (ColIndicesA[i] < row_i) {
615  L_tmp_idx[row_i].push_back(ColIndicesA[i]);
616  L_tmpv[row_i].push_back(cur_row[ColIndicesA[i]]);
617  pattern[ColIndicesA[i]] = UNUSED;
618  }
619  }
620 
621  // next, the L entries resulting from fill:
622  for (size_type j = 0; j < L_vals_heaplen; ++j) {
623  L_tmp_idx[row_i].push_back(L_vals_heap[j]);
624  L_tmpv[row_i].push_back(cur_row[L_vals_heap[j]]);
625  pattern[L_vals_heap[j]] = UNUSED;
626  }
627 
628  // L has a one on the diagonal, but we don't explicitly store
629  // it. If we don't store it, then the kernel which performs the
630  // triangular solve can assume a unit diagonal, take a short-cut
631  // and perform faster.
632 
633 #ifdef IFPACK2_WRITE_FACTORS
634  for (size_type ii = 0; ii < L_tmp_idx[row_i].size (); ++ii) {
635  ofsL << row_i << " " << L_tmp_idx[row_i][ii] << " "
636  << L_tmpv[row_i][ii] << std::endl;
637  }
638 #endif
639 
640 
641  // Pick out the diagonal element, store its reciprocal.
642  if (cur_row[row_i] == zero) {
643  std::cerr << "Ifpack2::ILUT::Compute: zero pivot encountered! "
644  << "Replacing with rownorm and continuing..."
645  << "(You may need to set the parameter "
646  << "'fact: absolute threshold'.)" << std::endl;
647  cur_row[row_i] = rownorm;
648  }
649  InvDiagU[row_i] = one / cur_row[row_i];
650 
651  // Non-inverted diagonal is stored for U:
652  U_tmp_idx[row_i].push_back(row_i);
653  U_tmpv[row_i].push_back(cur_row[row_i]);
654  unorm[row_i] = scalar_mag(cur_row[row_i]);
655  pattern[row_i] = UNUSED;
656 
657  // Now put indices and values for U into arrays and then into the U_ matrix.
658  // The first entry in U_cols is the diagonal, which we just handled, so we'll
659  // start our loop at j=1.
660 
661  size_type U_vals_heaplen = 0;
662  for(size_type j=1; j<U_cols.size(); ++j) {
663  local_ordinal_type col = U_cols[j];
664  if (pattern[col] != ORIG) {
665  if (U_vals_heaplen < fillU) {
666  add_to_heap(col, U_vals_heap, U_vals_heaplen, vals_comp);
667  }
668  else if (U_vals_heaplen!=0 && scalar_mag(cur_row[col]) >
669  scalar_mag(cur_row[U_vals_heap.front()])) {
670  rm_heap_root(U_vals_heap, U_vals_heaplen, vals_comp);
671  add_to_heap(col, U_vals_heap, U_vals_heaplen, vals_comp);
672  }
673  }
674  else {
675  U_tmp_idx[row_i].push_back(col);
676  U_tmpv[row_i].push_back(cur_row[col]);
677  unorm[row_i] += scalar_mag(cur_row[col]);
678  }
679  pattern[col] = UNUSED;
680  }
681 
682  for(size_type j=0; j<U_vals_heaplen; ++j) {
683  U_tmp_idx[row_i].push_back(U_vals_heap[j]);
684  U_tmpv[row_i].push_back(cur_row[U_vals_heap[j]]);
685  unorm[row_i] += scalar_mag(cur_row[U_vals_heap[j]]);
686  }
687 
688  unorm[row_i] /= (orig_U_len + U_vals_heaplen);
689 
690 #ifdef IFPACK2_WRITE_FACTORS
691  for(int ii=0; ii<U_tmp_idx[row_i].size(); ++ii) {
692  ofsU <<row_i<< " " <<U_tmp_idx[row_i][ii]<< " "
693  <<U_tmpv[row_i][ii]<< std::endl;
694  }
695 #endif
696 
697  L_cols_heap.clear();
698  U_cols.clear();
699  L_vals_heap.clear();
700  U_vals_heap.clear();
701  } // end of for(row_i) loop
702 
703  // Now allocate and fill the matrices
704  Array<size_t> nnzPerRow(myNumRows);
705 
706  // Make sure to release the old memory for L & U prior to recomputing to
707  // avoid bloating the high-water mark.
708  L_ = Teuchos::null;
709  U_ = Teuchos::null;
710  L_solver_->setMatrix(Teuchos::null);
711  U_solver_->setMatrix(Teuchos::null);
712 
713  for (local_ordinal_type row_i = 0 ; row_i < myNumRows ; ++row_i) {
714  nnzPerRow[row_i] = L_tmp_idx[row_i].size();
715  }
716 
717  L_ = rcp (new crs_matrix_type (A_local_->getRowMap(), A_local_->getColMap(),
718  nnzPerRow(), Tpetra::StaticProfile));
719 
720  for (local_ordinal_type row_i = 0 ; row_i < myNumRows ; ++row_i) {
721  L_->insertLocalValues (row_i, L_tmp_idx[row_i](), L_tmpv[row_i]());
722  }
723 
724  L_->fillComplete();
725 
726  for (local_ordinal_type row_i = 0 ; row_i < myNumRows ; ++row_i) {
727  nnzPerRow[row_i] = U_tmp_idx[row_i].size();
728  }
729 
730  U_ = rcp (new crs_matrix_type (A_local_->getRowMap(), A_local_->getColMap(),
731  nnzPerRow(), Tpetra::StaticProfile));
732 
733  for (local_ordinal_type row_i = 0 ; row_i < myNumRows ; ++row_i) {
734  U_->insertLocalValues (row_i, U_tmp_idx[row_i](), U_tmpv[row_i]());
735  }
736 
737  U_->fillComplete();
738 
739  L_solver_->setMatrix(L_);
740  L_solver_->initialize ();
741  L_solver_->compute ();
742 
743  U_solver_->setMatrix(U_);
744  U_solver_->initialize ();
745  U_solver_->compute ();
746  }
747  ComputeTime_ += timer.totalElapsedTime ();
748  IsComputed_ = true;
749  ++NumCompute_;
750 }
751 
752 
753 template <class MatrixType>
755 apply (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
756  Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
757  Teuchos::ETransp mode,
758  scalar_type alpha,
759  scalar_type beta) const
760 {
761  using Teuchos::RCP;
762  using Teuchos::rcp;
763  using Teuchos::rcpFromRef;
765 
767  ! isComputed (), std::runtime_error,
768  "Ifpack2::ILUT::apply: You must call compute() to compute the incomplete "
769  "factorization, before calling apply().");
770 
772  X.getNumVectors() != Y.getNumVectors(), std::runtime_error,
773  "Ifpack2::ILUT::apply: X and Y must have the same number of columns. "
774  "X has " << X.getNumVectors () << " columns, but Y has "
775  << Y.getNumVectors () << " columns.");
776 
777  const scalar_type one = STS::one ();
778  const scalar_type zero = STS::zero ();
779 
780  Teuchos::Time timer ("ILUT::apply");
781  { // Start timing
782  Teuchos::TimeMonitor timeMon (timer, true);
783 
784  if (alpha == one && beta == zero) {
785  if (mode == Teuchos::NO_TRANS) { // Solve L (U Y) = X for Y.
786  // Start by solving L Y = X for Y.
787  L_solver_->apply (X, Y, mode);
788 
789  // Solve U Y = Y.
790  U_solver_->apply (Y, Y, mode);
791  }
792  else { // Solve U^P (L^P Y)) = X for Y (where P is * or T).
793 
794  // Start by solving U^P Y = X for Y.
795  U_solver_->apply (X, Y, mode);
796 
797  // Solve L^P Y = Y.
798  L_solver_->apply (Y, Y, mode);
799  }
800  }
801  else { // alpha != 1 or beta != 0
802  if (alpha == zero) {
803  // The special case for beta == 0 ensures that if Y contains Inf
804  // or NaN values, we replace them with 0 (following BLAS
805  // convention), rather than multiplying them by 0 to get NaN.
806  if (beta == zero) {
807  Y.putScalar (zero);
808  } else {
809  Y.scale (beta);
810  }
811  } else { // alpha != zero
812  MV Y_tmp (Y.getMap (), Y.getNumVectors ());
813  apply (X, Y_tmp, mode);
814  Y.update (alpha, Y_tmp, beta);
815  }
816  }
817  }//end timing
818 
819  ++NumApply_;
820  ApplyTime_ += timer.totalElapsedTime ();
821 }
822 
823 
824 template <class MatrixType>
825 std::string ILUT<MatrixType>::description () const
826 {
827  std::ostringstream os;
828 
829  // Output is a valid YAML dictionary in flow style. If you don't
830  // like everything on a single line, you should call describe()
831  // instead.
832  os << "\"Ifpack2::ILUT\": {";
833  os << "Initialized: " << (isInitialized () ? "true" : "false") << ", "
834  << "Computed: " << (isComputed () ? "true" : "false") << ", ";
835 
836  os << "Level-of-fill: " << getLevelOfFill() << ", "
837  << "absolute threshold: " << getAbsoluteThreshold() << ", "
838  << "relative threshold: " << getRelativeThreshold() << ", "
839  << "relaxation value: " << getRelaxValue() << ", ";
840 
841  if (A_.is_null ()) {
842  os << "Matrix: null";
843  }
844  else {
845  os << "Global matrix dimensions: ["
846  << A_->getGlobalNumRows () << ", " << A_->getGlobalNumCols () << "]"
847  << ", Global nnz: " << A_->getGlobalNumEntries();
848  }
849 
850  os << "}";
851  return os.str ();
852 }
853 
854 
855 template <class MatrixType>
856 void
859  const Teuchos::EVerbosityLevel verbLevel) const
860 {
861  using Teuchos::Comm;
862  using Teuchos::OSTab;
863  using Teuchos::RCP;
865  using std::endl;
866  using Teuchos::VERB_DEFAULT;
867  using Teuchos::VERB_NONE;
868  using Teuchos::VERB_LOW;
869  using Teuchos::VERB_MEDIUM;
870  using Teuchos::VERB_HIGH;
871  using Teuchos::VERB_EXTREME;
872 
873  const Teuchos::EVerbosityLevel vl =
874  (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
875  OSTab tab0 (out);
876 
877  if (vl > VERB_NONE) {
878  out << "\"Ifpack2::ILUT\":" << endl;
879  OSTab tab1 (out);
880  out << "MatrixType: " << TypeNameTraits<MatrixType>::name () << endl;
881  if (this->getObjectLabel () != "") {
882  out << "Label: \"" << this->getObjectLabel () << "\"" << endl;
883  }
884  out << "Initialized: " << (isInitialized () ? "true" : "false")
885  << endl
886  << "Computed: " << (isComputed () ? "true" : "false")
887  << endl
888  << "Level of fill: " << getLevelOfFill () << endl
889  << "Absolute threshold: " << getAbsoluteThreshold () << endl
890  << "Relative threshold: " << getRelativeThreshold () << endl
891  << "Relax value: " << getRelaxValue () << endl;
892 
893  if (isComputed () && vl >= VERB_HIGH) {
894  const double fillFraction =
895  (double) getGlobalNumEntries () / (double) A_->getGlobalNumEntries ();
896  const double nnzToRows =
897  (double) getGlobalNumEntries () / (double) U_->getGlobalNumRows ();
898 
899  out << "Dimensions of L: [" << L_->getGlobalNumRows () << ", "
900  << L_->getGlobalNumRows () << "]" << endl
901  << "Dimensions of U: [" << U_->getGlobalNumRows () << ", "
902  << U_->getGlobalNumRows () << "]" << endl
903  << "Number of nonzeros in factors: " << getGlobalNumEntries () << endl
904  << "Fill fraction of factors over A: " << fillFraction << endl
905  << "Ratio of nonzeros to rows: " << nnzToRows << endl;
906  }
907 
908  out << "Number of initialize calls: " << getNumInitialize () << endl
909  << "Number of compute calls: " << getNumCompute () << endl
910  << "Number of apply calls: " << getNumApply () << endl
911  << "Total time in seconds for initialize: " << getInitializeTime () << endl
912  << "Total time in seconds for compute: " << getComputeTime () << endl
913  << "Total time in seconds for apply: " << getApplyTime () << endl;
914 
915  out << "Local matrix:" << endl;
916  A_local_->describe (out, vl);
917  }
918 }
919 
920 template <class MatrixType>
923 {
924  if (A->getComm ()->getSize () > 1) {
926  } else {
927  return A;
928  }
929 }
930 
931 }//namespace Ifpack2
932 
933 
934 // FIXME (mfh 16 Sep 2014) We should really only use RowMatrix here!
935 // There's no need to instantiate for CrsMatrix too. All Ifpack2
936 // preconditioners can and should do dynamic casts if they need a type
937 // more specific than RowMatrix.
938 
939 #define IFPACK2_ILUT_INSTANT(S,LO,GO,N) \
940  template class Ifpack2::ILUT< Tpetra::RowMatrix<S, LO, GO, N> >;
941 
942 #endif /* IFPACK2_ILUT_DEF_HPP */
ILUT(const Teuchos::RCP< const row_matrix_type > &A)
Constructor.
Definition: Ifpack2_ILUT_def.hpp:117
bool hasTransposeApply() const
Whether this object&#39;s apply() method can apply the transpose (or conjugate transpose, if applicable).
Definition: Ifpack2_ILUT_def.hpp:255
basic_OSTab< char > OSTab
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
global_size_t getGlobalNumEntries() const
Returns the number of nonzero entries in the global graph.
Definition: Ifpack2_ILUT_def.hpp:308
void initialize()
Clear any previously computed factors.
Definition: Ifpack2_ILUT_def.hpp:360
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object with some verbosity level to an FancyOStream object.
Definition: Ifpack2_ILUT_def.hpp:858
std::string description() const
Return a simple one-line description of this object.
Definition: Ifpack2_ILUT_def.hpp:825
ILUT (incomplete LU factorization with threshold) of a Tpetra sparse matrix.
Definition: Ifpack2_ILUT_decl.hpp:91
size_t getNodeNumEntries() const
Returns the number of nonzero entries in the local graph.
Definition: Ifpack2_ILUT_def.hpp:314
void rm_heap_root(Teuchos::Array< Ordinal > &heap, SizeType &heap_len)
Definition: Ifpack2_Heap.hpp:92
T * getRawPtr() const
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Teuchos::RCP< const map_type > getRangeMap() const
Tpetra::Map representing the range of this operator.
Definition: Ifpack2_ILUT_def.hpp:244
&quot;Preconditioner&quot; that solves local sparse triangular systems.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:77
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition: Ifpack2_ILUT_decl.hpp:106
void compute()
Compute factors L and U using the specified diagonal perturbation thresholds and relaxation parameter...
Definition: Ifpack2_ILUT_def.hpp:397
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Change the matrix to be preconditioned.
Definition: Ifpack2_ILUT_def.hpp:320
static magnitudeType magnitude(T a)
int getNumInitialize() const
Returns the number of calls to Initialize().
Definition: Ifpack2_ILUT_def.hpp:261
double getApplyTime() const
Returns the time spent in apply().
Definition: Ifpack2_ILUT_def.hpp:291
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Returns the input matrix&#39;s communicator.
Definition: Ifpack2_ILUT_def.hpp:214
Tpetra::CrsMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > crs_matrix_type
Type of the Tpetra::CrsMatrix specialization that this class uses for the L and U factors...
Definition: Ifpack2_ILUT_decl.hpp:126
TypeTo as(const TypeFrom &t)
Teuchos::RCP< const map_type > getDomainMap() const
Tpetra::Map representing the domain of this operator.
Definition: Ifpack2_ILUT_def.hpp:232
double totalElapsedTime(bool readCurrentTime=false) const
void add_to_heap(const Ordinal &idx, Teuchos::Array< Ordinal > &heap, SizeType &heap_len)
Definition: Ifpack2_Heap.hpp:70
size_t getNodeSmootherComplexity() const
Get a rough estimate of cost per iteration.
Definition: Ifpack2_ILUT_def.hpp:297
Access only local rows and columns of a sparse matrix.
Definition: Ifpack2_LocalFilter_decl.hpp:160
double getInitializeTime() const
Returns the time spent in Initialize().
Definition: Ifpack2_ILUT_def.hpp:279
int getNumApply() const
Returns the number of calls to apply().
Definition: Ifpack2_ILUT_def.hpp:273
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the ILUT preconditioner to X, resulting in Y.
Definition: Ifpack2_ILUT_def.hpp:755
double getComputeTime() const
Returns the time spent in Compute().
Definition: Ifpack2_ILUT_def.hpp:285
void setParameters(const Teuchos::ParameterList &params)
Set preconditioner parameters.
Definition: Ifpack2_ILUT_def.hpp:144
int getNumCompute() const
Returns the number of calls to Compute().
Definition: Ifpack2_ILUT_def.hpp:267
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition: Ifpack2_ILUT_decl.hpp:109
bool is_null() const
Teuchos::RCP< const row_matrix_type > getMatrix() const
Returns a reference to the matrix to be preconditioned.
Definition: Ifpack2_ILUT_def.hpp:225