Amesos2 - Direct Sparse Solver Interfaces  Version of the Day
Amesos2_Superlu_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Amesos2: Templated Direct Sparse Solver Package
4 //
5 // Copyright 2011 NTESS and the Amesos2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
19 #ifndef AMESOS2_SUPERLU_DEF_HPP
20 #define AMESOS2_SUPERLU_DEF_HPP
21 
22 #include <Teuchos_Tuple.hpp>
23 #include <Teuchos_ParameterList.hpp>
24 #include <Teuchos_StandardParameterEntryValidators.hpp>
25 
27 #include "Amesos2_Superlu_decl.hpp"
28 
29 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
30 // TODO: This 'using namespace SLU' is not a good thing.
31 // It was added because kernels does not namespace SuperLU but Amesos2 does.
32 // Declaring the namespace SLU allows us to reuse that file without duplication.
33 // We need to determine a uniform system to avoid this this but for now, at least
34 // this issue is only present when TRSV is enabled.
35 using namespace SLU;
36 #include "KokkosSparse_sptrsv_superlu.hpp"
37 #endif
38 
39 namespace Amesos2 {
40 
41 
42 template <class Matrix, class Vector>
44  Teuchos::RCP<const Matrix> A,
45  Teuchos::RCP<Vector> X,
46  Teuchos::RCP<const Vector> B )
47  : SolverCore<Amesos2::Superlu,Matrix,Vector>(A, X, B)
48 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
49  , sptrsv_invert_diag_(true)
50  , sptrsv_invert_offdiag_ (false)
51  , sptrsv_u_in_csr_ (true)
52  , sptrsv_merge_supernodes_ (false)
53  , sptrsv_use_spmv_ (false)
54 #endif
55  , is_contiguous_(true) // default is set by params
56  , use_triangular_solves_(false) // default is set by params
57  , use_metis_(false)
58  , symmetrize_metis_(true)
59 {
60  // ilu_set_default_options is called later in set parameter list if required.
61  // This is not the ideal way, but the other option to always call
62  // ilu_set_default_options here and assuming it won't have any side effect
63  // in the TPL is more dangerous. It is not a good idea to rely on external
64  // libraries' internal "features".
65  SLU::set_default_options(&(data_.options));
66  // Override some default options
67  data_.options.PrintStat = SLU::NO;
68 
69  SLU::StatInit(&(data_.stat));
70 
71  Kokkos::resize(data_.perm_r, this->globalNumRows_);
72  Kokkos::resize(data_.perm_c, this->globalNumCols_);
73  Kokkos::resize(data_.etree, this->globalNumCols_);
74  Kokkos::resize(data_.R, this->globalNumRows_);
75  Kokkos::resize(data_.C, this->globalNumCols_);
76 
77  data_.relax = SLU::sp_ienv(2); // Query optimal relax param from superlu
78  data_.panel_size = SLU::sp_ienv(1); // Query optimal panel size
79 
80  data_.equed = 'N'; // No equilibration
81  data_.rowequ = false;
82  data_.colequ = false;
83  data_.A.Store = NULL;
84  data_.L.Store = NULL;
85  data_.U.Store = NULL;
86  data_.X.Store = NULL;
87  data_.B.Store = NULL;
88 
89  ILU_Flag_=false; // default: turn off ILU
90 }
91 
92 
93 template <class Matrix, class Vector>
95 {
96  /* Free Superlu data_types
97  * - Matrices
98  * - Vectors
99  * - Stat object
100  */
101  SLU::StatFree( &(data_.stat) ) ;
102 
103  // Storage is initialized in numericFactorization_impl()
104  if ( data_.A.Store != NULL ){
105  SLU::Destroy_SuperMatrix_Store( &(data_.A) );
106  }
107 
108  // only root allocated these SuperMatrices.
109  if ( data_.L.Store != NULL ){ // will only be true for this->root_
110  SLU::Destroy_SuperNode_Matrix( &(data_.L) );
111  SLU::Destroy_CompCol_Matrix( &(data_.U) );
112  }
113 }
114 
115 template <class Matrix, class Vector>
116 std::string
118 {
119  std::ostringstream oss;
120  oss << "SuperLU solver interface";
121  if (ILU_Flag_) {
122  oss << ", \"ILUTP\" : {";
123  oss << "drop tol = " << data_.options.ILU_DropTol;
124  oss << ", fill factor = " << data_.options.ILU_FillFactor;
125  oss << ", fill tol = " << data_.options.ILU_FillTol;
126  switch(data_.options.ILU_MILU) {
127  case SLU::SMILU_1 :
128  oss << ", MILU 1";
129  break;
130  case SLU::SMILU_2 :
131  oss << ", MILU 2";
132  break;
133  case SLU::SMILU_3 :
134  oss << ", MILU 3";
135  break;
136  case SLU::SILU :
137  default:
138  oss << ", regular ILU";
139  }
140  switch(data_.options.ILU_Norm) {
141  case SLU::ONE_NORM :
142  oss << ", 1-norm";
143  break;
144  case SLU::TWO_NORM :
145  oss << ", 2-norm";
146  break;
147  case SLU::INF_NORM :
148  default:
149  oss << ", infinity-norm";
150  }
151  oss << "}";
152  } else {
153  oss << ", direct solve";
154  }
155  return oss.str();
156  /*
157 
158  // ILU parameters
159  if( parameterList->isParameter("RowPerm") ){
160  RCP<const ParameterEntryValidator> rowperm_validator = valid_params->getEntry("RowPerm").validator();
161  parameterList->getEntry("RowPerm").setValidator(rowperm_validator);
162  data_.options.RowPerm = getIntegralValue<SLU::rowperm_t>(*parameterList, "RowPerm");
163  }
164 
165 
166  */
167 }
168 
169 template<class Matrix, class Vector>
170 int
172 {
173  /*
174  * Get column permutation vector perm_c[], according to permc_spec:
175  * permc_spec = NATURAL: natural ordering
176  * permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A
177  * permc_spec = MMD_ATA: minimum degree on structure of A'*A
178  * permc_spec = COLAMD: approximate minimum degree column ordering
179  * permc_spec = MY_PERMC: the ordering already supplied in perm_c[]
180  */
181  int permc_spec = data_.options.ColPerm;
182  if (!use_metis_ && permc_spec != SLU::MY_PERMC && this->root_ ){
183 #ifdef HAVE_AMESOS2_TIMERS
184  Teuchos::TimeMonitor preOrderTimer(this->timers_.preOrderTime_);
185 #endif
186 
187  SLU::get_perm_c(permc_spec, &(data_.A), data_.perm_c.data());
188  }
189 
190  return(0);
191 }
192 
193 
194 template <class Matrix, class Vector>
195 int
197 {
198  /*
199  * SuperLU performs symbolic factorization and numeric factorization
200  * together, but does leave some options for reusing symbolic
201  * structures that have been created on previous factorizations. If
202  * our Amesos2 user calls this function, that is an indication that
203  * the symbolic structure of the matrix is no longer valid, and
204  * SuperLU should do the factorization from scratch.
205  *
206  * This can be accomplished by setting the options.Fact flag to
207  * DOFACT, as well as setting our own internal flag to false.
208  */
209 #ifdef HAVE_AMESOS2_TIMERS
210  Teuchos::TimeMonitor symFactTime( this->timers_.symFactTime_ );
211 #endif
212  same_symbolic_ = false;
213  data_.options.Fact = SLU::DOFACT;
214 
215  if (use_metis_) {
216 #ifdef HAVE_AMESOS2_TIMERS
217  Teuchos::RCP< Teuchos::Time > MetisTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for METIS");
218  Teuchos::TimeMonitor numFactTimer(*MetisTimer_);
219 #endif
220  size_t n = this->globalNumRows_;
221  size_t nz = this->globalNumNonZeros_;
222  host_size_type_array host_rowind_view_ = host_rows_view_;
223  host_ordinal_type_array host_colptr_view_ = host_col_ptr_view_;
224 
225  /* symmetrize the matrix (A + A') if needed */
226  if (symmetrize_metis_) {
227  int new_nz = 0;
228  int *new_col_ptr;
229  int *new_row_ind;
230 
231  // NOTE: both size_type and ordinal_type are defined as int
232  // Consider using "symmetrize_graph" in KokkosKernels, if this becomes significant in time.
233  SLU::at_plus_a(n, nz, host_col_ptr_view_.data(), host_rows_view_.data(),
234  &new_nz, &new_col_ptr, &new_row_ind);
235 
236  // reallocate (so that we won't overwrite the original)
237  // and copy for output
238  nz = new_nz;
239  Kokkos::resize (host_rowind_view_, nz);
240  Kokkos::realloc(host_colptr_view_, n+1);
241  for (size_t i = 0; i <= n; i++) {
242  host_colptr_view_(i) = new_col_ptr[i];
243  }
244  for (size_t i = 0; i < nz; i++) {
245  host_rowind_view_(i) = new_row_ind[i];
246  }
247 
248  // free
249  SLU::SUPERLU_FREE(new_col_ptr);
250  if (new_nz > 0) {
251  SLU::SUPERLU_FREE(new_row_ind);
252  }
253  }
254 
255  // reorder will convert both graph and perm/iperm to the internal METIS integer type
256  using metis_int_array_type = host_ordinal_type_array;
257  metis_int_array_type metis_perm = metis_int_array_type(Kokkos::ViewAllocateWithoutInitializing("metis_perm"), n);
258  metis_int_array_type metis_iperm = metis_int_array_type(Kokkos::ViewAllocateWithoutInitializing("metis_iperm"), n);
259 
260  // call METIS
261  size_t new_nnz = 0;
262  Amesos2::Util::reorder(
263  host_colptr_view_, host_rowind_view_,
264  metis_perm, metis_iperm, new_nnz, false);
265 
266  for (size_t i = 0; i < n; i++) {
267  data_.perm_r(i) = metis_iperm(i);
268  data_.perm_c(i) = metis_iperm(i);
269  }
270 
271  // SLU will use user-specified METIS ordering
272  data_.options.ColPerm = SLU::MY_PERMC;
273  data_.options.RowPerm = SLU::MY_PERMR;
274  // Turn off Equil not to mess up METIS ordering?
275  //data_.options.Equil = SLU::NO;
276  }
277  return(0);
278 }
279 
280 
281 template <class Matrix, class Vector>
282 int
284 {
285  using Teuchos::as;
286 
287  // Cleanup old L and U matrices if we are not reusing a symbolic
288  // factorization. Stores and other data will be allocated in gstrf.
289  // Only rank 0 has valid pointers
290  if ( !same_symbolic_ && data_.L.Store != NULL ){
291  SLU::Destroy_SuperNode_Matrix( &(data_.L) );
292  SLU::Destroy_CompCol_Matrix( &(data_.U) );
293  data_.L.Store = NULL;
294  data_.U.Store = NULL;
295  }
296 
297  if( same_symbolic_ ) data_.options.Fact = SLU::SamePattern_SameRowPerm;
298 
299  int info = 0;
300  int info2 = 0; // for Equil
301  if ( this->root_ ) {
302 
303 #ifdef HAVE_AMESOS2_DEBUG
304  TEUCHOS_TEST_FOR_EXCEPTION( data_.A.ncol != as<int>(this->globalNumCols_),
305  std::runtime_error,
306  "Error in converting to SuperLU SuperMatrix: wrong number of global columns." );
307  TEUCHOS_TEST_FOR_EXCEPTION( data_.A.nrow != as<int>(this->globalNumRows_),
308  std::runtime_error,
309  "Error in converting to SuperLU SuperMatrix: wrong number of global rows." );
310 #endif
311 
312  if( data_.options.Equil == SLU::YES ){
313  magnitude_type rowcnd, colcnd, amax;
314 
315  // calculate row and column scalings
316  function_map::gsequ(&(data_.A), data_.R.data(),
317  data_.C.data(), &rowcnd, &colcnd,
318  &amax, &info2);
319 
320  if (info2 == 0) {
321  // apply row and column scalings if necessary
322  function_map::laqgs(&(data_.A), data_.R.data(),
323  data_.C.data(), rowcnd, colcnd,
324  amax, &(data_.equed));
325 
326  // check what types of equilibration was actually done
327  data_.rowequ = (data_.equed == 'R') || (data_.equed == 'B');
328  data_.colequ = (data_.equed == 'C') || (data_.equed == 'B');
329  }
330  }
331 
332  if (info2 == 0) {
333  // Apply the column permutation computed in preOrdering. Place the
334  // column-permuted matrix in AC
335  SLU::sp_preorder(&(data_.options), &(data_.A), data_.perm_c.data(),
336  data_.etree.data(), &(data_.AC));
337 
338  { // Do factorization
339 #ifdef HAVE_AMESOS2_TIMERS
340  Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
341 #endif
342 
343 #ifdef HAVE_AMESOS2_VERBOSE_DEBUG
344  std::cout << "Superlu:: Before numeric factorization" << std::endl;
345  std::cout << "nzvals_ : " << nzvals_.toString() << std::endl;
346  std::cout << "rowind_ : " << rowind_.toString() << std::endl;
347  std::cout << "colptr_ : " << colptr_.toString() << std::endl;
348 #endif
349 
350  if(ILU_Flag_==false) {
351  function_map::gstrf(&(data_.options), &(data_.AC),
352  data_.relax, data_.panel_size, data_.etree.data(),
353  NULL, 0, data_.perm_c.data(), data_.perm_r.data(),
354  &(data_.L), &(data_.U),
355 #ifdef HAVE_AMESOS2_SUPERLU5_API
356  &(data_.lu),
357 #endif
358  &(data_.stat), &info);
359  }
360  else {
361  function_map::gsitrf(&(data_.options), &(data_.AC),
362  data_.relax, data_.panel_size, data_.etree.data(),
363  NULL, 0, data_.perm_c.data(), data_.perm_r.data(),
364  &(data_.L), &(data_.U),
365 #ifdef HAVE_AMESOS2_SUPERLU5_API
366  &(data_.lu),
367 #endif
368  &(data_.stat), &info);
369  }
370 
371  if (data_.options.ConditionNumber == SLU::YES) {
372  char norm[1];
373  if (data_.options.Trans == SLU::NOTRANS) {
374  *(unsigned char *)norm = '1';
375  } else {
376  *(unsigned char *)norm = 'I';
377  }
378 
379  data_.anorm = function_map::langs(norm, &(data_.A));
380  function_map::gscon(norm, &(data_.L), &(data_.U),
381  data_.anorm, &(data_.rcond),
382  &(data_.stat), &info);
383  }
384  }
385  // Cleanup. AC data will be alloc'd again for next factorization (if at all)
386  SLU::Destroy_CompCol_Permuted( &(data_.AC) );
387 
388  // Set the number of non-zero values in the L and U factors
389  this->setNnzLU( as<size_t>(((SLU::SCformat*)data_.L.Store)->nnz +
390  ((SLU::NCformat*)data_.U.Store)->nnz) );
391  } // end of if (info2 == 0)
392  } // end of if (this->root)
393 
394  if( data_.options.Equil == SLU::YES ) { // error check for Equil
395  /* All processes should have the same error code */
396  Teuchos::broadcast(*(this->getComm()), 0, &info2);
397 
398  TEUCHOS_TEST_FOR_EXCEPTION
399  (info2 < 0, std::runtime_error,
400  "SuperLU's gsequ function returned with status " << info2 << " < 0."
401  " This means that argument " << (-info2) << " given to the function"
402  " had an illegal value.");
403  if (info2 > 0) {
404  if (info2 <= data_.A.nrow) {
405  TEUCHOS_TEST_FOR_EXCEPTION
406  (true, std::runtime_error, "SuperLU's gsequ function returned with "
407  "info = " << info2 << " > 0, and info <= A.nrow = " << data_.A.nrow
408  << ". This means that row " << info2 << " of A is exactly zero.");
409  }
410  else if (info2 > data_.A.ncol) {
411  TEUCHOS_TEST_FOR_EXCEPTION
412  (true, std::runtime_error, "SuperLU's gsequ function returned with "
413  "info = " << info2 << " > 0, and info > A.ncol = " << data_.A.ncol
414  << ". This means that column " << (info2 - data_.A.nrow) << " of "
415  "A is exactly zero.");
416  }
417  else {
418  TEUCHOS_TEST_FOR_EXCEPTION
419  (true, std::runtime_error, "SuperLU's gsequ function returned "
420  "with info = " << info2 << " > 0, but its value is not in the "
421  "range permitted by the documentation. This should never happen "
422  "(it appears to be SuperLU's fault).");
423  }
424  }
425  }
426 
427  /* All processes should have the same error code */
428  Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
429 
430  global_size_type info_st = as<global_size_type>(info);
431  TEUCHOS_TEST_FOR_EXCEPTION( (info_st > 0) && (info_st <= this->globalNumCols_),
432  std::runtime_error,
433  "Factorization complete, but matrix is singular. Division by zero eminent");
434  TEUCHOS_TEST_FOR_EXCEPTION( (info_st > 0) && (info_st > this->globalNumCols_),
435  std::runtime_error,
436  "Memory allocation failure in Superlu factorization");
437 
438  data_.options.Fact = SLU::FACTORED;
439  same_symbolic_ = true;
440 
441  if(use_triangular_solves_) {
442 #ifdef HAVE_AMESOS2_VERBOSE_DEBUG
443  #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
444  if (this->getComm()->getRank()) {
445  std::cout << " > Metis : " << (use_metis_ ? "YES" : "NO") << std::endl;
446  std::cout << " > Equil : " << (data_.options.Equil == SLU::YES ? "YES" : "NO") << std::endl;
447  std::cout << " > Cond Number : " << (data_.options.ConditionNumber == SLU::YES ? "YES" : "NO") << std::endl;
448  std::cout << " > Invert diag : " << sptrsv_invert_diag_ << std::endl;
449  std::cout << " > Invert off-diag : " << sptrsv_invert_offdiag_ << std::endl;
450  std::cout << " > U in CSR : " << sptrsv_u_in_csr_ << std::endl;
451  std::cout << " > Merge : " << sptrsv_merge_supernodes_ << std::endl;
452  std::cout << " > Use SpMV : " << sptrsv_use_spmv_ << std::endl;
453  }
454  //std::cout << myRank << " : siize(A) " << (data_.A.nrow) << " x " << (data_.A.ncol) << std::endl;
455  //std::cout << myRank << " : nnz(A) " << ((SLU::NCformat*)data_.A.Store)->nnz << std::endl;
456  //std::cout << myRank << " : nnz(L) " << ((SLU::SCformat*)data_.L.Store)->nnz << std::endl;
457  //std::cout << myRank << " : nnz(U) " << ((SLU::SCformat*)data_.U.Store)->nnz << std::endl;
458  #endif
459 #endif
460 #ifdef HAVE_AMESOS2_TIMERS
461  Teuchos::RCP< Teuchos::Time > SpTrsvTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for SpTrsv setup");
462  Teuchos::TimeMonitor numFactTimer(*SpTrsvTimer_);
463 #endif
464  triangular_solve_factor();
465  }
466 
467  return(info);
468 }
469 
470 
471 template <class Matrix, class Vector>
472 int
474  const Teuchos::Ptr<const MultiVecAdapter<Vector> > B) const
475 {
476  using Teuchos::as;
477 #ifdef HAVE_AMESOS2_TIMERS
478  Teuchos::RCP< Teuchos::Time > Amesos2SolveTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for Amesos2");
479  Teuchos::TimeMonitor solveTimer(*Amesos2SolveTimer_);
480 #endif
481 
482  const global_size_type ld_rhs = this->root_ ? X->getGlobalLength() : 0;
483  const size_t nrhs = X->getGlobalNumVectors();
484 
485  bool bDidAssignX = false; // will be set below
486  bool bDidAssignB = false; // will be set below
487  { // Get values from RHS B
488 #ifdef HAVE_AMESOS2_TIMERS
489  Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
490  Teuchos::TimeMonitor redistTimer( this->timers_.vecRedistTime_ );
491 #endif
492 
493  // In general we may want to write directly to the x space without a copy.
494  // So we 'get' x which may be a direct view assignment to the MV.
495  const bool initialize_data = true;
496  const bool do_not_initialize_data = false;
497  if(use_triangular_solves_) { // to device
498 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
499  bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
500  device_solve_array_t>::do_get(do_not_initialize_data, X, device_xValues_,
501  as<size_t>(ld_rhs),
502  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
503  this->rowIndexBase_);
504  bDidAssignB = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
505  device_solve_array_t>::do_get(initialize_data, B, device_bValues_,
506  as<size_t>(ld_rhs),
507  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
508  this->rowIndexBase_);
509 #endif
510  }
511  else { // to host
512  bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
513  host_solve_array_t>::do_get(do_not_initialize_data, X, host_xValues_,
514  as<size_t>(ld_rhs),
515  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
516  this->rowIndexBase_);
517  bDidAssignB = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
518  host_solve_array_t>::do_get(initialize_data, B, host_bValues_,
519  as<size_t>(ld_rhs),
520  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
521  this->rowIndexBase_);
522  }
523  }
524 
525  // If equilibration was applied at numeric, then gssvx and gsisx are going to
526  // modify B, so we can't use the optimized assignment to B since we'll change
527  // the source test vector and then fail the subsequent cycle. We need a copy.
528  // If bDidAssignB is false, then we skip the copy since it was copied already.
529  if(bDidAssignB && data_.equed != 'N') {
530  if(use_triangular_solves_) { // to device
531 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
532  device_solve_array_t copyB(Kokkos::ViewAllocateWithoutInitializing("copyB"),
533  device_bValues_.extent(0), device_bValues_.extent(1));
534  Kokkos::deep_copy(copyB, device_bValues_);
535  device_bValues_ = copyB;
536 #endif
537  }
538  else {
539  host_solve_array_t copyB(Kokkos::ViewAllocateWithoutInitializing("copyB"),
540  host_bValues_.extent(0), host_bValues_.extent(1));
541  Kokkos::deep_copy(copyB, host_bValues_);
542  host_bValues_ = copyB;
543  }
544  }
545 
546  int ierr = 0; // returned error code
547 
548  magnitude_type rpg, rcond;
549  if ( this->root_ ) {
550  // Note: the values of B and X (after solution) are adjusted
551  // appropriately within gssvx for row and column scalings.
552  { // Do solve!
553 #ifdef HAVE_AMESOS2_TIMERS
554  Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
555 #endif
556 
557  if(use_triangular_solves_) {
558  triangular_solve();
559  }
560  else {
561  Kokkos::resize(data_.ferr, nrhs);
562  Kokkos::resize(data_.berr, nrhs);
563 
564  {
565  #ifdef HAVE_AMESOS2_TIMERS
566  Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
567  #endif
568  SLU::Dtype_t dtype = type_map::dtype;
569  int i_ld_rhs = as<int>(ld_rhs);
570  function_map::create_Dense_Matrix(&(data_.B), i_ld_rhs, as<int>(nrhs),
571  convert_bValues_, host_bValues_, i_ld_rhs,
572  SLU::SLU_DN, dtype, SLU::SLU_GE);
573  function_map::create_Dense_Matrix(&(data_.X), i_ld_rhs, as<int>(nrhs),
574  convert_xValues_, host_xValues_, i_ld_rhs,
575  SLU::SLU_DN, dtype, SLU::SLU_GE);
576  }
577 
578  if(ILU_Flag_==false) {
579  function_map::gssvx(&(data_.options), &(data_.A),
580  data_.perm_c.data(), data_.perm_r.data(),
581  data_.etree.data(), &(data_.equed), data_.R.data(),
582  data_.C.data(), &(data_.L), &(data_.U), NULL, 0, &(data_.B),
583  &(data_.X), &rpg, &rcond, data_.ferr.data(),
584  data_.berr.data(),
585  #ifdef HAVE_AMESOS2_SUPERLU5_API
586  &(data_.lu),
587  #endif
588  &(data_.mem_usage), &(data_.stat), &ierr);
589  }
590  else {
591  function_map::gsisx(&(data_.options), &(data_.A),
592  data_.perm_c.data(), data_.perm_r.data(),
593  data_.etree.data(), &(data_.equed), data_.R.data(),
594  data_.C.data(), &(data_.L), &(data_.U), NULL, 0, &(data_.B),
595  &(data_.X), &rpg, &rcond,
596  #ifdef HAVE_AMESOS2_SUPERLU5_API
597  &(data_.lu),
598  #endif
599  &(data_.mem_usage), &(data_.stat), &ierr);
600  }
601 
602  // Cleanup X and B stores
603  SLU::Destroy_SuperMatrix_Store( &(data_.X) );
604  SLU::Destroy_SuperMatrix_Store( &(data_.B) );
605  data_.X.Store = NULL;
606  data_.B.Store = NULL;
607  }
608 
609  } // do solve
610 
611  // convert back to Kokkos views
612  {
613 #ifdef HAVE_AMESOS2_TIMERS
614  Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
615 #endif
616  function_map::convert_back_Dense_Matrix(convert_bValues_, host_bValues_);
617  function_map::convert_back_Dense_Matrix(convert_xValues_, host_xValues_);
618  }
619 
620  // Cleanup X and B stores
621  SLU::Destroy_SuperMatrix_Store( &(data_.X) );
622  SLU::Destroy_SuperMatrix_Store( &(data_.B) );
623  data_.X.Store = NULL;
624  data_.B.Store = NULL;
625  }
626 
627  /* All processes should have the same error code */
628  Teuchos::broadcast(*(this->getComm()), 0, &ierr);
629 
630  global_size_type ierr_st = as<global_size_type>(ierr);
631  TEUCHOS_TEST_FOR_EXCEPTION( ierr < 0,
632  std::invalid_argument,
633  "Argument " << -ierr << " to SuperLU xgssvx had illegal value" );
634  TEUCHOS_TEST_FOR_EXCEPTION( ierr > 0 && ierr_st <= this->globalNumCols_,
635  std::runtime_error,
636  "Factorization complete, but U is exactly singular" );
637  TEUCHOS_TEST_FOR_EXCEPTION( ierr > 0 && ierr_st > this->globalNumCols_ + 1,
638  std::runtime_error,
639  "SuperLU allocated " << ierr - this->globalNumCols_ << " bytes of "
640  "memory before allocation failure occured." );
641 
642  /* Update X's global values */
643 
644  // if bDidAssignX, then we solved straight to the adapter's X memory space without
645  // requiring additional memory allocation, so the x data is already in place.
646  if(!bDidAssignX) {
647 #ifdef HAVE_AMESOS2_TIMERS
648  Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
649 #endif
650 
651  if(use_triangular_solves_) { // to device
652 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
653  Util::put_1d_data_helper_kokkos_view<
654  MultiVecAdapter<Vector>,device_solve_array_t>::do_put(X, device_xValues_,
655  as<size_t>(ld_rhs),
656  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
657  this->rowIndexBase_);
658 #endif
659  }
660  else {
661  Util::put_1d_data_helper_kokkos_view<
662  MultiVecAdapter<Vector>,host_solve_array_t>::do_put(X, host_xValues_,
663  as<size_t>(ld_rhs),
664  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
665  this->rowIndexBase_);
666  }
667  }
668 
669  return(ierr);
670 }
671 
672 
673 template <class Matrix, class Vector>
674 bool
676 {
677  // The Superlu factorization routines can handle square as well as
678  // rectangular matrices, but Superlu can only apply the solve routines to
679  // square matrices, so we check the matrix for squareness.
680  return( this->matrixA_->getGlobalNumRows() == this->matrixA_->getGlobalNumCols() );
681 }
682 
683 
684 template <class Matrix, class Vector>
685 void
686 Superlu<Matrix,Vector>::setParameters_impl(const Teuchos::RCP<Teuchos::ParameterList> & parameterList )
687 {
688  using Teuchos::RCP;
689  using Teuchos::getIntegralValue;
690  using Teuchos::ParameterEntryValidator;
691 
692  RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
693 
694  ILU_Flag_ = parameterList->get<bool>("ILU_Flag",false);
695  if (ILU_Flag_) {
696  SLU::ilu_set_default_options(&(data_.options));
697  // Override some default options
698  data_.options.PrintStat = SLU::NO;
699  }
700 
701  data_.options.Trans = this->control_.useTranspose_ ? SLU::TRANS : SLU::NOTRANS;
702  // The SuperLU transpose option can override the Amesos2 option
703  if( parameterList->isParameter("Trans") ){
704  RCP<const ParameterEntryValidator> trans_validator = valid_params->getEntry("Trans").validator();
705  parameterList->getEntry("Trans").setValidator(trans_validator);
706 
707  data_.options.Trans = getIntegralValue<SLU::trans_t>(*parameterList, "Trans");
708  }
709 
710  if( parameterList->isParameter("IterRefine") ){
711  RCP<const ParameterEntryValidator> refine_validator = valid_params->getEntry("IterRefine").validator();
712  parameterList->getEntry("IterRefine").setValidator(refine_validator);
713 
714  data_.options.IterRefine = getIntegralValue<SLU::IterRefine_t>(*parameterList, "IterRefine");
715  }
716 
717  if( parameterList->isParameter("ColPerm") ){
718  RCP<const ParameterEntryValidator> colperm_validator = valid_params->getEntry("ColPerm").validator();
719  parameterList->getEntry("ColPerm").setValidator(colperm_validator);
720 
721  data_.options.ColPerm = getIntegralValue<SLU::colperm_t>(*parameterList, "ColPerm");
722  }
723 
724  data_.options.DiagPivotThresh = parameterList->get<double>("DiagPivotThresh", 1.0);
725 
726  bool equil = parameterList->get<bool>("Equil", true);
727  data_.options.Equil = equil ? SLU::YES : SLU::NO;
728 
729  bool condNum = parameterList->get<bool>("ConditionNumber", false);
730  data_.options.ConditionNumber = condNum ? SLU::YES : SLU::NO;
731 
732  bool symmetric_mode = parameterList->get<bool>("SymmetricMode", false);
733  data_.options.SymmetricMode = symmetric_mode ? SLU::YES : SLU::NO;
734 
735  // ILU parameters
736  if( parameterList->isParameter("RowPerm") ){
737  RCP<const ParameterEntryValidator> rowperm_validator = valid_params->getEntry("RowPerm").validator();
738  parameterList->getEntry("RowPerm").setValidator(rowperm_validator);
739  data_.options.RowPerm = getIntegralValue<SLU::rowperm_t>(*parameterList, "RowPerm");
740  }
741 
742  /*if( parameterList->isParameter("ILU_DropRule") ) {
743  RCP<const ParameterEntryValidator> droprule_validator = valid_params->getEntry("ILU_DropRule").validator();
744  parameterList->getEntry("ILU_DropRule").setValidator(droprule_validator);
745  data_.options.ILU_DropRule = getIntegralValue<SLU::rule_t>(*parameterList, "ILU_DropRule");
746  }*/
747 
748  data_.options.ILU_DropTol = parameterList->get<double>("ILU_DropTol", 0.0001);
749 
750  data_.options.ILU_FillFactor = parameterList->get<double>("ILU_FillFactor", 10.0);
751 
752  if( parameterList->isParameter("ILU_Norm") ) {
753  RCP<const ParameterEntryValidator> norm_validator = valid_params->getEntry("ILU_Norm").validator();
754  parameterList->getEntry("ILU_Norm").setValidator(norm_validator);
755  data_.options.ILU_Norm = getIntegralValue<SLU::norm_t>(*parameterList, "ILU_Norm");
756  }
757 
758  if( parameterList->isParameter("ILU_MILU") ) {
759  RCP<const ParameterEntryValidator> milu_validator = valid_params->getEntry("ILU_MILU").validator();
760  parameterList->getEntry("ILU_MILU").setValidator(milu_validator);
761  data_.options.ILU_MILU = getIntegralValue<SLU::milu_t>(*parameterList, "ILU_MILU");
762  }
763 
764  data_.options.ILU_FillTol = parameterList->get<double>("ILU_FillTol", 0.01);
765 
766  is_contiguous_ = parameterList->get<bool>("IsContiguous", true);
767 
768  use_metis_ = parameterList->get<bool>("UseMetis", false);
769  symmetrize_metis_ = parameterList->get<bool>("SymmetrizeMetis", true);
770 
771  use_triangular_solves_ = parameterList->get<bool>("Enable_KokkosKernels_TriangularSolves", false);
772  if(use_triangular_solves_) {
773 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
774  // specify whether to invert diagonal blocks
775  sptrsv_invert_diag_ = parameterList->get<bool>("SpTRSV_Invert_Diag", true);
776  // specify whether to apply diagonal-inversion to off-diagonal blocks (optional, default is false)
777  sptrsv_invert_offdiag_ = parameterList->get<bool>("SpTRSV_Invert_OffDiag", false);
778  // specify whether to store U in CSR format
779  sptrsv_u_in_csr_ = parameterList->get<bool>("SpTRSV_U_CSR", true);
780  // specify whether to merge supernodes with similar sparsity structures
781  sptrsv_merge_supernodes_ = parameterList->get<bool>("SpTRSV_Merge_Supernodes", false);
782  // specify whether to use spmv for sptrsv
783  sptrsv_use_spmv_ = parameterList->get<bool>("SpTRSV_Use_SpMV", false);
784 #else
785  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error,
786  "Calling for triangular solves but KokkosKernels_ENABLE_SUPERNODAL_SPTRSV and KokkosKernels_ENABLE_TPL_SUPERLU were not configured." );
787 #endif
788  }
789 }
790 
791 
792 template <class Matrix, class Vector>
793 Teuchos::RCP<const Teuchos::ParameterList>
795 {
796  using std::string;
797  using Teuchos::tuple;
798  using Teuchos::ParameterList;
799  using Teuchos::EnhancedNumberValidator;
800  using Teuchos::setStringToIntegralParameter;
801  using Teuchos::stringToIntegralParameterEntryValidator;
802 
803  static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
804 
805  if( is_null(valid_params) ){
806  Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
807 
808  setStringToIntegralParameter<SLU::trans_t>("Trans", "NOTRANS",
809  "Solve for the transpose system or not",
810  tuple<string>("TRANS","NOTRANS","CONJ"),
811  tuple<string>("Solve with transpose",
812  "Do not solve with transpose",
813  "Solve with the conjugate transpose"),
814  tuple<SLU::trans_t>(SLU::TRANS,
815  SLU::NOTRANS,
816  SLU::CONJ),
817  pl.getRawPtr());
818 
819  setStringToIntegralParameter<SLU::IterRefine_t>("IterRefine", "NOREFINE",
820  "Type of iterative refinement to use",
821  tuple<string>("NOREFINE", "SLU_SINGLE", "SLU_DOUBLE"),
822  tuple<string>("Do not use iterative refinement",
823  "Do single iterative refinement",
824  "Do double iterative refinement"),
825  tuple<SLU::IterRefine_t>(SLU::NOREFINE,
826  SLU::SLU_SINGLE,
827  SLU::SLU_DOUBLE),
828  pl.getRawPtr());
829 
830  // Note: MY_PERMC not yet supported
831  setStringToIntegralParameter<SLU::colperm_t>("ColPerm", "COLAMD",
832  "Specifies how to permute the columns of the "
833  "matrix for sparsity preservation",
834  tuple<string>("NATURAL", "MMD_AT_PLUS_A",
835  "MMD_ATA", "COLAMD"),
836  tuple<string>("Natural ordering",
837  "Minimum degree ordering on A^T + A",
838  "Minimum degree ordering on A^T A",
839  "Approximate minimum degree column ordering"),
840  tuple<SLU::colperm_t>(SLU::NATURAL,
841  SLU::MMD_AT_PLUS_A,
842  SLU::MMD_ATA,
843  SLU::COLAMD),
844  pl.getRawPtr());
845 
846  Teuchos::RCP<EnhancedNumberValidator<double> > diag_pivot_thresh_validator
847  = Teuchos::rcp( new EnhancedNumberValidator<double>(0.0, 1.0) );
848  pl->set("DiagPivotThresh", 1.0,
849  "Specifies the threshold used for a diagonal entry to be an acceptable pivot",
850  diag_pivot_thresh_validator); // partial pivoting
851 
852  pl->set("Equil", true, "Whether to equilibrate the system before solve");
853  pl->set("ConditionNumber", false, "Whether to approximate condition number");
854 
855  pl->set("SymmetricMode", false,
856  "Specifies whether to use the symmetric mode. "
857  "Gives preference to diagonal pivots and uses "
858  "an (A^T + A)-based column permutation.");
859 
860  // ILU parameters
861 
862  setStringToIntegralParameter<SLU::rowperm_t>("RowPerm", "LargeDiag",
863  "Type of row permutation strategy to use",
864  tuple<string>("NOROWPERM","LargeDiag","MY_PERMR"),
865  tuple<string>("Use natural ordering",
866  "Use weighted bipartite matching algorithm",
867  "Use the ordering given in perm_r input"),
868  tuple<SLU::rowperm_t>(SLU::NOROWPERM,
869 #if SUPERLU_MAJOR_VERSION > 5 || (SUPERLU_MAJOR_VERSION == 5 && SUPERLU_MINOR_VERSION > 2) || (SUPERLU_MAJOR_VERSION == 5 && SUPERLU_MINOR_VERSION == 2 && SUPERLU_PATCH_VERSION >= 2)
870  SLU::LargeDiag_MC64,
871 #else
872  SLU::LargeDiag,
873 #endif
874  SLU::MY_PERMR),
875  pl.getRawPtr());
876 
877  /*setStringToIntegralParameter<SLU::rule_t>("ILU_DropRule", "DROP_BASIC",
878  "Type of dropping strategy to use",
879  tuple<string>("DROP_BASIC","DROP_PROWS",
880  "DROP_COLUMN","DROP_AREA",
881  "DROP_DYNAMIC","DROP_INTERP"),
882  tuple<string>("ILUTP(t)","ILUTP(p,t)",
883  "Variant of ILUTP(p,t) for j-th column",
884  "Variant of ILUTP to control memory",
885  "Dynamically adjust threshold",
886  "Compute second dropping threshold by interpolation"),
887  tuple<SLU::rule_t>(SLU::DROP_BASIC,SLU::DROP_PROWS,SLU::DROP_COLUMN,
888  SLU::DROP_AREA,SLU::DROP_DYNAMIC,SLU::DROP_INTERP),
889  pl.getRawPtr());*/
890 
891  pl->set("ILU_DropTol", 0.0001, "ILUT drop tolerance");
892 
893  pl->set("ILU_FillFactor", 10.0, "ILUT fill factor");
894 
895  setStringToIntegralParameter<SLU::norm_t>("ILU_Norm", "INF_NORM",
896  "Type of norm to use",
897  tuple<string>("ONE_NORM","TWO_NORM","INF_NORM"),
898  tuple<string>("1-norm","2-norm","inf-norm"),
899  tuple<SLU::norm_t>(SLU::ONE_NORM,SLU::TWO_NORM,SLU::INF_NORM),
900  pl.getRawPtr());
901 
902  setStringToIntegralParameter<SLU::milu_t>("ILU_MILU", "SILU",
903  "Type of modified ILU to use",
904  tuple<string>("SILU","SMILU_1","SMILU_2","SMILU_3"),
905  tuple<string>("Regular ILU","MILU 1","MILU 2","MILU 3"),
906  tuple<SLU::milu_t>(SLU::SILU,SLU::SMILU_1,SLU::SMILU_2,
907  SLU::SMILU_3),
908  pl.getRawPtr());
909 
910  pl->set("ILU_FillTol", 0.01, "ILUT fill tolerance");
911 
912  pl->set("ILU_Flag", false, "ILU flag: if true, run ILU routines");
913 
914  pl->set("IsContiguous", true, "Whether GIDs contiguous");
915 
916  // call METIS before SuperLU
917  pl->set("UseMetis", false, "Whether to call METIS before SuperLU");
918  pl->set("SymmetrizeMetis", true, "Whether to symmetrize matrix before METIS");
919 
920  pl->set("Enable_KokkosKernels_TriangularSolves", false, "Whether to use triangular solves.");
921 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
922  pl->set("SpTRSV_Invert_Diag", true, "specify whether to invert diagonal blocks for supernodal sparse-trianguular solve");
923  pl->set("SpTRSV_Invert_OffDiag", false, "specify whether to apply diagonal-inversion to off-diagonal blocks for supernodal sparse-trianguular solve");
924  pl->set("SpTRSV_U_CSR", true, "specify whether to store U in CSR forma for supernodal sparse-trianguular solve");
925  pl->set("SpTRSV_Merge_Supernodes", false, "specify whether to merge supernodes with similar sparsity structures for supernodal sparse-trianguular solve");
926  pl->set("SpTRSV_Use_SpMV", false, "specify whether to use spmv for supernodal sparse-trianguular solve");
927 #endif
928 
929  valid_params = pl;
930  }
931 
932  return valid_params;
933 }
934 
935 
936 template <class Matrix, class Vector>
937 bool
939 {
940  using Teuchos::as;
941 
942 #ifdef HAVE_AMESOS2_TIMERS
943  Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
944 #endif
945 
946  // SuperLU does not need the matrix at symbolicFactorization()
947  if( current_phase == SYMBFACT ) return false;
948 
949  // Cleanup old store memory if it's non-NULL (should only ever be non-NULL at root_)
950  if( data_.A.Store != NULL ){
951  SLU::Destroy_SuperMatrix_Store( &(data_.A) );
952  data_.A.Store = NULL;
953  }
954 
955  // Only the root image needs storage allocated
956  if( this->root_ ){
957  Kokkos::resize(host_nzvals_view_, this->globalNumNonZeros_);
958  Kokkos::resize(host_rows_view_, this->globalNumNonZeros_);
959  Kokkos::resize(host_col_ptr_view_, this->globalNumRows_ + 1);
960  }
961 
962  int nnz_ret = 0;
963  {
964 #ifdef HAVE_AMESOS2_TIMERS
965  Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
966 #endif
967 
968  TEUCHOS_TEST_FOR_EXCEPTION( this->rowIndexBase_ != this->columnIndexBase_,
969  std::runtime_error,
970  "Row and column maps have different indexbase ");
971 
973  MatrixAdapter<Matrix>,host_value_type_array,host_ordinal_type_array,
974  host_size_type_array>::do_get(this->matrixA_.ptr(),
975  host_nzvals_view_, host_rows_view_,
976  host_col_ptr_view_, nnz_ret,
977  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
978  ARBITRARY,
979  this->rowIndexBase_);
980  }
981 
982  // Get the SLU data type for this type of matrix
983  SLU::Dtype_t dtype = type_map::dtype;
984 
985  if( this->root_ ){
986  TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != as<int>(this->globalNumNonZeros_),
987  std::runtime_error,
988  "Did not get the expected number of non-zero vals");
989 
990  function_map::template create_CompCol_Matrix<host_value_type_array>( &(data_.A),
991  this->globalNumRows_, this->globalNumCols_,
992  nnz_ret,
993  convert_nzvals_, host_nzvals_view_,
994  host_rows_view_.data(),
995  host_col_ptr_view_.data(),
996  SLU::SLU_NC, dtype, SLU::SLU_GE);
997  }
998 
999  return true;
1000 }
1001 
1002 template <class Matrix, class Vector>
1003 void
1005 {
1006 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
1007  size_t ld_rhs = this->matrixA_->getGlobalNumRows();
1008 
1009  // convert etree to parents
1010  SLU::SCformat * Lstore = (SLU::SCformat*)(data_.L.Store);
1011  int nsuper = 1 + Lstore->nsuper; // # of supernodal columns
1012  Kokkos::resize(data_.parents, nsuper);
1013  for (int s = 0; s < nsuper; s++) {
1014  int j = Lstore->sup_to_col[s+1]-1; // the last column index of this supernode
1015  if (data_.etree[j] == static_cast<int>(ld_rhs)) {
1016  data_.parents(s) = -1;
1017  } else {
1018  data_.parents(s) = Lstore->col_to_sup[data_.etree[j]];
1019  }
1020  }
1021 
1022  deep_copy_or_assign_view(device_trsv_perm_r_, data_.perm_r); // will use device to permute
1023  deep_copy_or_assign_view(device_trsv_perm_c_, data_.perm_c); // will use device to permute
1024  if (data_.options.Equil == SLU::YES) {
1025  deep_copy_or_assign_view(device_trsv_R_, data_.R); // will use device to scale
1026  deep_copy_or_assign_view(device_trsv_C_, data_.C); // will use device to scale
1027  }
1028 
1029  bool condition_flag = false;
1030  if (data_.options.ConditionNumber == SLU::YES) {
1031  using STM = Teuchos::ScalarTraits<magnitude_type>;
1032  const magnitude_type eps = STM::eps ();
1033 
1034  SCformat *Lstore = (SCformat*)(data_.L.Store);
1035  int nsuper = 1 + Lstore->nsuper;
1036  int *nb = Lstore->sup_to_col;
1037  int max_cols = 0;
1038  for (int i = 0; i < nsuper; i++) {
1039  if (nb[i+1] - nb[i] > max_cols) {
1040  max_cols = nb[i+1] - nb[i];
1041  }
1042  }
1043 
1044  // when rcond is small, it is ill-conditioned and flag is true
1045  const magnitude_type multiply_fact (10.0); // larger the value, more likely flag is true, and no invert
1046  condition_flag = (((double)max_cols * nsuper) * eps * multiply_fact >= data_.rcond);
1047 
1048 #ifdef HAVE_AMESOS2_VERBOSE_DEBUG
1049  int n = data_.perm_r.extent(0);
1050  std::cout << this->getComm()->getRank()
1051  << " : anorm = " << data_.anorm << ", rcond = " << data_.rcond << ", n = " << n
1052  << ", num super cols = " << nsuper << ", max super cols = " << max_cols
1053  << " -> " << ((double)max_cols * nsuper) * eps / data_.rcond
1054  << (((double)max_cols * nsuper) * eps * multiply_fact < data_.rcond ? " (okay)" : " (warn)") << std::endl;
1055 #endif
1056  }
1057 
1058  // Create handles for U and U^T solves
1059  if (sptrsv_use_spmv_ && !condition_flag) {
1060  device_khL_.create_sptrsv_handle(
1061  KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_SPMV_DAG, ld_rhs, true);
1062  device_khU_.create_sptrsv_handle(
1063  KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_SPMV_DAG, ld_rhs, false);
1064  } else {
1065  device_khL_.create_sptrsv_handle(
1066  KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_DAG, ld_rhs, true);
1067  device_khU_.create_sptrsv_handle(
1068  KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_DAG, ld_rhs, false);
1069  }
1070 
1071  // specify whether to store U in CSR format
1072  device_khU_.set_sptrsv_column_major (!sptrsv_u_in_csr_);
1073 
1074  // specify whether to merge supernodes with similar sparsity structure
1075  device_khL_.set_sptrsv_merge_supernodes (sptrsv_merge_supernodes_);
1076  device_khU_.set_sptrsv_merge_supernodes (sptrsv_merge_supernodes_);
1077 
1078  // invert only if flag is not on
1079  bool sptrsv_invert_diag = (!condition_flag && sptrsv_invert_diag_);
1080  bool sptrsv_invert_offdiag = (!condition_flag && sptrsv_invert_offdiag_);
1081 
1082  // specify whether to invert diagonal blocks
1083  device_khL_.set_sptrsv_invert_diagonal (sptrsv_invert_diag);
1084  device_khU_.set_sptrsv_invert_diagonal (sptrsv_invert_diag);
1085 
1086  // specify wheather to apply diagonal-inversion to off-diagonal blocks (optional, default is false)
1087  device_khL_.set_sptrsv_invert_offdiagonal (sptrsv_invert_offdiag);
1088  device_khU_.set_sptrsv_invert_offdiagonal (sptrsv_invert_offdiag);
1089 
1090  // set etree
1091  device_khL_.set_sptrsv_etree(data_.parents.data());
1092  device_khU_.set_sptrsv_etree(data_.parents.data());
1093 
1094  // set permutation
1095  device_khL_.set_sptrsv_perm(data_.perm_r.data());
1096  device_khU_.set_sptrsv_perm(data_.perm_c.data());
1097 
1098  int block_size = -1; // TODO: add needed params
1099  if (block_size >= 0) {
1100  std::cout << " Block Size : " << block_size << std::endl;
1101  device_khL_.set_sptrsv_diag_supernode_sizes (block_size, block_size);
1102  device_khU_.set_sptrsv_diag_supernode_sizes (block_size, block_size);
1103  }
1104 
1105  // Do symbolic analysis
1106  {
1107 #ifdef HAVE_AMESOS2_TIMERS
1108  Teuchos::RCP< Teuchos::Time > SymboTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for SpTrsv symbolic");
1109  Teuchos::TimeMonitor numFactTimer(*SymboTimer_);
1110 #endif
1111  KokkosSparse::Experimental::sptrsv_symbolic
1112  (&device_khL_, &device_khU_, data_.L, data_.U);
1113  }
1114 
1115  // Do numerical compute
1116  {
1117 #ifdef HAVE_AMESOS2_TIMERS
1118  Teuchos::RCP< Teuchos::Time > NumerTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for SpTrsv numeric");
1119  Teuchos::TimeMonitor numFactTimer(*NumerTimer_);
1120 #endif
1121  KokkosSparse::Experimental::sptrsv_compute
1122  (&device_khL_, &device_khU_, data_.L, data_.U);
1123  }
1124 #endif // HAVE_AMESOS2_TRIANGULAR_SOLVE
1125 }
1126 
1127 template <class Matrix, class Vector>
1128 void
1129 Superlu<Matrix,Vector>::triangular_solve() const
1130 {
1131 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
1132  size_t ld_rhs = device_xValues_.extent(0);
1133  size_t nrhs = device_xValues_.extent(1);
1134 
1135  Kokkos::resize(device_trsv_rhs_, ld_rhs, nrhs);
1136  Kokkos::resize(device_trsv_sol_, ld_rhs, nrhs);
1137 
1138  // forward pivot
1139  auto local_device_bValues = device_bValues_;
1140  auto local_device_trsv_perm_r = device_trsv_perm_r_;
1141  auto local_device_trsv_rhs = device_trsv_rhs_;
1142 
1143  if (data_.rowequ) {
1144  auto local_device_trsv_R = device_trsv_R_;
1145  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1146  KOKKOS_LAMBDA(size_t j) {
1147  for(size_t k = 0; k < nrhs; ++k) {
1148  local_device_trsv_rhs(local_device_trsv_perm_r(j),k) = local_device_trsv_R(j) * local_device_bValues(j,k);
1149  }
1150  });
1151  } else {
1152  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1153  KOKKOS_LAMBDA(size_t j) {
1154  for(size_t k = 0; k < nrhs; ++k) {
1155  local_device_trsv_rhs(local_device_trsv_perm_r(j),k) = local_device_bValues(j,k);
1156  }
1157  });
1158  }
1159 
1160  for(size_t k = 0; k < nrhs; ++k) { // sptrsv_solve does not batch
1161  auto sub_sol = Kokkos::subview(device_trsv_sol_, Kokkos::ALL, k);
1162  auto sub_rhs = Kokkos::subview(device_trsv_rhs_, Kokkos::ALL, k);
1163 
1164  // do L solve= - numeric (only rhs is modified) on the default device/host space
1165  {
1166 #ifdef HAVE_AMESOS2_TIMERS
1167  Teuchos::RCP< Teuchos::Time > SpLtrsvTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for L solve");
1168  Teuchos::TimeMonitor solveTimer(*SpLtrsvTimer_);
1169 #endif
1170  KokkosSparse::Experimental::sptrsv_solve(&device_khL_, sub_sol, sub_rhs);
1171  }
1172  // do L^T solve - numeric (only rhs is modified) on the default device/host space
1173  {
1174 #ifdef HAVE_AMESOS2_TIMERS
1175  Teuchos::RCP< Teuchos::Time > SpUtrsvTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for U solve");
1176  Teuchos::TimeMonitor solveTimer(*SpUtrsvTimer_);
1177 #endif
1178  KokkosSparse::Experimental::sptrsv_solve(&device_khU_, sub_rhs, sub_sol);
1179  }
1180  } // end loop over rhs vectors
1181 
1182  // backward pivot
1183  auto local_device_xValues = device_xValues_;
1184  auto local_device_trsv_perm_c = device_trsv_perm_c_;
1185  if (data_.colequ) {
1186  auto local_device_trsv_C = device_trsv_C_;
1187  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1188  KOKKOS_LAMBDA(size_t j) {
1189  for(size_t k = 0; k < nrhs; ++k) {
1190  local_device_xValues(j,k) = local_device_trsv_C(j) * local_device_trsv_rhs(local_device_trsv_perm_c(j),k);
1191  }
1192  });
1193  } else {
1194  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1195  KOKKOS_LAMBDA(size_t j) {
1196  for(size_t k = 0; k < nrhs; ++k) {
1197  local_device_xValues(j,k) = local_device_trsv_rhs(local_device_trsv_perm_c(j),k);
1198  }
1199  });
1200  }
1201 #endif // HAVE_AMESOS2_TRIANGULAR_SOLVE
1202 }
1203 
1204 template<class Matrix, class Vector>
1205 const char* Superlu<Matrix,Vector>::name = "SuperLU";
1206 
1207 
1208 } // end namespace Amesos2
1209 
1210 #endif // AMESOS2_SUPERLU_DEF_HPP
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers...
Definition: Amesos2_SolverCore_decl.hpp:71
A generic helper class for getting a CCS representation of a Matrix.
Definition: Amesos2_Util.hpp:614
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal structures.
Definition: Amesos2_Superlu_def.hpp:938
EPhase
Used to indicate a phase in the direct solution.
Definition: Amesos2_TypeDecl.hpp:31
int numericFactorization_impl()
Superlu specific numeric factorization.
Definition: Amesos2_Superlu_def.hpp:283
~Superlu()
Destructor.
Definition: Amesos2_Superlu_def.hpp:94
Amesos2 Superlu declarations.
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition: Amesos2_Superlu_def.hpp:675
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition: Amesos2_Superlu_def.hpp:794
std::string description() const override
Returns a short description of this Solver.
Definition: Amesos2_Superlu_def.hpp:117
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
Superlu specific solve.
Definition: Amesos2_Superlu_def.hpp:473
A Matrix adapter interface for Amesos2.
Definition: Amesos2_MatrixAdapter_decl.hpp:42
void setParameters_impl(const Teuchos::RCP< Teuchos::ParameterList > &parameterList)
Definition: Amesos2_Superlu_def.hpp:686
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition: Amesos2_Superlu_def.hpp:171
A templated MultiVector class adapter for Amesos2.
Definition: Amesos2_MultiVecAdapter_decl.hpp:142
int symbolicFactorization_impl()
Perform symbolic factorization of the matrix using Superlu.
Definition: Amesos2_Superlu_def.hpp:196
Amesos2 interface to the SuperLU package.
Definition: Amesos2_Superlu_decl.hpp:42