Amesos2 - Direct Sparse Solver Interfaces  Version of the Day
Amesos2_Superlu_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Amesos2: Templated Direct Sparse Solver Package
4 //
5 // Copyright 2011 NTESS and the Amesos2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
19 #ifndef AMESOS2_SUPERLU_DEF_HPP
20 #define AMESOS2_SUPERLU_DEF_HPP
21 
22 #include <Teuchos_Tuple.hpp>
23 #include <Teuchos_ParameterList.hpp>
24 #include <Teuchos_StandardParameterEntryValidators.hpp>
25 
27 #include "Amesos2_Superlu_decl.hpp"
28 
29 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
30 // TODO: This 'using namespace SLU' is not a good thing.
31 // It was added because kernels does not namespace SuperLU but Amesos2 does.
32 // Declaring the namespace SLU allows us to reuse that file without duplication.
33 // We need to determine a uniform system to avoid this this but for now, at least
34 // this issue is only present when TRSV is enabled.
35 using namespace SLU;
36 #include "KokkosSparse_sptrsv_superlu.hpp"
37 #endif
38 
39 namespace Amesos2 {
40 
41 
42 template <class Matrix, class Vector>
44  Teuchos::RCP<const Matrix> A,
45  Teuchos::RCP<Vector> X,
46  Teuchos::RCP<const Vector> B )
47  : SolverCore<Amesos2::Superlu,Matrix,Vector>(A, X, B)
48 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
49  , sptrsv_invert_diag_(true)
50  , sptrsv_invert_offdiag_ (false)
51  , sptrsv_u_in_csr_ (true)
52  , sptrsv_merge_supernodes_ (false)
53  , sptrsv_use_spmv_ (false)
54 #endif
55  , is_contiguous_(true) // default is set by params
56  , use_triangular_solves_(false) // default is set by params
57  , use_metis_(false)
58  , symmetrize_metis_(true)
59 {
60  // ilu_set_default_options is called later in set parameter list if required.
61  // This is not the ideal way, but the other option to always call
62  // ilu_set_default_options here and assuming it won't have any side effect
63  // in the TPL is more dangerous. It is not a good idea to rely on external
64  // libraries' internal "features".
65  SLU::set_default_options(&(data_.options));
66  // Override some default options
67  data_.options.PrintStat = SLU::NO;
68 
69  SLU::StatInit(&(data_.stat));
70 
71  Kokkos::resize(data_.perm_r, this->globalNumRows_);
72  Kokkos::resize(data_.perm_c, this->globalNumCols_);
73  Kokkos::resize(data_.etree, this->globalNumCols_);
74  Kokkos::resize(data_.R, this->globalNumRows_);
75  Kokkos::resize(data_.C, this->globalNumCols_);
76 
77  data_.relax = SLU::sp_ienv(2); // Query optimal relax param from superlu
78  data_.panel_size = SLU::sp_ienv(1); // Query optimal panel size
79 
80  data_.equed = 'N'; // No equilibration
81  data_.rowequ = false;
82  data_.colequ = false;
83  data_.A.Store = NULL;
84  data_.L.Store = NULL;
85  data_.U.Store = NULL;
86  data_.X.Store = NULL;
87  data_.B.Store = NULL;
88 
89  ILU_Flag_=false; // default: turn off ILU
90 }
91 
92 
93 template <class Matrix, class Vector>
95 {
96  /* Free Superlu data_types
97  * - Matrices
98  * - Vectors
99  * - Stat object
100  */
101  SLU::StatFree( &(data_.stat) ) ;
102 
103  // Storage is initialized in numericFactorization_impl()
104  if ( data_.A.Store != NULL ){
105  SLU::Destroy_SuperMatrix_Store( &(data_.A) );
106  }
107 
108  // only root allocated these SuperMatrices.
109  if ( data_.L.Store != NULL ){ // will only be true for this->root_
110  SLU::Destroy_SuperNode_Matrix( &(data_.L) );
111  SLU::Destroy_CompCol_Matrix( &(data_.U) );
112  }
113 }
114 
115 template <class Matrix, class Vector>
116 std::string
118 {
119  std::ostringstream oss;
120  oss << "SuperLU solver interface";
121  if (ILU_Flag_) {
122  oss << ", \"ILUTP\" : {";
123  oss << "drop tol = " << data_.options.ILU_DropTol;
124  oss << ", fill factor = " << data_.options.ILU_FillFactor;
125  oss << ", fill tol = " << data_.options.ILU_FillTol;
126  switch(data_.options.ILU_MILU) {
127  case SLU::SMILU_1 :
128  oss << ", MILU 1";
129  break;
130  case SLU::SMILU_2 :
131  oss << ", MILU 2";
132  break;
133  case SLU::SMILU_3 :
134  oss << ", MILU 3";
135  break;
136  case SLU::SILU :
137  default:
138  oss << ", regular ILU";
139  }
140  switch(data_.options.ILU_Norm) {
141  case SLU::ONE_NORM :
142  oss << ", 1-norm";
143  break;
144  case SLU::TWO_NORM :
145  oss << ", 2-norm";
146  break;
147  case SLU::INF_NORM :
148  default:
149  oss << ", infinity-norm";
150  }
151  oss << "}";
152  } else {
153  oss << ", direct solve";
154  }
155  return oss.str();
156  /*
157 
158  // ILU parameters
159  if( parameterList->isParameter("RowPerm") ){
160  RCP<const ParameterEntryValidator> rowperm_validator = valid_params->getEntry("RowPerm").validator();
161  parameterList->getEntry("RowPerm").setValidator(rowperm_validator);
162  data_.options.RowPerm = getIntegralValue<SLU::rowperm_t>(*parameterList, "RowPerm");
163  }
164 
165 
166  */
167 }
168 
169 template<class Matrix, class Vector>
170 int
172 {
173  /*
174  * Get column permutation vector perm_c[], according to permc_spec:
175  * permc_spec = NATURAL: natural ordering
176  * permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A
177  * permc_spec = MMD_ATA: minimum degree on structure of A'*A
178  * permc_spec = COLAMD: approximate minimum degree column ordering
179  * permc_spec = MY_PERMC: the ordering already supplied in perm_c[]
180  */
181  int permc_spec = data_.options.ColPerm;
182  if (!use_metis_ && permc_spec != SLU::MY_PERMC && this->root_ ){
183 #ifdef HAVE_AMESOS2_TIMERS
184  Teuchos::TimeMonitor preOrderTimer(this->timers_.preOrderTime_);
185 #endif
186 
187  SLU::get_perm_c(permc_spec, &(data_.A), data_.perm_c.data());
188  }
189 
190  return(0);
191 }
192 
193 
194 template <class Matrix, class Vector>
195 int
197 {
198  /*
199  * SuperLU performs symbolic factorization and numeric factorization
200  * together, but does leave some options for reusing symbolic
201  * structures that have been created on previous factorizations. If
202  * our Amesos2 user calls this function, that is an indication that
203  * the symbolic structure of the matrix is no longer valid, and
204  * SuperLU should do the factorization from scratch.
205  *
206  * This can be accomplished by setting the options.Fact flag to
207  * DOFACT, as well as setting our own internal flag to false.
208  */
209 #ifdef HAVE_AMESOS2_TIMERS
210  Teuchos::TimeMonitor symFactTime( this->timers_.symFactTime_ );
211 #endif
212  same_symbolic_ = false;
213  data_.options.Fact = SLU::DOFACT;
214 
215  if (use_metis_) {
216 #ifdef HAVE_AMESOS2_TIMERS
217  Teuchos::RCP< Teuchos::Time > MetisTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for METIS");
218  Teuchos::TimeMonitor numFactTimer(*MetisTimer_);
219 #endif
220  size_t n = this->globalNumRows_;
221  size_t nz = this->globalNumNonZeros_;
222  host_size_type_array host_rowind_view_ = host_rows_view_;
223  host_ordinal_type_array host_colptr_view_ = host_col_ptr_view_;
224 
225  /* symmetrize the matrix (A + A') if needed */
226  if (symmetrize_metis_) {
227  int new_nz = 0;
228  int *new_col_ptr;
229  int *new_row_ind;
230 
231  // NOTE: both size_type and ordinal_type are defined as int
232  // Consider using "symmetrize_graph" in KokkosKernels, if this becomes significant in time.
233  SLU::at_plus_a(n, nz, host_col_ptr_view_.data(), host_rows_view_.data(),
234  &new_nz, &new_col_ptr, &new_row_ind);
235 
236  // reallocate (so that we won't overwrite the original)
237  // and copy for output
238  nz = new_nz;
239  Kokkos::resize (host_rowind_view_, nz);
240  Kokkos::realloc(host_colptr_view_, n+1);
241  for (size_t i = 0; i <= n; i++) {
242  host_colptr_view_(i) = new_col_ptr[i];
243  }
244  for (size_t i = 0; i < nz; i++) {
245  host_rowind_view_(i) = new_row_ind[i];
246  }
247 
248  // free
249  SLU::SUPERLU_FREE(new_col_ptr);
250  if (new_nz > 0) {
251  SLU::SUPERLU_FREE(new_row_ind);
252  }
253  }
254 
255  // reorder will convert both graph and perm/iperm to the internal METIS integer type
256  using metis_int_array_type = host_ordinal_type_array;
257  metis_int_array_type metis_perm = metis_int_array_type(Kokkos::ViewAllocateWithoutInitializing("metis_perm"), n);
258  metis_int_array_type metis_iperm = metis_int_array_type(Kokkos::ViewAllocateWithoutInitializing("metis_iperm"), n);
259 
260  // call METIS
261  size_t new_nnz = 0;
262  Amesos2::Util::reorder(
263  host_colptr_view_, host_rowind_view_,
264  metis_perm, metis_iperm, new_nnz, false);
265 
266  for (size_t i = 0; i < n; i++) {
267  data_.perm_r(i) = metis_iperm(i);
268  data_.perm_c(i) = metis_iperm(i);
269  }
270 
271  // SLU will use user-specified METIS ordering
272  data_.options.ColPerm = SLU::MY_PERMC;
273  data_.options.RowPerm = SLU::MY_PERMR;
274  // Turn off Equil not to mess up METIS ordering?
275  //data_.options.Equil = SLU::NO;
276  }
277  return(0);
278 }
279 
280 
281 template <class Matrix, class Vector>
282 int
284 {
285  using Teuchos::as;
286 
287  // Cleanup old L and U matrices if we are not reusing a symbolic
288  // factorization. Stores and other data will be allocated in gstrf.
289  // Only rank 0 has valid pointers
290  if ( !same_symbolic_ && data_.L.Store != NULL ){
291  SLU::Destroy_SuperNode_Matrix( &(data_.L) );
292  SLU::Destroy_CompCol_Matrix( &(data_.U) );
293  data_.L.Store = NULL;
294  data_.U.Store = NULL;
295  }
296 
297  if( same_symbolic_ ) data_.options.Fact = SLU::SamePattern_SameRowPerm;
298 
299  int info = 0;
300  if ( this->root_ ){
301 
302 #ifdef HAVE_AMESOS2_DEBUG
303  TEUCHOS_TEST_FOR_EXCEPTION( data_.A.ncol != as<int>(this->globalNumCols_),
304  std::runtime_error,
305  "Error in converting to SuperLU SuperMatrix: wrong number of global columns." );
306  TEUCHOS_TEST_FOR_EXCEPTION( data_.A.nrow != as<int>(this->globalNumRows_),
307  std::runtime_error,
308  "Error in converting to SuperLU SuperMatrix: wrong number of global rows." );
309 #endif
310 
311  if( data_.options.Equil == SLU::YES ){
312  magnitude_type rowcnd, colcnd, amax;
313  int info2 = 0;
314 
315  // calculate row and column scalings
316  function_map::gsequ(&(data_.A), data_.R.data(),
317  data_.C.data(), &rowcnd, &colcnd,
318  &amax, &info2);
319  TEUCHOS_TEST_FOR_EXCEPTION
320  (info2 < 0, std::runtime_error,
321  "SuperLU's gsequ function returned with status " << info2 << " < 0."
322  " This means that argument " << (-info2) << " given to the function"
323  " had an illegal value.");
324  if (info2 > 0) {
325  if (info2 <= data_.A.nrow) {
326  TEUCHOS_TEST_FOR_EXCEPTION
327  (true, std::runtime_error, "SuperLU's gsequ function returned with "
328  "info = " << info2 << " > 0, and info <= A.nrow = " << data_.A.nrow
329  << ". This means that row " << info2 << " of A is exactly zero.");
330  }
331  else if (info2 > data_.A.ncol) {
332  TEUCHOS_TEST_FOR_EXCEPTION
333  (true, std::runtime_error, "SuperLU's gsequ function returned with "
334  "info = " << info2 << " > 0, and info > A.ncol = " << data_.A.ncol
335  << ". This means that column " << (info2 - data_.A.nrow) << " of "
336  "A is exactly zero.");
337  }
338  else {
339  TEUCHOS_TEST_FOR_EXCEPTION
340  (true, std::runtime_error, "SuperLU's gsequ function returned "
341  "with info = " << info2 << " > 0, but its value is not in the "
342  "range permitted by the documentation. This should never happen "
343  "(it appears to be SuperLU's fault).");
344  }
345  }
346 
347  // apply row and column scalings if necessary
348  function_map::laqgs(&(data_.A), data_.R.data(),
349  data_.C.data(), rowcnd, colcnd,
350  amax, &(data_.equed));
351 
352  // check what types of equilibration was actually done
353  data_.rowequ = (data_.equed == 'R') || (data_.equed == 'B');
354  data_.colequ = (data_.equed == 'C') || (data_.equed == 'B');
355  }
356 
357  // Apply the column permutation computed in preOrdering. Place the
358  // column-permuted matrix in AC
359  SLU::sp_preorder(&(data_.options), &(data_.A), data_.perm_c.data(),
360  data_.etree.data(), &(data_.AC));
361 
362  { // Do factorization
363 #ifdef HAVE_AMESOS2_TIMERS
364  Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
365 #endif
366 
367 #ifdef HAVE_AMESOS2_VERBOSE_DEBUG
368  std::cout << "Superlu:: Before numeric factorization" << std::endl;
369  std::cout << "nzvals_ : " << nzvals_.toString() << std::endl;
370  std::cout << "rowind_ : " << rowind_.toString() << std::endl;
371  std::cout << "colptr_ : " << colptr_.toString() << std::endl;
372 #endif
373 
374  if(ILU_Flag_==false) {
375  function_map::gstrf(&(data_.options), &(data_.AC),
376  data_.relax, data_.panel_size, data_.etree.data(),
377  NULL, 0, data_.perm_c.data(), data_.perm_r.data(),
378  &(data_.L), &(data_.U),
379 #ifdef HAVE_AMESOS2_SUPERLU5_API
380  &(data_.lu),
381 #endif
382  &(data_.stat), &info);
383  }
384  else {
385  function_map::gsitrf(&(data_.options), &(data_.AC),
386  data_.relax, data_.panel_size, data_.etree.data(),
387  NULL, 0, data_.perm_c.data(), data_.perm_r.data(),
388  &(data_.L), &(data_.U),
389 #ifdef HAVE_AMESOS2_SUPERLU5_API
390  &(data_.lu),
391 #endif
392  &(data_.stat), &info);
393  }
394 
395  if (data_.options.ConditionNumber == SLU::YES) {
396  char norm[1];
397  if (data_.options.Trans == SLU::NOTRANS) {
398  *(unsigned char *)norm = '1';
399  } else {
400  *(unsigned char *)norm = 'I';
401  }
402 
403  data_.anorm = function_map::langs(norm, &(data_.A));
404  function_map::gscon(norm, &(data_.L), &(data_.U),
405  data_.anorm, &(data_.rcond),
406  &(data_.stat), &info);
407  }
408  }
409  // Cleanup. AC data will be alloc'd again for next factorization (if at all)
410  SLU::Destroy_CompCol_Permuted( &(data_.AC) );
411 
412  // Set the number of non-zero values in the L and U factors
413  this->setNnzLU( as<size_t>(((SLU::SCformat*)data_.L.Store)->nnz +
414  ((SLU::NCformat*)data_.U.Store)->nnz) );
415  }
416 
417  /* All processes should have the same error code */
418  Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
419 
420  global_size_type info_st = as<global_size_type>(info);
421  TEUCHOS_TEST_FOR_EXCEPTION( (info_st > 0) && (info_st <= this->globalNumCols_),
422  std::runtime_error,
423  "Factorization complete, but matrix is singular. Division by zero eminent");
424  TEUCHOS_TEST_FOR_EXCEPTION( (info_st > 0) && (info_st > this->globalNumCols_),
425  std::runtime_error,
426  "Memory allocation failure in Superlu factorization");
427 
428  data_.options.Fact = SLU::FACTORED;
429  same_symbolic_ = true;
430 
431  if(use_triangular_solves_) {
432 #ifdef HAVE_AMESOS2_VERBOSE_DEBUG
433  #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
434  if (this->getComm()->getRank()) {
435  std::cout << " > Metis : " << (use_metis_ ? "YES" : "NO") << std::endl;
436  std::cout << " > Equil : " << (data_.options.Equil == SLU::YES ? "YES" : "NO") << std::endl;
437  std::cout << " > Cond Number : " << (data_.options.ConditionNumber == SLU::YES ? "YES" : "NO") << std::endl;
438  std::cout << " > Invert diag : " << sptrsv_invert_diag_ << std::endl;
439  std::cout << " > Invert off-diag : " << sptrsv_invert_offdiag_ << std::endl;
440  std::cout << " > U in CSR : " << sptrsv_u_in_csr_ << std::endl;
441  std::cout << " > Merge : " << sptrsv_merge_supernodes_ << std::endl;
442  std::cout << " > Use SpMV : " << sptrsv_use_spmv_ << std::endl;
443  }
444  //std::cout << myRank << " : siize(A) " << (data_.A.nrow) << " x " << (data_.A.ncol) << std::endl;
445  //std::cout << myRank << " : nnz(A) " << ((SLU::NCformat*)data_.A.Store)->nnz << std::endl;
446  //std::cout << myRank << " : nnz(L) " << ((SLU::SCformat*)data_.L.Store)->nnz << std::endl;
447  //std::cout << myRank << " : nnz(U) " << ((SLU::SCformat*)data_.U.Store)->nnz << std::endl;
448  #endif
449 #endif
450 #ifdef HAVE_AMESOS2_TIMERS
451  Teuchos::RCP< Teuchos::Time > SpTrsvTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for SpTrsv setup");
452  Teuchos::TimeMonitor numFactTimer(*SpTrsvTimer_);
453 #endif
454  triangular_solve_factor();
455  }
456 
457  return(info);
458 }
459 
460 
461 template <class Matrix, class Vector>
462 int
464  const Teuchos::Ptr<const MultiVecAdapter<Vector> > B) const
465 {
466  using Teuchos::as;
467 #ifdef HAVE_AMESOS2_TIMERS
468  Teuchos::RCP< Teuchos::Time > Amesos2SolveTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for Amesos2");
469  Teuchos::TimeMonitor solveTimer(*Amesos2SolveTimer_);
470 #endif
471 
472  const global_size_type ld_rhs = this->root_ ? X->getGlobalLength() : 0;
473  const size_t nrhs = X->getGlobalNumVectors();
474 
475  bool bDidAssignX = false; // will be set below
476  bool bDidAssignB = false; // will be set below
477  { // Get values from RHS B
478 #ifdef HAVE_AMESOS2_TIMERS
479  Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
480  Teuchos::TimeMonitor redistTimer( this->timers_.vecRedistTime_ );
481 #endif
482 
483  // In general we may want to write directly to the x space without a copy.
484  // So we 'get' x which may be a direct view assignment to the MV.
485  const bool initialize_data = true;
486  const bool do_not_initialize_data = false;
487  if(use_triangular_solves_) { // to device
488 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
489  bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
490  device_solve_array_t>::do_get(do_not_initialize_data, X, device_xValues_,
491  as<size_t>(ld_rhs),
492  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
493  this->rowIndexBase_);
494  bDidAssignB = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
495  device_solve_array_t>::do_get(initialize_data, B, device_bValues_,
496  as<size_t>(ld_rhs),
497  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
498  this->rowIndexBase_);
499 #endif
500  }
501  else { // to host
502  bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
503  host_solve_array_t>::do_get(do_not_initialize_data, X, host_xValues_,
504  as<size_t>(ld_rhs),
505  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
506  this->rowIndexBase_);
507  bDidAssignB = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
508  host_solve_array_t>::do_get(initialize_data, B, host_bValues_,
509  as<size_t>(ld_rhs),
510  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
511  this->rowIndexBase_);
512  }
513  }
514 
515  // If equilibration was applied at numeric, then gssvx and gsisx are going to
516  // modify B, so we can't use the optimized assignment to B since we'll change
517  // the source test vector and then fail the subsequent cycle. We need a copy.
518  // If bDidAssignB is false, then we skip the copy since it was copied already.
519  if(bDidAssignB && data_.equed != 'N') {
520  if(use_triangular_solves_) { // to device
521 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
522  device_solve_array_t copyB(Kokkos::ViewAllocateWithoutInitializing("copyB"),
523  device_bValues_.extent(0), device_bValues_.extent(1));
524  Kokkos::deep_copy(copyB, device_bValues_);
525  device_bValues_ = copyB;
526 #endif
527  }
528  else {
529  host_solve_array_t copyB(Kokkos::ViewAllocateWithoutInitializing("copyB"),
530  host_bValues_.extent(0), host_bValues_.extent(1));
531  Kokkos::deep_copy(copyB, host_bValues_);
532  host_bValues_ = copyB;
533  }
534  }
535 
536  int ierr = 0; // returned error code
537 
538  magnitude_type rpg, rcond;
539  if ( this->root_ ) {
540  // Note: the values of B and X (after solution) are adjusted
541  // appropriately within gssvx for row and column scalings.
542  { // Do solve!
543 #ifdef HAVE_AMESOS2_TIMERS
544  Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
545 #endif
546 
547  if(use_triangular_solves_) {
548  triangular_solve();
549  }
550  else {
551  Kokkos::resize(data_.ferr, nrhs);
552  Kokkos::resize(data_.berr, nrhs);
553 
554  {
555  #ifdef HAVE_AMESOS2_TIMERS
556  Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
557  #endif
558  SLU::Dtype_t dtype = type_map::dtype;
559  int i_ld_rhs = as<int>(ld_rhs);
560  function_map::create_Dense_Matrix(&(data_.B), i_ld_rhs, as<int>(nrhs),
561  convert_bValues_, host_bValues_, i_ld_rhs,
562  SLU::SLU_DN, dtype, SLU::SLU_GE);
563  function_map::create_Dense_Matrix(&(data_.X), i_ld_rhs, as<int>(nrhs),
564  convert_xValues_, host_xValues_, i_ld_rhs,
565  SLU::SLU_DN, dtype, SLU::SLU_GE);
566  }
567 
568  if(ILU_Flag_==false) {
569  function_map::gssvx(&(data_.options), &(data_.A),
570  data_.perm_c.data(), data_.perm_r.data(),
571  data_.etree.data(), &(data_.equed), data_.R.data(),
572  data_.C.data(), &(data_.L), &(data_.U), NULL, 0, &(data_.B),
573  &(data_.X), &rpg, &rcond, data_.ferr.data(),
574  data_.berr.data(),
575  #ifdef HAVE_AMESOS2_SUPERLU5_API
576  &(data_.lu),
577  #endif
578  &(data_.mem_usage), &(data_.stat), &ierr);
579  }
580  else {
581  function_map::gsisx(&(data_.options), &(data_.A),
582  data_.perm_c.data(), data_.perm_r.data(),
583  data_.etree.data(), &(data_.equed), data_.R.data(),
584  data_.C.data(), &(data_.L), &(data_.U), NULL, 0, &(data_.B),
585  &(data_.X), &rpg, &rcond,
586  #ifdef HAVE_AMESOS2_SUPERLU5_API
587  &(data_.lu),
588  #endif
589  &(data_.mem_usage), &(data_.stat), &ierr);
590  }
591 
592  // Cleanup X and B stores
593  SLU::Destroy_SuperMatrix_Store( &(data_.X) );
594  SLU::Destroy_SuperMatrix_Store( &(data_.B) );
595  data_.X.Store = NULL;
596  data_.B.Store = NULL;
597  }
598 
599  } // do solve
600 
601  // convert back to Kokkos views
602  {
603 #ifdef HAVE_AMESOS2_TIMERS
604  Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
605 #endif
606  function_map::convert_back_Dense_Matrix(convert_bValues_, host_bValues_);
607  function_map::convert_back_Dense_Matrix(convert_xValues_, host_xValues_);
608  }
609 
610  // Cleanup X and B stores
611  SLU::Destroy_SuperMatrix_Store( &(data_.X) );
612  SLU::Destroy_SuperMatrix_Store( &(data_.B) );
613  data_.X.Store = NULL;
614  data_.B.Store = NULL;
615  }
616 
617  /* All processes should have the same error code */
618  Teuchos::broadcast(*(this->getComm()), 0, &ierr);
619 
620  global_size_type ierr_st = as<global_size_type>(ierr);
621  TEUCHOS_TEST_FOR_EXCEPTION( ierr < 0,
622  std::invalid_argument,
623  "Argument " << -ierr << " to SuperLU xgssvx had illegal value" );
624  TEUCHOS_TEST_FOR_EXCEPTION( ierr > 0 && ierr_st <= this->globalNumCols_,
625  std::runtime_error,
626  "Factorization complete, but U is exactly singular" );
627  TEUCHOS_TEST_FOR_EXCEPTION( ierr > 0 && ierr_st > this->globalNumCols_ + 1,
628  std::runtime_error,
629  "SuperLU allocated " << ierr - this->globalNumCols_ << " bytes of "
630  "memory before allocation failure occured." );
631 
632  /* Update X's global values */
633 
634  // if bDidAssignX, then we solved straight to the adapter's X memory space without
635  // requiring additional memory allocation, so the x data is already in place.
636  if(!bDidAssignX) {
637 #ifdef HAVE_AMESOS2_TIMERS
638  Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
639 #endif
640 
641  if(use_triangular_solves_) { // to device
642 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
643  Util::put_1d_data_helper_kokkos_view<
644  MultiVecAdapter<Vector>,device_solve_array_t>::do_put(X, device_xValues_,
645  as<size_t>(ld_rhs),
646  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
647  this->rowIndexBase_);
648 #endif
649  }
650  else {
651  Util::put_1d_data_helper_kokkos_view<
652  MultiVecAdapter<Vector>,host_solve_array_t>::do_put(X, host_xValues_,
653  as<size_t>(ld_rhs),
654  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
655  this->rowIndexBase_);
656  }
657  }
658 
659  return(ierr);
660 }
661 
662 
663 template <class Matrix, class Vector>
664 bool
666 {
667  // The Superlu factorization routines can handle square as well as
668  // rectangular matrices, but Superlu can only apply the solve routines to
669  // square matrices, so we check the matrix for squareness.
670  return( this->matrixA_->getGlobalNumRows() == this->matrixA_->getGlobalNumCols() );
671 }
672 
673 
674 template <class Matrix, class Vector>
675 void
676 Superlu<Matrix,Vector>::setParameters_impl(const Teuchos::RCP<Teuchos::ParameterList> & parameterList )
677 {
678  using Teuchos::RCP;
679  using Teuchos::getIntegralValue;
680  using Teuchos::ParameterEntryValidator;
681 
682  RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
683 
684  ILU_Flag_ = parameterList->get<bool>("ILU_Flag",false);
685  if (ILU_Flag_) {
686  SLU::ilu_set_default_options(&(data_.options));
687  // Override some default options
688  data_.options.PrintStat = SLU::NO;
689  }
690 
691  data_.options.Trans = this->control_.useTranspose_ ? SLU::TRANS : SLU::NOTRANS;
692  // The SuperLU transpose option can override the Amesos2 option
693  if( parameterList->isParameter("Trans") ){
694  RCP<const ParameterEntryValidator> trans_validator = valid_params->getEntry("Trans").validator();
695  parameterList->getEntry("Trans").setValidator(trans_validator);
696 
697  data_.options.Trans = getIntegralValue<SLU::trans_t>(*parameterList, "Trans");
698  }
699 
700  if( parameterList->isParameter("IterRefine") ){
701  RCP<const ParameterEntryValidator> refine_validator = valid_params->getEntry("IterRefine").validator();
702  parameterList->getEntry("IterRefine").setValidator(refine_validator);
703 
704  data_.options.IterRefine = getIntegralValue<SLU::IterRefine_t>(*parameterList, "IterRefine");
705  }
706 
707  if( parameterList->isParameter("ColPerm") ){
708  RCP<const ParameterEntryValidator> colperm_validator = valid_params->getEntry("ColPerm").validator();
709  parameterList->getEntry("ColPerm").setValidator(colperm_validator);
710 
711  data_.options.ColPerm = getIntegralValue<SLU::colperm_t>(*parameterList, "ColPerm");
712  }
713 
714  data_.options.DiagPivotThresh = parameterList->get<double>("DiagPivotThresh", 1.0);
715 
716  bool equil = parameterList->get<bool>("Equil", true);
717  data_.options.Equil = equil ? SLU::YES : SLU::NO;
718 
719  bool condNum = parameterList->get<bool>("ConditionNumber", false);
720  data_.options.ConditionNumber = condNum ? SLU::YES : SLU::NO;
721 
722  bool symmetric_mode = parameterList->get<bool>("SymmetricMode", false);
723  data_.options.SymmetricMode = symmetric_mode ? SLU::YES : SLU::NO;
724 
725  // ILU parameters
726  if( parameterList->isParameter("RowPerm") ){
727  RCP<const ParameterEntryValidator> rowperm_validator = valid_params->getEntry("RowPerm").validator();
728  parameterList->getEntry("RowPerm").setValidator(rowperm_validator);
729  data_.options.RowPerm = getIntegralValue<SLU::rowperm_t>(*parameterList, "RowPerm");
730  }
731 
732  /*if( parameterList->isParameter("ILU_DropRule") ) {
733  RCP<const ParameterEntryValidator> droprule_validator = valid_params->getEntry("ILU_DropRule").validator();
734  parameterList->getEntry("ILU_DropRule").setValidator(droprule_validator);
735  data_.options.ILU_DropRule = getIntegralValue<SLU::rule_t>(*parameterList, "ILU_DropRule");
736  }*/
737 
738  data_.options.ILU_DropTol = parameterList->get<double>("ILU_DropTol", 0.0001);
739 
740  data_.options.ILU_FillFactor = parameterList->get<double>("ILU_FillFactor", 10.0);
741 
742  if( parameterList->isParameter("ILU_Norm") ) {
743  RCP<const ParameterEntryValidator> norm_validator = valid_params->getEntry("ILU_Norm").validator();
744  parameterList->getEntry("ILU_Norm").setValidator(norm_validator);
745  data_.options.ILU_Norm = getIntegralValue<SLU::norm_t>(*parameterList, "ILU_Norm");
746  }
747 
748  if( parameterList->isParameter("ILU_MILU") ) {
749  RCP<const ParameterEntryValidator> milu_validator = valid_params->getEntry("ILU_MILU").validator();
750  parameterList->getEntry("ILU_MILU").setValidator(milu_validator);
751  data_.options.ILU_MILU = getIntegralValue<SLU::milu_t>(*parameterList, "ILU_MILU");
752  }
753 
754  data_.options.ILU_FillTol = parameterList->get<double>("ILU_FillTol", 0.01);
755 
756  is_contiguous_ = parameterList->get<bool>("IsContiguous", true);
757 
758  use_metis_ = parameterList->get<bool>("UseMetis", false);
759  symmetrize_metis_ = parameterList->get<bool>("SymmetrizeMetis", true);
760 
761  use_triangular_solves_ = parameterList->get<bool>("Enable_KokkosKernels_TriangularSolves", false);
762  if(use_triangular_solves_) {
763 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
764  // specify whether to invert diagonal blocks
765  sptrsv_invert_diag_ = parameterList->get<bool>("SpTRSV_Invert_Diag", true);
766  // specify whether to apply diagonal-inversion to off-diagonal blocks (optional, default is false)
767  sptrsv_invert_offdiag_ = parameterList->get<bool>("SpTRSV_Invert_OffDiag", false);
768  // specify whether to store U in CSR format
769  sptrsv_u_in_csr_ = parameterList->get<bool>("SpTRSV_U_CSR", true);
770  // specify whether to merge supernodes with similar sparsity structures
771  sptrsv_merge_supernodes_ = parameterList->get<bool>("SpTRSV_Merge_Supernodes", false);
772  // specify whether to use spmv for sptrsv
773  sptrsv_use_spmv_ = parameterList->get<bool>("SpTRSV_Use_SpMV", false);
774 #else
775  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error,
776  "Calling for triangular solves but KokkosKernels_ENABLE_SUPERNODAL_SPTRSV and KokkosKernels_ENABLE_TPL_SUPERLU were not configured." );
777 #endif
778  }
779 }
780 
781 
782 template <class Matrix, class Vector>
783 Teuchos::RCP<const Teuchos::ParameterList>
785 {
786  using std::string;
787  using Teuchos::tuple;
788  using Teuchos::ParameterList;
789  using Teuchos::EnhancedNumberValidator;
790  using Teuchos::setStringToIntegralParameter;
791  using Teuchos::stringToIntegralParameterEntryValidator;
792 
793  static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
794 
795  if( is_null(valid_params) ){
796  Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
797 
798  setStringToIntegralParameter<SLU::trans_t>("Trans", "NOTRANS",
799  "Solve for the transpose system or not",
800  tuple<string>("TRANS","NOTRANS","CONJ"),
801  tuple<string>("Solve with transpose",
802  "Do not solve with transpose",
803  "Solve with the conjugate transpose"),
804  tuple<SLU::trans_t>(SLU::TRANS,
805  SLU::NOTRANS,
806  SLU::CONJ),
807  pl.getRawPtr());
808 
809  setStringToIntegralParameter<SLU::IterRefine_t>("IterRefine", "NOREFINE",
810  "Type of iterative refinement to use",
811  tuple<string>("NOREFINE", "SLU_SINGLE", "SLU_DOUBLE"),
812  tuple<string>("Do not use iterative refinement",
813  "Do single iterative refinement",
814  "Do double iterative refinement"),
815  tuple<SLU::IterRefine_t>(SLU::NOREFINE,
816  SLU::SLU_SINGLE,
817  SLU::SLU_DOUBLE),
818  pl.getRawPtr());
819 
820  // Note: MY_PERMC not yet supported
821  setStringToIntegralParameter<SLU::colperm_t>("ColPerm", "COLAMD",
822  "Specifies how to permute the columns of the "
823  "matrix for sparsity preservation",
824  tuple<string>("NATURAL", "MMD_AT_PLUS_A",
825  "MMD_ATA", "COLAMD"),
826  tuple<string>("Natural ordering",
827  "Minimum degree ordering on A^T + A",
828  "Minimum degree ordering on A^T A",
829  "Approximate minimum degree column ordering"),
830  tuple<SLU::colperm_t>(SLU::NATURAL,
831  SLU::MMD_AT_PLUS_A,
832  SLU::MMD_ATA,
833  SLU::COLAMD),
834  pl.getRawPtr());
835 
836  Teuchos::RCP<EnhancedNumberValidator<double> > diag_pivot_thresh_validator
837  = Teuchos::rcp( new EnhancedNumberValidator<double>(0.0, 1.0) );
838  pl->set("DiagPivotThresh", 1.0,
839  "Specifies the threshold used for a diagonal entry to be an acceptable pivot",
840  diag_pivot_thresh_validator); // partial pivoting
841 
842  pl->set("Equil", true, "Whether to equilibrate the system before solve");
843  pl->set("ConditionNumber", false, "Whether to approximate condition number");
844 
845  pl->set("SymmetricMode", false,
846  "Specifies whether to use the symmetric mode. "
847  "Gives preference to diagonal pivots and uses "
848  "an (A^T + A)-based column permutation.");
849 
850  // ILU parameters
851 
852  setStringToIntegralParameter<SLU::rowperm_t>("RowPerm", "LargeDiag",
853  "Type of row permutation strategy to use",
854  tuple<string>("NOROWPERM","LargeDiag","MY_PERMR"),
855  tuple<string>("Use natural ordering",
856  "Use weighted bipartite matching algorithm",
857  "Use the ordering given in perm_r input"),
858  tuple<SLU::rowperm_t>(SLU::NOROWPERM,
859 #if SUPERLU_MAJOR_VERSION > 5 || (SUPERLU_MAJOR_VERSION == 5 && SUPERLU_MINOR_VERSION > 2) || (SUPERLU_MAJOR_VERSION == 5 && SUPERLU_MINOR_VERSION == 2 && SUPERLU_PATCH_VERSION >= 2)
860  SLU::LargeDiag_MC64,
861 #else
862  SLU::LargeDiag,
863 #endif
864  SLU::MY_PERMR),
865  pl.getRawPtr());
866 
867  /*setStringToIntegralParameter<SLU::rule_t>("ILU_DropRule", "DROP_BASIC",
868  "Type of dropping strategy to use",
869  tuple<string>("DROP_BASIC","DROP_PROWS",
870  "DROP_COLUMN","DROP_AREA",
871  "DROP_DYNAMIC","DROP_INTERP"),
872  tuple<string>("ILUTP(t)","ILUTP(p,t)",
873  "Variant of ILUTP(p,t) for j-th column",
874  "Variant of ILUTP to control memory",
875  "Dynamically adjust threshold",
876  "Compute second dropping threshold by interpolation"),
877  tuple<SLU::rule_t>(SLU::DROP_BASIC,SLU::DROP_PROWS,SLU::DROP_COLUMN,
878  SLU::DROP_AREA,SLU::DROP_DYNAMIC,SLU::DROP_INTERP),
879  pl.getRawPtr());*/
880 
881  pl->set("ILU_DropTol", 0.0001, "ILUT drop tolerance");
882 
883  pl->set("ILU_FillFactor", 10.0, "ILUT fill factor");
884 
885  setStringToIntegralParameter<SLU::norm_t>("ILU_Norm", "INF_NORM",
886  "Type of norm to use",
887  tuple<string>("ONE_NORM","TWO_NORM","INF_NORM"),
888  tuple<string>("1-norm","2-norm","inf-norm"),
889  tuple<SLU::norm_t>(SLU::ONE_NORM,SLU::TWO_NORM,SLU::INF_NORM),
890  pl.getRawPtr());
891 
892  setStringToIntegralParameter<SLU::milu_t>("ILU_MILU", "SILU",
893  "Type of modified ILU to use",
894  tuple<string>("SILU","SMILU_1","SMILU_2","SMILU_3"),
895  tuple<string>("Regular ILU","MILU 1","MILU 2","MILU 3"),
896  tuple<SLU::milu_t>(SLU::SILU,SLU::SMILU_1,SLU::SMILU_2,
897  SLU::SMILU_3),
898  pl.getRawPtr());
899 
900  pl->set("ILU_FillTol", 0.01, "ILUT fill tolerance");
901 
902  pl->set("ILU_Flag", false, "ILU flag: if true, run ILU routines");
903 
904  pl->set("IsContiguous", true, "Whether GIDs contiguous");
905 
906  // call METIS before SuperLU
907  pl->set("UseMetis", false, "Whether to call METIS before SuperLU");
908  pl->set("SymmetrizeMetis", true, "Whether to symmetrize matrix before METIS");
909 
910  pl->set("Enable_KokkosKernels_TriangularSolves", false, "Whether to use triangular solves.");
911 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
912  pl->set("SpTRSV_Invert_Diag", true, "specify whether to invert diagonal blocks for supernodal sparse-trianguular solve");
913  pl->set("SpTRSV_Invert_OffDiag", false, "specify whether to apply diagonal-inversion to off-diagonal blocks for supernodal sparse-trianguular solve");
914  pl->set("SpTRSV_U_CSR", true, "specify whether to store U in CSR forma for supernodal sparse-trianguular solve");
915  pl->set("SpTRSV_Merge_Supernodes", false, "specify whether to merge supernodes with similar sparsity structures for supernodal sparse-trianguular solve");
916  pl->set("SpTRSV_Use_SpMV", false, "specify whether to use spmv for supernodal sparse-trianguular solve");
917 #endif
918 
919  valid_params = pl;
920  }
921 
922  return valid_params;
923 }
924 
925 
926 template <class Matrix, class Vector>
927 bool
929 {
930  using Teuchos::as;
931 
932 #ifdef HAVE_AMESOS2_TIMERS
933  Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
934 #endif
935 
936  // SuperLU does not need the matrix at symbolicFactorization()
937  if( current_phase == SYMBFACT ) return false;
938 
939  // Cleanup old store memory if it's non-NULL (should only ever be non-NULL at root_)
940  if( data_.A.Store != NULL ){
941  SLU::Destroy_SuperMatrix_Store( &(data_.A) );
942  data_.A.Store = NULL;
943  }
944 
945  // Only the root image needs storage allocated
946  if( this->root_ ){
947  Kokkos::resize(host_nzvals_view_, this->globalNumNonZeros_);
948  Kokkos::resize(host_rows_view_, this->globalNumNonZeros_);
949  Kokkos::resize(host_col_ptr_view_, this->globalNumRows_ + 1);
950  }
951 
952  int nnz_ret = 0;
953  {
954 #ifdef HAVE_AMESOS2_TIMERS
955  Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
956 #endif
957 
958  TEUCHOS_TEST_FOR_EXCEPTION( this->rowIndexBase_ != this->columnIndexBase_,
959  std::runtime_error,
960  "Row and column maps have different indexbase ");
961 
963  MatrixAdapter<Matrix>,host_value_type_array,host_ordinal_type_array,
964  host_size_type_array>::do_get(this->matrixA_.ptr(),
965  host_nzvals_view_, host_rows_view_,
966  host_col_ptr_view_, nnz_ret,
967  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
968  ARBITRARY,
969  this->rowIndexBase_);
970  }
971 
972  // Get the SLU data type for this type of matrix
973  SLU::Dtype_t dtype = type_map::dtype;
974 
975  if( this->root_ ){
976  TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != as<int>(this->globalNumNonZeros_),
977  std::runtime_error,
978  "Did not get the expected number of non-zero vals");
979 
980  function_map::template create_CompCol_Matrix<host_value_type_array>( &(data_.A),
981  this->globalNumRows_, this->globalNumCols_,
982  nnz_ret,
983  convert_nzvals_, host_nzvals_view_,
984  host_rows_view_.data(),
985  host_col_ptr_view_.data(),
986  SLU::SLU_NC, dtype, SLU::SLU_GE);
987  }
988 
989  return true;
990 }
991 
992 template <class Matrix, class Vector>
993 void
995 {
996 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
997  size_t ld_rhs = this->matrixA_->getGlobalNumRows();
998 
999  // convert etree to parents
1000  SLU::SCformat * Lstore = (SLU::SCformat*)(data_.L.Store);
1001  int nsuper = 1 + Lstore->nsuper; // # of supernodal columns
1002  Kokkos::resize(data_.parents, nsuper);
1003  for (int s = 0; s < nsuper; s++) {
1004  int j = Lstore->sup_to_col[s+1]-1; // the last column index of this supernode
1005  if (data_.etree[j] == static_cast<int>(ld_rhs)) {
1006  data_.parents(s) = -1;
1007  } else {
1008  data_.parents(s) = Lstore->col_to_sup[data_.etree[j]];
1009  }
1010  }
1011 
1012  deep_copy_or_assign_view(device_trsv_perm_r_, data_.perm_r); // will use device to permute
1013  deep_copy_or_assign_view(device_trsv_perm_c_, data_.perm_c); // will use device to permute
1014  if (data_.options.Equil == SLU::YES) {
1015  deep_copy_or_assign_view(device_trsv_R_, data_.R); // will use device to scale
1016  deep_copy_or_assign_view(device_trsv_C_, data_.C); // will use device to scale
1017  }
1018 
1019  bool condition_flag = false;
1020  if (data_.options.ConditionNumber == SLU::YES) {
1021  using STM = Teuchos::ScalarTraits<magnitude_type>;
1022  const magnitude_type eps = STM::eps ();
1023 
1024  SCformat *Lstore = (SCformat*)(data_.L.Store);
1025  int nsuper = 1 + Lstore->nsuper;
1026  int *nb = Lstore->sup_to_col;
1027  int max_cols = 0;
1028  for (int i = 0; i < nsuper; i++) {
1029  if (nb[i+1] - nb[i] > max_cols) {
1030  max_cols = nb[i+1] - nb[i];
1031  }
1032  }
1033 
1034  // when rcond is small, it is ill-conditioned and flag is true
1035  const magnitude_type multiply_fact (10.0); // larger the value, more likely flag is true, and no invert
1036  condition_flag = (((double)max_cols * nsuper) * eps * multiply_fact >= data_.rcond);
1037 
1038 #ifdef HAVE_AMESOS2_VERBOSE_DEBUG
1039  int n = data_.perm_r.extent(0);
1040  std::cout << this->getComm()->getRank()
1041  << " : anorm = " << data_.anorm << ", rcond = " << data_.rcond << ", n = " << n
1042  << ", num super cols = " << nsuper << ", max super cols = " << max_cols
1043  << " -> " << ((double)max_cols * nsuper) * eps / data_.rcond
1044  << (((double)max_cols * nsuper) * eps * multiply_fact < data_.rcond ? " (okay)" : " (warn)") << std::endl;
1045 #endif
1046  }
1047 
1048  // Create handles for U and U^T solves
1049  if (sptrsv_use_spmv_ && !condition_flag) {
1050  device_khL_.create_sptrsv_handle(
1051  KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_SPMV_DAG, ld_rhs, true);
1052  device_khU_.create_sptrsv_handle(
1053  KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_SPMV_DAG, ld_rhs, false);
1054  } else {
1055  device_khL_.create_sptrsv_handle(
1056  KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_DAG, ld_rhs, true);
1057  device_khU_.create_sptrsv_handle(
1058  KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_DAG, ld_rhs, false);
1059  }
1060 
1061  // specify whether to store U in CSR format
1062  device_khU_.set_sptrsv_column_major (!sptrsv_u_in_csr_);
1063 
1064  // specify whether to merge supernodes with similar sparsity structure
1065  device_khL_.set_sptrsv_merge_supernodes (sptrsv_merge_supernodes_);
1066  device_khU_.set_sptrsv_merge_supernodes (sptrsv_merge_supernodes_);
1067 
1068  // invert only if flag is not on
1069  bool sptrsv_invert_diag = (!condition_flag && sptrsv_invert_diag_);
1070  bool sptrsv_invert_offdiag = (!condition_flag && sptrsv_invert_offdiag_);
1071 
1072  // specify whether to invert diagonal blocks
1073  device_khL_.set_sptrsv_invert_diagonal (sptrsv_invert_diag);
1074  device_khU_.set_sptrsv_invert_diagonal (sptrsv_invert_diag);
1075 
1076  // specify wheather to apply diagonal-inversion to off-diagonal blocks (optional, default is false)
1077  device_khL_.set_sptrsv_invert_offdiagonal (sptrsv_invert_offdiag);
1078  device_khU_.set_sptrsv_invert_offdiagonal (sptrsv_invert_offdiag);
1079 
1080  // set etree
1081  device_khL_.set_sptrsv_etree(data_.parents.data());
1082  device_khU_.set_sptrsv_etree(data_.parents.data());
1083 
1084  // set permutation
1085  device_khL_.set_sptrsv_perm(data_.perm_r.data());
1086  device_khU_.set_sptrsv_perm(data_.perm_c.data());
1087 
1088  int block_size = -1; // TODO: add needed params
1089  if (block_size >= 0) {
1090  std::cout << " Block Size : " << block_size << std::endl;
1091  device_khL_.set_sptrsv_diag_supernode_sizes (block_size, block_size);
1092  device_khU_.set_sptrsv_diag_supernode_sizes (block_size, block_size);
1093  }
1094 
1095  // Do symbolic analysis
1096  {
1097 #ifdef HAVE_AMESOS2_TIMERS
1098  Teuchos::RCP< Teuchos::Time > SymboTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for SpTrsv symbolic");
1099  Teuchos::TimeMonitor numFactTimer(*SymboTimer_);
1100 #endif
1101  KokkosSparse::Experimental::sptrsv_symbolic
1102  (&device_khL_, &device_khU_, data_.L, data_.U);
1103  }
1104 
1105  // Do numerical compute
1106  {
1107 #ifdef HAVE_AMESOS2_TIMERS
1108  Teuchos::RCP< Teuchos::Time > NumerTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for SpTrsv numeric");
1109  Teuchos::TimeMonitor numFactTimer(*NumerTimer_);
1110 #endif
1111  KokkosSparse::Experimental::sptrsv_compute
1112  (&device_khL_, &device_khU_, data_.L, data_.U);
1113  }
1114 #endif // HAVE_AMESOS2_TRIANGULAR_SOLVE
1115 }
1116 
1117 template <class Matrix, class Vector>
1118 void
1119 Superlu<Matrix,Vector>::triangular_solve() const
1120 {
1121 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
1122  size_t ld_rhs = device_xValues_.extent(0);
1123  size_t nrhs = device_xValues_.extent(1);
1124 
1125  Kokkos::resize(device_trsv_rhs_, ld_rhs, nrhs);
1126  Kokkos::resize(device_trsv_sol_, ld_rhs, nrhs);
1127 
1128  // forward pivot
1129  auto local_device_bValues = device_bValues_;
1130  auto local_device_trsv_perm_r = device_trsv_perm_r_;
1131  auto local_device_trsv_rhs = device_trsv_rhs_;
1132 
1133  if (data_.rowequ) {
1134  auto local_device_trsv_R = device_trsv_R_;
1135  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1136  KOKKOS_LAMBDA(size_t j) {
1137  for(size_t k = 0; k < nrhs; ++k) {
1138  local_device_trsv_rhs(local_device_trsv_perm_r(j),k) = local_device_trsv_R(j) * local_device_bValues(j,k);
1139  }
1140  });
1141  } else {
1142  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1143  KOKKOS_LAMBDA(size_t j) {
1144  for(size_t k = 0; k < nrhs; ++k) {
1145  local_device_trsv_rhs(local_device_trsv_perm_r(j),k) = local_device_bValues(j,k);
1146  }
1147  });
1148  }
1149 
1150  for(size_t k = 0; k < nrhs; ++k) { // sptrsv_solve does not batch
1151  auto sub_sol = Kokkos::subview(device_trsv_sol_, Kokkos::ALL, k);
1152  auto sub_rhs = Kokkos::subview(device_trsv_rhs_, Kokkos::ALL, k);
1153 
1154  // do L solve= - numeric (only rhs is modified) on the default device/host space
1155  {
1156 #ifdef HAVE_AMESOS2_TIMERS
1157  Teuchos::RCP< Teuchos::Time > SpLtrsvTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for L solve");
1158  Teuchos::TimeMonitor solveTimer(*SpLtrsvTimer_);
1159 #endif
1160  KokkosSparse::Experimental::sptrsv_solve(&device_khL_, sub_sol, sub_rhs);
1161  }
1162  // do L^T solve - numeric (only rhs is modified) on the default device/host space
1163  {
1164 #ifdef HAVE_AMESOS2_TIMERS
1165  Teuchos::RCP< Teuchos::Time > SpUtrsvTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for U solve");
1166  Teuchos::TimeMonitor solveTimer(*SpUtrsvTimer_);
1167 #endif
1168  KokkosSparse::Experimental::sptrsv_solve(&device_khU_, sub_rhs, sub_sol);
1169  }
1170  } // end loop over rhs vectors
1171 
1172  // backward pivot
1173  auto local_device_xValues = device_xValues_;
1174  auto local_device_trsv_perm_c = device_trsv_perm_c_;
1175  if (data_.colequ) {
1176  auto local_device_trsv_C = device_trsv_C_;
1177  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1178  KOKKOS_LAMBDA(size_t j) {
1179  for(size_t k = 0; k < nrhs; ++k) {
1180  local_device_xValues(j,k) = local_device_trsv_C(j) * local_device_trsv_rhs(local_device_trsv_perm_c(j),k);
1181  }
1182  });
1183  } else {
1184  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1185  KOKKOS_LAMBDA(size_t j) {
1186  for(size_t k = 0; k < nrhs; ++k) {
1187  local_device_xValues(j,k) = local_device_trsv_rhs(local_device_trsv_perm_c(j),k);
1188  }
1189  });
1190  }
1191 #endif // HAVE_AMESOS2_TRIANGULAR_SOLVE
1192 }
1193 
1194 template<class Matrix, class Vector>
1195 const char* Superlu<Matrix,Vector>::name = "SuperLU";
1196 
1197 
1198 } // end namespace Amesos2
1199 
1200 #endif // AMESOS2_SUPERLU_DEF_HPP
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers...
Definition: Amesos2_SolverCore_decl.hpp:71
A generic helper class for getting a CCS representation of a Matrix.
Definition: Amesos2_Util.hpp:614
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal structures.
Definition: Amesos2_Superlu_def.hpp:928
EPhase
Used to indicate a phase in the direct solution.
Definition: Amesos2_TypeDecl.hpp:31
int numericFactorization_impl()
Superlu specific numeric factorization.
Definition: Amesos2_Superlu_def.hpp:283
~Superlu()
Destructor.
Definition: Amesos2_Superlu_def.hpp:94
Amesos2 Superlu declarations.
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition: Amesos2_Superlu_def.hpp:665
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition: Amesos2_Superlu_def.hpp:784
std::string description() const override
Returns a short description of this Solver.
Definition: Amesos2_Superlu_def.hpp:117
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
Superlu specific solve.
Definition: Amesos2_Superlu_def.hpp:463
A Matrix adapter interface for Amesos2.
Definition: Amesos2_MatrixAdapter_decl.hpp:42
void setParameters_impl(const Teuchos::RCP< Teuchos::ParameterList > &parameterList)
Definition: Amesos2_Superlu_def.hpp:676
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition: Amesos2_Superlu_def.hpp:171
A templated MultiVector class adapter for Amesos2.
Definition: Amesos2_MultiVecAdapter_decl.hpp:142
int symbolicFactorization_impl()
Perform symbolic factorization of the matrix using Superlu.
Definition: Amesos2_Superlu_def.hpp:196
Amesos2 interface to the SuperLU package.
Definition: Amesos2_Superlu_decl.hpp:42