Amesos2 - Direct Sparse Solver Interfaces  Version of the Day
Amesos2_Superlu_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Amesos2: Templated Direct Sparse Solver Package
4 //
5 // Copyright 2011 NTESS and the Amesos2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
19 #ifndef AMESOS2_SUPERLU_DEF_HPP
20 #define AMESOS2_SUPERLU_DEF_HPP
21 
22 #include <Teuchos_Tuple.hpp>
23 #include <Teuchos_ParameterList.hpp>
24 #include <Teuchos_StandardParameterEntryValidators.hpp>
25 
27 #include "Amesos2_Superlu_decl.hpp"
28 
29 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
30 // TODO: This 'using namespace SLU' is not a good thing.
31 // It was added because kernels does not namespace SuperLU but Amesos2 does.
32 // Declaring the namespace SLU allows us to reuse that file without duplication.
33 // We need to determine a uniform system to avoid this this but for now, at least
34 // this issue is only present when TRSV is enabled.
35 using namespace SLU;
36 #include "KokkosSparse_sptrsv_superlu.hpp"
37 #endif
38 
39 namespace Amesos2 {
40 
41 
42 template <class Matrix, class Vector>
44  Teuchos::RCP<const Matrix> A,
45  Teuchos::RCP<Vector> X,
46  Teuchos::RCP<const Vector> B )
47  : SolverCore<Amesos2::Superlu,Matrix,Vector>(A, X, B)
48 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
49  , sptrsv_invert_diag_(true)
50  , sptrsv_invert_offdiag_ (false)
51  , sptrsv_u_in_csr_ (true)
52  , sptrsv_merge_supernodes_ (false)
53  , sptrsv_use_spmv_ (false)
54 #endif
55  , is_contiguous_(true) // default is set by params
56  , use_triangular_solves_(false) // default is set by params
57  , use_metis_(false)
58  , symmetrize_metis_(true)
59 {
60  // ilu_set_default_options is called later in set parameter list if required.
61  // This is not the ideal way, but the other option to always call
62  // ilu_set_default_options here and assuming it won't have any side effect
63  // in the TPL is more dangerous. It is not a good idea to rely on external
64  // libraries' internal "features".
65  SLU::set_default_options(&(data_.options));
66  // Override some default options
67  data_.options.PrintStat = SLU::NO;
68 
69  SLU::StatInit(&(data_.stat));
70 
71  Kokkos::resize(data_.perm_r, this->globalNumRows_);
72  Kokkos::resize(data_.perm_c, this->globalNumCols_);
73  Kokkos::resize(data_.etree, this->globalNumCols_);
74  Kokkos::resize(data_.R, this->globalNumRows_);
75  Kokkos::resize(data_.C, this->globalNumCols_);
76 
77  data_.relax = SLU::sp_ienv(2); // Query optimal relax param from superlu
78  data_.panel_size = SLU::sp_ienv(1); // Query optimal panel size
79 
80  data_.equed = 'N'; // No equilibration
81  data_.rowequ = false;
82  data_.colequ = false;
83  data_.A.Store = NULL;
84  data_.L.Store = NULL;
85  data_.U.Store = NULL;
86  data_.X.Store = NULL;
87  data_.B.Store = NULL;
88 
89  ILU_Flag_=false; // default: turn off ILU
90 }
91 
92 
93 template <class Matrix, class Vector>
95 {
96  /* Free Superlu data_types
97  * - Matrices
98  * - Vectors
99  * - Stat object
100  */
101  SLU::StatFree( &(data_.stat) ) ;
102 
103  // Storage is initialized in numericFactorization_impl()
104  if ( data_.A.Store != NULL ){
105  SLU::Destroy_SuperMatrix_Store( &(data_.A) );
106  }
107 
108  // only root allocated these SuperMatrices.
109  if ( data_.L.Store != NULL ){ // will only be true for this->root_
110  SLU::Destroy_SuperNode_Matrix( &(data_.L) );
111  SLU::Destroy_CompCol_Matrix( &(data_.U) );
112  }
113 }
114 
115 template <class Matrix, class Vector>
116 std::string
118 {
119  std::ostringstream oss;
120  oss << "SuperLU solver interface";
121  if (ILU_Flag_) {
122  oss << ", \"ILUTP\" : {";
123  oss << "drop tol = " << data_.options.ILU_DropTol;
124  oss << ", fill factor = " << data_.options.ILU_FillFactor;
125  oss << ", fill tol = " << data_.options.ILU_FillTol;
126  switch(data_.options.ILU_MILU) {
127  case SLU::SMILU_1 :
128  oss << ", MILU 1";
129  break;
130  case SLU::SMILU_2 :
131  oss << ", MILU 2";
132  break;
133  case SLU::SMILU_3 :
134  oss << ", MILU 3";
135  break;
136  case SLU::SILU :
137  default:
138  oss << ", regular ILU";
139  }
140  switch(data_.options.ILU_Norm) {
141  case SLU::ONE_NORM :
142  oss << ", 1-norm";
143  break;
144  case SLU::TWO_NORM :
145  oss << ", 2-norm";
146  break;
147  case SLU::INF_NORM :
148  default:
149  oss << ", infinity-norm";
150  }
151  oss << "}";
152  } else {
153  oss << ", direct solve";
154  }
155  return oss.str();
156  /*
157 
158  // ILU parameters
159  if( parameterList->isParameter("RowPerm") ){
160  RCP<const ParameterEntryValidator> rowperm_validator = valid_params->getEntry("RowPerm").validator();
161  parameterList->getEntry("RowPerm").setValidator(rowperm_validator);
162  data_.options.RowPerm = getIntegralValue<SLU::rowperm_t>(*parameterList, "RowPerm");
163  }
164 
165 
166  */
167 }
168 
169 template<class Matrix, class Vector>
170 int
172 {
173  /*
174  * Get column permutation vector perm_c[], according to permc_spec:
175  * permc_spec = NATURAL: natural ordering
176  * permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A
177  * permc_spec = MMD_ATA: minimum degree on structure of A'*A
178  * permc_spec = COLAMD: approximate minimum degree column ordering
179  * permc_spec = MY_PERMC: the ordering already supplied in perm_c[]
180  */
181  int permc_spec = data_.options.ColPerm;
182  if (!use_metis_ && permc_spec != SLU::MY_PERMC && this->root_ ){
183 #ifdef HAVE_AMESOS2_TIMERS
184  Teuchos::TimeMonitor preOrderTimer(this->timers_.preOrderTime_);
185 #endif
186 
187  SLU::get_perm_c(permc_spec, &(data_.A), data_.perm_c.data());
188  }
189 
190  return(0);
191 }
192 
193 
194 template <class Matrix, class Vector>
195 int
197 {
198  /*
199  * SuperLU performs symbolic factorization and numeric factorization
200  * together, but does leave some options for reusing symbolic
201  * structures that have been created on previous factorizations. If
202  * our Amesos2 user calls this function, that is an indication that
203  * the symbolic structure of the matrix is no longer valid, and
204  * SuperLU should do the factorization from scratch.
205  *
206  * This can be accomplished by setting the options.Fact flag to
207  * DOFACT, as well as setting our own internal flag to false.
208  */
209 #ifdef HAVE_AMESOS2_TIMERS
210  Teuchos::TimeMonitor symFactTime( this->timers_.symFactTime_ );
211 #endif
212  same_symbolic_ = false;
213  data_.options.Fact = SLU::DOFACT;
214 
215  if (use_metis_) {
216 #ifdef HAVE_AMESOS2_TIMERS
217  Teuchos::RCP< Teuchos::Time > MetisTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for METIS");
218  Teuchos::TimeMonitor numFactTimer(*MetisTimer_);
219 #endif
220  size_t n = this->globalNumRows_;
221  size_t nz = this->globalNumNonZeros_;
222  host_size_type_array host_rowind_view_ = host_rows_view_;
223  host_ordinal_type_array host_colptr_view_ = host_col_ptr_view_;
224 
225  /* symmetrize the matrix (A + A') if needed */
226  if (symmetrize_metis_) {
227  int new_nz = 0;
228  int *new_col_ptr;
229  int *new_row_ind;
230 
231  // NOTE: both size_type and ordinal_type are defined as int
232  // Consider using "symmetrize_graph" in KokkosKernels, if this becomes significant in time.
233  SLU::at_plus_a(n, nz, host_col_ptr_view_.data(), host_rows_view_.data(),
234  &new_nz, &new_col_ptr, &new_row_ind);
235 
236  // reallocate (so that we won't overwrite the original)
237  // and copy for output
238  nz = new_nz;
239  Kokkos::resize (host_rowind_view_, nz);
240  Kokkos::realloc(host_colptr_view_, n+1);
241  for (size_t i = 0; i <= n; i++) {
242  host_colptr_view_(i) = new_col_ptr[i];
243  }
244  for (size_t i = 0; i < nz; i++) {
245  host_rowind_view_(i) = new_row_ind[i];
246  }
247 
248  // free
249  SLU::SUPERLU_FREE(new_col_ptr);
250  if (new_nz > 0) {
251  SLU::SUPERLU_FREE(new_row_ind);
252  }
253  }
254 
255  // reorder will convert both graph and perm/iperm to the internal METIS integer type
256  using metis_int_array_type = host_ordinal_type_array;
257  metis_int_array_type metis_perm = metis_int_array_type(Kokkos::ViewAllocateWithoutInitializing("metis_perm"), n);
258  metis_int_array_type metis_iperm = metis_int_array_type(Kokkos::ViewAllocateWithoutInitializing("metis_iperm"), n);
259 
260  // call METIS
261  size_t new_nnz = 0;
262  Amesos2::Util::reorder(
263  host_colptr_view_, host_rowind_view_,
264  metis_perm, metis_iperm, new_nnz, false);
265 
266  for (size_t i = 0; i < n; i++) {
267  data_.perm_r(i) = metis_iperm(i);
268  data_.perm_c(i) = metis_iperm(i);
269  }
270 
271  // SLU will use user-specified METIS ordering
272  data_.options.ColPerm = SLU::MY_PERMC;
273  data_.options.RowPerm = SLU::MY_PERMR;
274  // Turn off Equil not to mess up METIS ordering?
275  //data_.options.Equil = SLU::NO;
276  }
277  return(0);
278 }
279 
280 
281 template <class Matrix, class Vector>
282 int
284 {
285  using Teuchos::as;
286 
287  // Cleanup old L and U matrices if we are not reusing a symbolic
288  // factorization. Stores and other data will be allocated in gstrf.
289  // Only rank 0 has valid pointers
290  if ( !same_symbolic_ && data_.L.Store != NULL ){
291  SLU::Destroy_SuperNode_Matrix( &(data_.L) );
292  SLU::Destroy_CompCol_Matrix( &(data_.U) );
293  data_.L.Store = NULL;
294  data_.U.Store = NULL;
295  }
296 
297  if( same_symbolic_ ) data_.options.Fact = SLU::SamePattern_SameRowPerm;
298 
299  int info = 0;
300  int info2 = 0; // for Equil
301  if ( this->root_ ) {
302 
303 #ifdef HAVE_AMESOS2_DEBUG
304  TEUCHOS_TEST_FOR_EXCEPTION( data_.A.ncol != as<int>(this->globalNumCols_),
305  std::runtime_error,
306  "Error in converting to SuperLU SuperMatrix: wrong number of global columns." );
307  TEUCHOS_TEST_FOR_EXCEPTION( data_.A.nrow != as<int>(this->globalNumRows_),
308  std::runtime_error,
309  "Error in converting to SuperLU SuperMatrix: wrong number of global rows." );
310 #endif
311 
312  if( data_.options.Equil == SLU::YES ){
313  magnitude_type rowcnd, colcnd, amax;
314 
315  // calculate row and column scalings
316  function_map::gsequ(&(data_.A), data_.R.data(),
317  data_.C.data(), &rowcnd, &colcnd,
318  &amax, &info2);
319 
320  if (info2 == 0) {
321  // apply row and column scalings if necessary
322  function_map::laqgs(&(data_.A), data_.R.data(),
323  data_.C.data(), rowcnd, colcnd,
324  amax, &(data_.equed));
325 
326  // check what types of equilibration was actually done
327  data_.rowequ = (data_.equed == 'R') || (data_.equed == 'B');
328  data_.colequ = (data_.equed == 'C') || (data_.equed == 'B');
329  }
330  }
331 
332  if (info2 == 0) {
333  // Apply the column permutation computed in preOrdering. Place the
334  // column-permuted matrix in AC
335  SLU::sp_preorder(&(data_.options), &(data_.A), data_.perm_c.data(),
336  data_.etree.data(), &(data_.AC));
337 
338  { // Do factorization
339 #ifdef HAVE_AMESOS2_TIMERS
340  Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
341 #endif
342 
343 #ifdef HAVE_AMESOS2_VERBOSE_DEBUG
344  std::cout << "Superlu:: Before numeric factorization" << std::endl;
345  std::cout << "nzvals_ : " << nzvals_.toString() << std::endl;
346  std::cout << "rowind_ : " << rowind_.toString() << std::endl;
347  std::cout << "colptr_ : " << colptr_.toString() << std::endl;
348 #endif
349 
350  if(ILU_Flag_==false) {
351  function_map::gstrf(&(data_.options), &(data_.AC),
352  data_.relax, data_.panel_size, data_.etree.data(),
353  NULL, 0, data_.perm_c.data(), data_.perm_r.data(),
354  &(data_.L), &(data_.U),
355 #ifdef HAVE_AMESOS2_SUPERLU5_API
356  &(data_.lu),
357 #endif
358  &(data_.stat), &info);
359  }
360  else {
361  function_map::gsitrf(&(data_.options), &(data_.AC),
362  data_.relax, data_.panel_size, data_.etree.data(),
363  NULL, 0, data_.perm_c.data(), data_.perm_r.data(),
364  &(data_.L), &(data_.U),
365 #ifdef HAVE_AMESOS2_SUPERLU5_API
366  &(data_.lu),
367 #endif
368  &(data_.stat), &info);
369  }
370 
371  if (data_.options.ConditionNumber == SLU::YES) {
372  char norm[1];
373  if (data_.options.Trans == SLU::NOTRANS) {
374  *(unsigned char *)norm = '1';
375  } else {
376  *(unsigned char *)norm = 'I';
377  }
378 
379  data_.anorm = function_map::langs(norm, &(data_.A));
380  function_map::gscon(norm, &(data_.L), &(data_.U),
381  data_.anorm, &(data_.rcond),
382  &(data_.stat), &info);
383  }
384  }
385  // Cleanup. AC data will be alloc'd again for next factorization (if at all)
386  SLU::Destroy_CompCol_Permuted( &(data_.AC) );
387 
388  // Set the number of non-zero values in the L and U factors
389  this->setNnzLU( as<size_t>(((SLU::SCformat*)data_.L.Store)->nnz +
390  ((SLU::NCformat*)data_.U.Store)->nnz) );
391  } // end of if (info2 == 0)
392  } // end of if (this->root)
393 
394  if( data_.options.Equil == SLU::YES ) { // error check for Equil
395  /* All processes should have the same error code */
396  Teuchos::broadcast(*(this->getComm()), 0, &info2);
397 
398  TEUCHOS_TEST_FOR_EXCEPTION
399  (info2 < 0, std::runtime_error,
400  "SuperLU's gsequ function returned with status " << info2 << " < 0."
401  " This means that argument " << (-info2) << " given to the function"
402  " had an illegal value.");
403  if (info2 > 0) {
404  if (info2 <= data_.A.nrow) {
405  TEUCHOS_TEST_FOR_EXCEPTION
406  (true, std::runtime_error, "SuperLU's gsequ function returned with "
407  "info = " << info2 << " > 0, and info <= A.nrow = " << data_.A.nrow
408  << ". This means that row " << info2 << " of A is exactly zero.");
409  }
410  else if (info2 > data_.A.ncol) {
411  TEUCHOS_TEST_FOR_EXCEPTION
412  (true, std::runtime_error, "SuperLU's gsequ function returned with "
413  "info = " << info2 << " > 0, and info > A.ncol = " << data_.A.ncol
414  << ". This means that column " << (info2 - data_.A.nrow) << " of "
415  "A is exactly zero.");
416  }
417  else {
418  TEUCHOS_TEST_FOR_EXCEPTION
419  (true, std::runtime_error, "SuperLU's gsequ function returned "
420  "with info = " << info2 << " > 0, but its value is not in the "
421  "range permitted by the documentation. This should never happen "
422  "(it appears to be SuperLU's fault).");
423  }
424  }
425  }
426 
427  /* All processes should have the same error code */
428  Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
429 
430  global_size_type info_st = as<global_size_type>(info);
431  TEUCHOS_TEST_FOR_EXCEPTION( (info_st > 0) && (info_st <= this->globalNumCols_),
432  std::runtime_error,
433  "Factorization complete, but matrix is singular. Division by zero eminent");
434  TEUCHOS_TEST_FOR_EXCEPTION( (info_st > 0) && (info_st > this->globalNumCols_),
435  std::runtime_error,
436  "Memory allocation failure in Superlu factorization");
437 
438  data_.options.Fact = SLU::FACTORED;
439  same_symbolic_ = true;
440 
441  if(use_triangular_solves_) {
442 #ifdef HAVE_AMESOS2_VERBOSE_DEBUG
443  #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
444  if (this->getComm()->getRank()) {
445  std::cout << " > Metis : " << (use_metis_ ? "YES" : "NO") << std::endl;
446  std::cout << " > Equil : " << (data_.options.Equil == SLU::YES ? "YES" : "NO") << std::endl;
447  std::cout << " > Cond Number : " << (data_.options.ConditionNumber == SLU::YES ? "YES" : "NO") << std::endl;
448  std::cout << " > Invert diag : " << sptrsv_invert_diag_ << std::endl;
449  std::cout << " > Invert off-diag : " << sptrsv_invert_offdiag_ << std::endl;
450  std::cout << " > U in CSR : " << sptrsv_u_in_csr_ << std::endl;
451  std::cout << " > Merge : " << sptrsv_merge_supernodes_ << std::endl;
452  std::cout << " > Use SpMV : " << sptrsv_use_spmv_ << std::endl;
453  }
454  //std::cout << myRank << " : siize(A) " << (data_.A.nrow) << " x " << (data_.A.ncol) << std::endl;
455  //std::cout << myRank << " : nnz(A) " << ((SLU::NCformat*)data_.A.Store)->nnz << std::endl;
456  //std::cout << myRank << " : nnz(L) " << ((SLU::SCformat*)data_.L.Store)->nnz << std::endl;
457  //std::cout << myRank << " : nnz(U) " << ((SLU::SCformat*)data_.U.Store)->nnz << std::endl;
458  #endif
459 #endif
460 #ifdef HAVE_AMESOS2_TIMERS
461  Teuchos::RCP< Teuchos::Time > SpTrsvTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for SpTrsv setup");
462  Teuchos::TimeMonitor numFactTimer(*SpTrsvTimer_);
463 #endif
464  triangular_solve_factor();
465  }
466 
467  return(info);
468 }
469 
470 
471 template <class Matrix, class Vector>
472 int
474  const Teuchos::Ptr<const MultiVecAdapter<Vector> > B) const
475 {
476  using Teuchos::as;
477 #ifdef HAVE_AMESOS2_TIMERS
478  Teuchos::RCP< Teuchos::Time > Amesos2SolveTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for Amesos2");
479  Teuchos::TimeMonitor solveTimer(*Amesos2SolveTimer_);
480 #endif
481 
482  const global_size_type ld_rhs = this->root_ ? X->getGlobalLength() : 0;
483  const size_t nrhs = X->getGlobalNumVectors();
484 
485  bool bDidAssignX = false; // will be set below
486  bool bDidAssignB = false; // will be set below
487  { // Get values from RHS B
488 #ifdef HAVE_AMESOS2_TIMERS
489  Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
490 #endif
491 
492  // In general we may want to write directly to the x space without a copy.
493  // So we 'get' x which may be a direct view assignment to the MV.
494  const bool initialize_data = true;
495  const bool do_not_initialize_data = false;
496  if(use_triangular_solves_) { // to device
497 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
498  bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
499  device_solve_array_t>::do_get(do_not_initialize_data, X, device_xValues_,
500  as<size_t>(ld_rhs),
501  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
502  this->rowIndexBase_);
503  bDidAssignB = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
504  device_solve_array_t>::do_get(initialize_data, B, device_bValues_,
505  as<size_t>(ld_rhs),
506  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
507  this->rowIndexBase_);
508 #endif
509  }
510  else { // to host
511  bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
512  host_solve_array_t>::do_get(do_not_initialize_data, X, host_xValues_,
513  as<size_t>(ld_rhs),
514  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
515  this->rowIndexBase_);
516  bDidAssignB = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
517  host_solve_array_t>::do_get(initialize_data, B, host_bValues_,
518  as<size_t>(ld_rhs),
519  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
520  this->rowIndexBase_);
521  }
522  }
523 
524  // If equilibration was applied at numeric, then gssvx and gsisx are going to
525  // modify B, so we can't use the optimized assignment to B since we'll change
526  // the source test vector and then fail the subsequent cycle. We need a copy.
527  // If bDidAssignB is false, then we skip the copy since it was copied already.
528  if(bDidAssignB && data_.equed != 'N') {
529  if(use_triangular_solves_) { // to device
530 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
531  device_solve_array_t copyB(Kokkos::ViewAllocateWithoutInitializing("copyB"),
532  device_bValues_.extent(0), device_bValues_.extent(1));
533  Kokkos::deep_copy(copyB, device_bValues_);
534  device_bValues_ = copyB;
535 #endif
536  }
537  else {
538  host_solve_array_t copyB(Kokkos::ViewAllocateWithoutInitializing("copyB"),
539  host_bValues_.extent(0), host_bValues_.extent(1));
540  Kokkos::deep_copy(copyB, host_bValues_);
541  host_bValues_ = copyB;
542  }
543  }
544 
545  int ierr = 0; // returned error code
546 
547  magnitude_type rpg, rcond;
548  if ( this->root_ ) {
549  // Note: the values of B and X (after solution) are adjusted
550  // appropriately within gssvx for row and column scalings.
551  { // Do solve!
552 #ifdef HAVE_AMESOS2_TIMERS
553  Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
554 #endif
555 
556  if(use_triangular_solves_) {
557  triangular_solve();
558  }
559  else {
560  Kokkos::resize(data_.ferr, nrhs);
561  Kokkos::resize(data_.berr, nrhs);
562 
563  {
564  #ifdef HAVE_AMESOS2_TIMERS
565  Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
566  #endif
567  SLU::Dtype_t dtype = type_map::dtype;
568  int i_ld_rhs = as<int>(ld_rhs);
569  function_map::create_Dense_Matrix(&(data_.B), i_ld_rhs, as<int>(nrhs),
570  convert_bValues_, host_bValues_, i_ld_rhs,
571  SLU::SLU_DN, dtype, SLU::SLU_GE);
572  function_map::create_Dense_Matrix(&(data_.X), i_ld_rhs, as<int>(nrhs),
573  convert_xValues_, host_xValues_, i_ld_rhs,
574  SLU::SLU_DN, dtype, SLU::SLU_GE);
575  }
576 
577  if(ILU_Flag_==false) {
578  function_map::gssvx(&(data_.options), &(data_.A),
579  data_.perm_c.data(), data_.perm_r.data(),
580  data_.etree.data(), &(data_.equed), data_.R.data(),
581  data_.C.data(), &(data_.L), &(data_.U), NULL, 0, &(data_.B),
582  &(data_.X), &rpg, &rcond, data_.ferr.data(),
583  data_.berr.data(),
584  #ifdef HAVE_AMESOS2_SUPERLU5_API
585  &(data_.lu),
586  #endif
587  &(data_.mem_usage), &(data_.stat), &ierr);
588  }
589  else {
590  function_map::gsisx(&(data_.options), &(data_.A),
591  data_.perm_c.data(), data_.perm_r.data(),
592  data_.etree.data(), &(data_.equed), data_.R.data(),
593  data_.C.data(), &(data_.L), &(data_.U), NULL, 0, &(data_.B),
594  &(data_.X), &rpg, &rcond,
595  #ifdef HAVE_AMESOS2_SUPERLU5_API
596  &(data_.lu),
597  #endif
598  &(data_.mem_usage), &(data_.stat), &ierr);
599  }
600 
601  // Cleanup X and B stores
602  SLU::Destroy_SuperMatrix_Store( &(data_.X) );
603  SLU::Destroy_SuperMatrix_Store( &(data_.B) );
604  data_.X.Store = NULL;
605  data_.B.Store = NULL;
606  }
607 
608  } // do solve
609 
610  // convert back to Kokkos views
611  {
612 #ifdef HAVE_AMESOS2_TIMERS
613  Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
614 #endif
615  function_map::convert_back_Dense_Matrix(convert_bValues_, host_bValues_);
616  function_map::convert_back_Dense_Matrix(convert_xValues_, host_xValues_);
617  }
618 
619  // Cleanup X and B stores
620  SLU::Destroy_SuperMatrix_Store( &(data_.X) );
621  SLU::Destroy_SuperMatrix_Store( &(data_.B) );
622  data_.X.Store = NULL;
623  data_.B.Store = NULL;
624  }
625 
626  /* All processes should have the same error code */
627  Teuchos::broadcast(*(this->getComm()), 0, &ierr);
628 
629  global_size_type ierr_st = as<global_size_type>(ierr);
630  TEUCHOS_TEST_FOR_EXCEPTION( ierr < 0,
631  std::invalid_argument,
632  "Argument " << -ierr << " to SuperLU xgssvx had illegal value" );
633  TEUCHOS_TEST_FOR_EXCEPTION( ierr > 0 && ierr_st <= this->globalNumCols_,
634  std::runtime_error,
635  "Factorization complete, but U is exactly singular" );
636  TEUCHOS_TEST_FOR_EXCEPTION( ierr > 0 && ierr_st > this->globalNumCols_ + 1,
637  std::runtime_error,
638  "SuperLU allocated " << ierr - this->globalNumCols_ << " bytes of "
639  "memory before allocation failure occured." );
640 
641  /* Update X's global values */
642 
643  // if bDidAssignX, then we solved straight to the adapter's X memory space without
644  // requiring additional memory allocation, so the x data is already in place.
645  if(!bDidAssignX) {
646 #ifdef HAVE_AMESOS2_TIMERS
647  Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
648 #endif
649 
650  if(use_triangular_solves_) { // to device
651 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
652  Util::put_1d_data_helper_kokkos_view<
653  MultiVecAdapter<Vector>,device_solve_array_t>::do_put(X, device_xValues_,
654  as<size_t>(ld_rhs),
655  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
656  this->rowIndexBase_);
657 #endif
658  }
659  else {
660  Util::put_1d_data_helper_kokkos_view<
661  MultiVecAdapter<Vector>,host_solve_array_t>::do_put(X, host_xValues_,
662  as<size_t>(ld_rhs),
663  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
664  this->rowIndexBase_);
665  }
666  }
667 
668  return(ierr);
669 }
670 
671 
672 template <class Matrix, class Vector>
673 bool
675 {
676  // The Superlu factorization routines can handle square as well as
677  // rectangular matrices, but Superlu can only apply the solve routines to
678  // square matrices, so we check the matrix for squareness.
679  return( this->matrixA_->getGlobalNumRows() == this->matrixA_->getGlobalNumCols() );
680 }
681 
682 
683 template <class Matrix, class Vector>
684 void
685 Superlu<Matrix,Vector>::setParameters_impl(const Teuchos::RCP<Teuchos::ParameterList> & parameterList )
686 {
687  using Teuchos::RCP;
688  using Teuchos::getIntegralValue;
689  using Teuchos::ParameterEntryValidator;
690 
691  RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
692 
693  ILU_Flag_ = parameterList->get<bool>("ILU_Flag",false);
694  if (ILU_Flag_) {
695  SLU::ilu_set_default_options(&(data_.options));
696  // Override some default options
697  data_.options.PrintStat = SLU::NO;
698  }
699 
700  data_.options.Trans = this->control_.useTranspose_ ? SLU::TRANS : SLU::NOTRANS;
701  // The SuperLU transpose option can override the Amesos2 option
702  if( parameterList->isParameter("Trans") ){
703  RCP<const ParameterEntryValidator> trans_validator = valid_params->getEntry("Trans").validator();
704  parameterList->getEntry("Trans").setValidator(trans_validator);
705 
706  data_.options.Trans = getIntegralValue<SLU::trans_t>(*parameterList, "Trans");
707  }
708 
709  if( parameterList->isParameter("IterRefine") ){
710  RCP<const ParameterEntryValidator> refine_validator = valid_params->getEntry("IterRefine").validator();
711  parameterList->getEntry("IterRefine").setValidator(refine_validator);
712 
713  data_.options.IterRefine = getIntegralValue<SLU::IterRefine_t>(*parameterList, "IterRefine");
714  }
715 
716  if( parameterList->isParameter("ColPerm") ){
717  RCP<const ParameterEntryValidator> colperm_validator = valid_params->getEntry("ColPerm").validator();
718  parameterList->getEntry("ColPerm").setValidator(colperm_validator);
719 
720  data_.options.ColPerm = getIntegralValue<SLU::colperm_t>(*parameterList, "ColPerm");
721  }
722 
723  data_.options.DiagPivotThresh = parameterList->get<double>("DiagPivotThresh", 1.0);
724 
725  bool equil = parameterList->get<bool>("Equil", true);
726  data_.options.Equil = equil ? SLU::YES : SLU::NO;
727 
728  bool condNum = parameterList->get<bool>("ConditionNumber", false);
729  data_.options.ConditionNumber = condNum ? SLU::YES : SLU::NO;
730 
731  bool symmetric_mode = parameterList->get<bool>("SymmetricMode", false);
732  data_.options.SymmetricMode = symmetric_mode ? SLU::YES : SLU::NO;
733 
734  // ILU parameters
735  if( parameterList->isParameter("RowPerm") ){
736  RCP<const ParameterEntryValidator> rowperm_validator = valid_params->getEntry("RowPerm").validator();
737  parameterList->getEntry("RowPerm").setValidator(rowperm_validator);
738  data_.options.RowPerm = getIntegralValue<SLU::rowperm_t>(*parameterList, "RowPerm");
739  }
740 
741  /*if( parameterList->isParameter("ILU_DropRule") ) {
742  RCP<const ParameterEntryValidator> droprule_validator = valid_params->getEntry("ILU_DropRule").validator();
743  parameterList->getEntry("ILU_DropRule").setValidator(droprule_validator);
744  data_.options.ILU_DropRule = getIntegralValue<SLU::rule_t>(*parameterList, "ILU_DropRule");
745  }*/
746 
747  data_.options.ILU_DropTol = parameterList->get<double>("ILU_DropTol", 0.0001);
748 
749  data_.options.ILU_FillFactor = parameterList->get<double>("ILU_FillFactor", 10.0);
750 
751  if( parameterList->isParameter("ILU_Norm") ) {
752  RCP<const ParameterEntryValidator> norm_validator = valid_params->getEntry("ILU_Norm").validator();
753  parameterList->getEntry("ILU_Norm").setValidator(norm_validator);
754  data_.options.ILU_Norm = getIntegralValue<SLU::norm_t>(*parameterList, "ILU_Norm");
755  }
756 
757  if( parameterList->isParameter("ILU_MILU") ) {
758  RCP<const ParameterEntryValidator> milu_validator = valid_params->getEntry("ILU_MILU").validator();
759  parameterList->getEntry("ILU_MILU").setValidator(milu_validator);
760  data_.options.ILU_MILU = getIntegralValue<SLU::milu_t>(*parameterList, "ILU_MILU");
761  }
762 
763  data_.options.ILU_FillTol = parameterList->get<double>("ILU_FillTol", 0.01);
764 
765  is_contiguous_ = parameterList->get<bool>("IsContiguous", true);
766 
767  use_metis_ = parameterList->get<bool>("UseMetis", false);
768  symmetrize_metis_ = parameterList->get<bool>("SymmetrizeMetis", true);
769 
770  use_triangular_solves_ = parameterList->get<bool>("Enable_KokkosKernels_TriangularSolves", false);
771  if(use_triangular_solves_) {
772 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
773  // specify whether to invert diagonal blocks
774  sptrsv_invert_diag_ = parameterList->get<bool>("SpTRSV_Invert_Diag", true);
775  // specify whether to apply diagonal-inversion to off-diagonal blocks (optional, default is false)
776  sptrsv_invert_offdiag_ = parameterList->get<bool>("SpTRSV_Invert_OffDiag", false);
777  // specify whether to store U in CSR format
778  sptrsv_u_in_csr_ = parameterList->get<bool>("SpTRSV_U_CSR", true);
779  // specify whether to merge supernodes with similar sparsity structures
780  sptrsv_merge_supernodes_ = parameterList->get<bool>("SpTRSV_Merge_Supernodes", false);
781  // specify whether to use spmv for sptrsv
782  sptrsv_use_spmv_ = parameterList->get<bool>("SpTRSV_Use_SpMV", false);
783 #else
784  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error,
785  "Calling for triangular solves but KokkosKernels_ENABLE_SUPERNODAL_SPTRSV and KokkosKernels_ENABLE_TPL_SUPERLU were not configured." );
786 #endif
787  }
788 }
789 
790 
791 template <class Matrix, class Vector>
792 Teuchos::RCP<const Teuchos::ParameterList>
794 {
795  using std::string;
796  using Teuchos::tuple;
797  using Teuchos::ParameterList;
798  using Teuchos::EnhancedNumberValidator;
799  using Teuchos::setStringToIntegralParameter;
800  using Teuchos::stringToIntegralParameterEntryValidator;
801 
802  static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
803 
804  if( is_null(valid_params) ){
805  Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
806 
807  setStringToIntegralParameter<SLU::trans_t>("Trans", "NOTRANS",
808  "Solve for the transpose system or not",
809  tuple<string>("TRANS","NOTRANS","CONJ"),
810  tuple<string>("Solve with transpose",
811  "Do not solve with transpose",
812  "Solve with the conjugate transpose"),
813  tuple<SLU::trans_t>(SLU::TRANS,
814  SLU::NOTRANS,
815  SLU::CONJ),
816  pl.getRawPtr());
817 
818  setStringToIntegralParameter<SLU::IterRefine_t>("IterRefine", "NOREFINE",
819  "Type of iterative refinement to use",
820  tuple<string>("NOREFINE", "SLU_SINGLE", "SLU_DOUBLE"),
821  tuple<string>("Do not use iterative refinement",
822  "Do single iterative refinement",
823  "Do double iterative refinement"),
824  tuple<SLU::IterRefine_t>(SLU::NOREFINE,
825  SLU::SLU_SINGLE,
826  SLU::SLU_DOUBLE),
827  pl.getRawPtr());
828 
829  // Note: MY_PERMC not yet supported
830  setStringToIntegralParameter<SLU::colperm_t>("ColPerm", "COLAMD",
831  "Specifies how to permute the columns of the "
832  "matrix for sparsity preservation",
833  tuple<string>("NATURAL", "MMD_AT_PLUS_A",
834  "MMD_ATA", "COLAMD"),
835  tuple<string>("Natural ordering",
836  "Minimum degree ordering on A^T + A",
837  "Minimum degree ordering on A^T A",
838  "Approximate minimum degree column ordering"),
839  tuple<SLU::colperm_t>(SLU::NATURAL,
840  SLU::MMD_AT_PLUS_A,
841  SLU::MMD_ATA,
842  SLU::COLAMD),
843  pl.getRawPtr());
844 
845  Teuchos::RCP<EnhancedNumberValidator<double> > diag_pivot_thresh_validator
846  = Teuchos::rcp( new EnhancedNumberValidator<double>(0.0, 1.0) );
847  pl->set("DiagPivotThresh", 1.0,
848  "Specifies the threshold used for a diagonal entry to be an acceptable pivot",
849  diag_pivot_thresh_validator); // partial pivoting
850 
851  pl->set("Equil", true, "Whether to equilibrate the system before solve");
852  pl->set("ConditionNumber", false, "Whether to approximate condition number");
853 
854  pl->set("SymmetricMode", false,
855  "Specifies whether to use the symmetric mode. "
856  "Gives preference to diagonal pivots and uses "
857  "an (A^T + A)-based column permutation.");
858 
859  // ILU parameters
860 
861  setStringToIntegralParameter<SLU::rowperm_t>("RowPerm", "LargeDiag",
862  "Type of row permutation strategy to use",
863  tuple<string>("NOROWPERM","LargeDiag","MY_PERMR"),
864  tuple<string>("Use natural ordering",
865  "Use weighted bipartite matching algorithm",
866  "Use the ordering given in perm_r input"),
867  tuple<SLU::rowperm_t>(SLU::NOROWPERM,
868 #if SUPERLU_MAJOR_VERSION > 5 || (SUPERLU_MAJOR_VERSION == 5 && SUPERLU_MINOR_VERSION > 2) || (SUPERLU_MAJOR_VERSION == 5 && SUPERLU_MINOR_VERSION == 2 && SUPERLU_PATCH_VERSION >= 2)
869  SLU::LargeDiag_MC64,
870 #else
871  SLU::LargeDiag,
872 #endif
873  SLU::MY_PERMR),
874  pl.getRawPtr());
875 
876  /*setStringToIntegralParameter<SLU::rule_t>("ILU_DropRule", "DROP_BASIC",
877  "Type of dropping strategy to use",
878  tuple<string>("DROP_BASIC","DROP_PROWS",
879  "DROP_COLUMN","DROP_AREA",
880  "DROP_DYNAMIC","DROP_INTERP"),
881  tuple<string>("ILUTP(t)","ILUTP(p,t)",
882  "Variant of ILUTP(p,t) for j-th column",
883  "Variant of ILUTP to control memory",
884  "Dynamically adjust threshold",
885  "Compute second dropping threshold by interpolation"),
886  tuple<SLU::rule_t>(SLU::DROP_BASIC,SLU::DROP_PROWS,SLU::DROP_COLUMN,
887  SLU::DROP_AREA,SLU::DROP_DYNAMIC,SLU::DROP_INTERP),
888  pl.getRawPtr());*/
889 
890  pl->set("ILU_DropTol", 0.0001, "ILUT drop tolerance");
891 
892  pl->set("ILU_FillFactor", 10.0, "ILUT fill factor");
893 
894  setStringToIntegralParameter<SLU::norm_t>("ILU_Norm", "INF_NORM",
895  "Type of norm to use",
896  tuple<string>("ONE_NORM","TWO_NORM","INF_NORM"),
897  tuple<string>("1-norm","2-norm","inf-norm"),
898  tuple<SLU::norm_t>(SLU::ONE_NORM,SLU::TWO_NORM,SLU::INF_NORM),
899  pl.getRawPtr());
900 
901  setStringToIntegralParameter<SLU::milu_t>("ILU_MILU", "SILU",
902  "Type of modified ILU to use",
903  tuple<string>("SILU","SMILU_1","SMILU_2","SMILU_3"),
904  tuple<string>("Regular ILU","MILU 1","MILU 2","MILU 3"),
905  tuple<SLU::milu_t>(SLU::SILU,SLU::SMILU_1,SLU::SMILU_2,
906  SLU::SMILU_3),
907  pl.getRawPtr());
908 
909  pl->set("ILU_FillTol", 0.01, "ILUT fill tolerance");
910 
911  pl->set("ILU_Flag", false, "ILU flag: if true, run ILU routines");
912 
913  pl->set("IsContiguous", true, "Whether GIDs contiguous");
914 
915  // call METIS before SuperLU
916  pl->set("UseMetis", false, "Whether to call METIS before SuperLU");
917  pl->set("SymmetrizeMetis", true, "Whether to symmetrize matrix before METIS");
918 
919  pl->set("Enable_KokkosKernels_TriangularSolves", false, "Whether to use triangular solves.");
920 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
921  pl->set("SpTRSV_Invert_Diag", true, "specify whether to invert diagonal blocks for supernodal sparse-trianguular solve");
922  pl->set("SpTRSV_Invert_OffDiag", false, "specify whether to apply diagonal-inversion to off-diagonal blocks for supernodal sparse-trianguular solve");
923  pl->set("SpTRSV_U_CSR", true, "specify whether to store U in CSR forma for supernodal sparse-trianguular solve");
924  pl->set("SpTRSV_Merge_Supernodes", false, "specify whether to merge supernodes with similar sparsity structures for supernodal sparse-trianguular solve");
925  pl->set("SpTRSV_Use_SpMV", false, "specify whether to use spmv for supernodal sparse-trianguular solve");
926 #endif
927 
928  valid_params = pl;
929  }
930 
931  return valid_params;
932 }
933 
934 
935 template <class Matrix, class Vector>
936 bool
938 {
939  using Teuchos::as;
940 
941 #ifdef HAVE_AMESOS2_TIMERS
942  Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
943 #endif
944 
945  // SuperLU does not need the matrix at symbolicFactorization()
946  if( current_phase == SYMBFACT ) return false;
947 
948  // Cleanup old store memory if it's non-NULL (should only ever be non-NULL at root_)
949  if( data_.A.Store != NULL ){
950  SLU::Destroy_SuperMatrix_Store( &(data_.A) );
951  data_.A.Store = NULL;
952  }
953 
954  // Only the root image needs storage allocated
955  if( this->root_ ){
956  Kokkos::resize(host_nzvals_view_, this->globalNumNonZeros_);
957  Kokkos::resize(host_rows_view_, this->globalNumNonZeros_);
958  Kokkos::resize(host_col_ptr_view_, this->globalNumRows_ + 1);
959  }
960 
961  int nnz_ret = 0;
962  {
963 #ifdef HAVE_AMESOS2_TIMERS
964  Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
965 #endif
966 
967  TEUCHOS_TEST_FOR_EXCEPTION( this->rowIndexBase_ != this->columnIndexBase_,
968  std::runtime_error,
969  "Row and column maps have different indexbase ");
970 
972  MatrixAdapter<Matrix>,host_value_type_array,host_ordinal_type_array,
973  host_size_type_array>::do_get(this->matrixA_.ptr(),
974  host_nzvals_view_, host_rows_view_,
975  host_col_ptr_view_, nnz_ret,
976  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
977  ARBITRARY,
978  this->rowIndexBase_);
979  }
980 
981  // Get the SLU data type for this type of matrix
982  SLU::Dtype_t dtype = type_map::dtype;
983 
984  if( this->root_ ){
985  TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != as<int>(this->globalNumNonZeros_),
986  std::runtime_error,
987  "Did not get the expected number of non-zero vals");
988 
989  function_map::template create_CompCol_Matrix<host_value_type_array>( &(data_.A),
990  this->globalNumRows_, this->globalNumCols_,
991  nnz_ret,
992  convert_nzvals_, host_nzvals_view_,
993  host_rows_view_.data(),
994  host_col_ptr_view_.data(),
995  SLU::SLU_NC, dtype, SLU::SLU_GE);
996  }
997 
998  return true;
999 }
1000 
1001 template <class Matrix, class Vector>
1002 void
1004 {
1005 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
1006  size_t ld_rhs = this->matrixA_->getGlobalNumRows();
1007 
1008  // convert etree to parents
1009  SLU::SCformat * Lstore = (SLU::SCformat*)(data_.L.Store);
1010  int nsuper = 1 + Lstore->nsuper; // # of supernodal columns
1011  Kokkos::resize(data_.parents, nsuper);
1012  for (int s = 0; s < nsuper; s++) {
1013  int j = Lstore->sup_to_col[s+1]-1; // the last column index of this supernode
1014  if (data_.etree[j] == static_cast<int>(ld_rhs)) {
1015  data_.parents(s) = -1;
1016  } else {
1017  data_.parents(s) = Lstore->col_to_sup[data_.etree[j]];
1018  }
1019  }
1020 
1021  deep_copy_or_assign_view(device_trsv_perm_r_, data_.perm_r); // will use device to permute
1022  deep_copy_or_assign_view(device_trsv_perm_c_, data_.perm_c); // will use device to permute
1023  if (data_.options.Equil == SLU::YES) {
1024  deep_copy_or_assign_view(device_trsv_R_, data_.R); // will use device to scale
1025  deep_copy_or_assign_view(device_trsv_C_, data_.C); // will use device to scale
1026  }
1027 
1028  bool condition_flag = false;
1029  if (data_.options.ConditionNumber == SLU::YES) {
1030  using STM = Teuchos::ScalarTraits<magnitude_type>;
1031  const magnitude_type eps = STM::eps ();
1032 
1033  SCformat *Lstore = (SCformat*)(data_.L.Store);
1034  int nsuper = 1 + Lstore->nsuper;
1035  int *nb = Lstore->sup_to_col;
1036  int max_cols = 0;
1037  for (int i = 0; i < nsuper; i++) {
1038  if (nb[i+1] - nb[i] > max_cols) {
1039  max_cols = nb[i+1] - nb[i];
1040  }
1041  }
1042 
1043  // when rcond is small, it is ill-conditioned and flag is true
1044  const magnitude_type multiply_fact (10.0); // larger the value, more likely flag is true, and no invert
1045  condition_flag = (((double)max_cols * nsuper) * eps * multiply_fact >= data_.rcond);
1046 
1047 #ifdef HAVE_AMESOS2_VERBOSE_DEBUG
1048  int n = data_.perm_r.extent(0);
1049  std::cout << this->getComm()->getRank()
1050  << " : anorm = " << data_.anorm << ", rcond = " << data_.rcond << ", n = " << n
1051  << ", num super cols = " << nsuper << ", max super cols = " << max_cols
1052  << " -> " << ((double)max_cols * nsuper) * eps / data_.rcond
1053  << (((double)max_cols * nsuper) * eps * multiply_fact < data_.rcond ? " (okay)" : " (warn)") << std::endl;
1054 #endif
1055  }
1056 
1057  // Create handles for U and U^T solves
1058  if (sptrsv_use_spmv_ && !condition_flag) {
1059  device_khL_.create_sptrsv_handle(
1060  KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_SPMV_DAG, ld_rhs, true);
1061  device_khU_.create_sptrsv_handle(
1062  KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_SPMV_DAG, ld_rhs, false);
1063  } else {
1064  device_khL_.create_sptrsv_handle(
1065  KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_DAG, ld_rhs, true);
1066  device_khU_.create_sptrsv_handle(
1067  KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_DAG, ld_rhs, false);
1068  }
1069 
1070  // specify whether to store U in CSR format
1071  device_khU_.set_sptrsv_column_major (!sptrsv_u_in_csr_);
1072 
1073  // specify whether to merge supernodes with similar sparsity structure
1074  device_khL_.set_sptrsv_merge_supernodes (sptrsv_merge_supernodes_);
1075  device_khU_.set_sptrsv_merge_supernodes (sptrsv_merge_supernodes_);
1076 
1077  // invert only if flag is not on
1078  bool sptrsv_invert_diag = (!condition_flag && sptrsv_invert_diag_);
1079  bool sptrsv_invert_offdiag = (!condition_flag && sptrsv_invert_offdiag_);
1080 
1081  // specify whether to invert diagonal blocks
1082  device_khL_.set_sptrsv_invert_diagonal (sptrsv_invert_diag);
1083  device_khU_.set_sptrsv_invert_diagonal (sptrsv_invert_diag);
1084 
1085  // specify wheather to apply diagonal-inversion to off-diagonal blocks (optional, default is false)
1086  device_khL_.set_sptrsv_invert_offdiagonal (sptrsv_invert_offdiag);
1087  device_khU_.set_sptrsv_invert_offdiagonal (sptrsv_invert_offdiag);
1088 
1089  // set etree
1090  device_khL_.set_sptrsv_etree(data_.parents.data());
1091  device_khU_.set_sptrsv_etree(data_.parents.data());
1092 
1093  // set permutation
1094  device_khL_.set_sptrsv_perm(data_.perm_r.data());
1095  device_khU_.set_sptrsv_perm(data_.perm_c.data());
1096 
1097  int block_size = -1; // TODO: add needed params
1098  if (block_size >= 0) {
1099  std::cout << " Block Size : " << block_size << std::endl;
1100  device_khL_.set_sptrsv_diag_supernode_sizes (block_size, block_size);
1101  device_khU_.set_sptrsv_diag_supernode_sizes (block_size, block_size);
1102  }
1103 
1104  // Do symbolic analysis
1105  {
1106 #ifdef HAVE_AMESOS2_TIMERS
1107  Teuchos::RCP< Teuchos::Time > SymboTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for SpTrsv symbolic");
1108  Teuchos::TimeMonitor numFactTimer(*SymboTimer_);
1109 #endif
1110  KokkosSparse::Experimental::sptrsv_symbolic
1111  (&device_khL_, &device_khU_, data_.L, data_.U);
1112  }
1113 
1114  // Do numerical compute
1115  {
1116 #ifdef HAVE_AMESOS2_TIMERS
1117  Teuchos::RCP< Teuchos::Time > NumerTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for SpTrsv numeric");
1118  Teuchos::TimeMonitor numFactTimer(*NumerTimer_);
1119 #endif
1120  KokkosSparse::Experimental::sptrsv_compute
1121  (&device_khL_, &device_khU_, data_.L, data_.U);
1122  }
1123 #endif // HAVE_AMESOS2_TRIANGULAR_SOLVE
1124 }
1125 
1126 template <class Matrix, class Vector>
1127 void
1128 Superlu<Matrix,Vector>::triangular_solve() const
1129 {
1130 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
1131  size_t ld_rhs = device_xValues_.extent(0);
1132  size_t nrhs = device_xValues_.extent(1);
1133 
1134  Kokkos::resize(device_trsv_rhs_, ld_rhs, nrhs);
1135  Kokkos::resize(device_trsv_sol_, ld_rhs, nrhs);
1136 
1137  // forward pivot
1138  auto local_device_bValues = device_bValues_;
1139  auto local_device_trsv_perm_r = device_trsv_perm_r_;
1140  auto local_device_trsv_rhs = device_trsv_rhs_;
1141 
1142  if (data_.rowequ) {
1143  auto local_device_trsv_R = device_trsv_R_;
1144  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1145  KOKKOS_LAMBDA(size_t j) {
1146  for(size_t k = 0; k < nrhs; ++k) {
1147  local_device_trsv_rhs(local_device_trsv_perm_r(j),k) = local_device_trsv_R(j) * local_device_bValues(j,k);
1148  }
1149  });
1150  } else {
1151  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1152  KOKKOS_LAMBDA(size_t j) {
1153  for(size_t k = 0; k < nrhs; ++k) {
1154  local_device_trsv_rhs(local_device_trsv_perm_r(j),k) = local_device_bValues(j,k);
1155  }
1156  });
1157  }
1158 
1159  for(size_t k = 0; k < nrhs; ++k) { // sptrsv_solve does not batch
1160  auto sub_sol = Kokkos::subview(device_trsv_sol_, Kokkos::ALL, k);
1161  auto sub_rhs = Kokkos::subview(device_trsv_rhs_, Kokkos::ALL, k);
1162 
1163  // do L solve= - numeric (only rhs is modified) on the default device/host space
1164  {
1165 #ifdef HAVE_AMESOS2_TIMERS
1166  Teuchos::RCP< Teuchos::Time > SpLtrsvTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for L solve");
1167  Teuchos::TimeMonitor solveTimer(*SpLtrsvTimer_);
1168 #endif
1169  KokkosSparse::Experimental::sptrsv_solve(&device_khL_, sub_sol, sub_rhs);
1170  }
1171  // do L^T solve - numeric (only rhs is modified) on the default device/host space
1172  {
1173 #ifdef HAVE_AMESOS2_TIMERS
1174  Teuchos::RCP< Teuchos::Time > SpUtrsvTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for U solve");
1175  Teuchos::TimeMonitor solveTimer(*SpUtrsvTimer_);
1176 #endif
1177  KokkosSparse::Experimental::sptrsv_solve(&device_khU_, sub_rhs, sub_sol);
1178  }
1179  } // end loop over rhs vectors
1180 
1181  // backward pivot
1182  auto local_device_xValues = device_xValues_;
1183  auto local_device_trsv_perm_c = device_trsv_perm_c_;
1184  if (data_.colequ) {
1185  auto local_device_trsv_C = device_trsv_C_;
1186  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1187  KOKKOS_LAMBDA(size_t j) {
1188  for(size_t k = 0; k < nrhs; ++k) {
1189  local_device_xValues(j,k) = local_device_trsv_C(j) * local_device_trsv_rhs(local_device_trsv_perm_c(j),k);
1190  }
1191  });
1192  } else {
1193  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1194  KOKKOS_LAMBDA(size_t j) {
1195  for(size_t k = 0; k < nrhs; ++k) {
1196  local_device_xValues(j,k) = local_device_trsv_rhs(local_device_trsv_perm_c(j),k);
1197  }
1198  });
1199  }
1200 #endif // HAVE_AMESOS2_TRIANGULAR_SOLVE
1201 }
1202 
1203 template<class Matrix, class Vector>
1204 const char* Superlu<Matrix,Vector>::name = "SuperLU";
1205 
1206 
1207 } // end namespace Amesos2
1208 
1209 #endif // AMESOS2_SUPERLU_DEF_HPP
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers...
Definition: Amesos2_SolverCore_decl.hpp:71
A generic helper class for getting a CCS representation of a Matrix.
Definition: Amesos2_Util.hpp:614
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal structures.
Definition: Amesos2_Superlu_def.hpp:937
EPhase
Used to indicate a phase in the direct solution.
Definition: Amesos2_TypeDecl.hpp:31
int numericFactorization_impl()
Superlu specific numeric factorization.
Definition: Amesos2_Superlu_def.hpp:283
~Superlu()
Destructor.
Definition: Amesos2_Superlu_def.hpp:94
Amesos2 Superlu declarations.
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition: Amesos2_Superlu_def.hpp:674
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition: Amesos2_Superlu_def.hpp:793
std::string description() const override
Returns a short description of this Solver.
Definition: Amesos2_Superlu_def.hpp:117
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
Superlu specific solve.
Definition: Amesos2_Superlu_def.hpp:473
A Matrix adapter interface for Amesos2.
Definition: Amesos2_MatrixAdapter_decl.hpp:42
void setParameters_impl(const Teuchos::RCP< Teuchos::ParameterList > &parameterList)
Definition: Amesos2_Superlu_def.hpp:685
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition: Amesos2_Superlu_def.hpp:171
A templated MultiVector class adapter for Amesos2.
Definition: Amesos2_MultiVecAdapter_decl.hpp:142
int symbolicFactorization_impl()
Perform symbolic factorization of the matrix using Superlu.
Definition: Amesos2_Superlu_def.hpp:196
Amesos2 interface to the SuperLU package.
Definition: Amesos2_Superlu_decl.hpp:42