Amesos2 - Direct Sparse Solver Interfaces  Version of the Day
Amesos2_Superlu_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // Amesos2: Templated Direct Sparse Solver Package
6 // Copyright 2011 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39 //
40 // ***********************************************************************
41 //
42 // @HEADER
43 
53 #ifndef AMESOS2_SUPERLU_DEF_HPP
54 #define AMESOS2_SUPERLU_DEF_HPP
55 
56 #include <Teuchos_Tuple.hpp>
57 #include <Teuchos_ParameterList.hpp>
58 #include <Teuchos_StandardParameterEntryValidators.hpp>
59 
61 #include "Amesos2_Superlu_decl.hpp"
62 
63 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
64 // TODO: This 'using namespace SLU' is not a good thing.
65 // It was added because kernels does not namespace SuperLU but Amesos2 does.
66 // Declaring the namespace SLU allows us to reuse that file without duplication.
67 // We need to determine a uniform system to avoid this this but for now, at least
68 // this issue is only present when TRSV is enabled.
69 using namespace SLU;
70 #include "KokkosSparse_sptrsv_superlu.hpp"
71 #endif
72 
73 namespace Amesos2 {
74 
75 
76 template <class Matrix, class Vector>
78  Teuchos::RCP<const Matrix> A,
79  Teuchos::RCP<Vector> X,
80  Teuchos::RCP<const Vector> B )
81  : SolverCore<Amesos2::Superlu,Matrix,Vector>(A, X, B)
82 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
83  , sptrsv_invert_diag_(true)
84  , sptrsv_invert_offdiag_ (false)
85  , sptrsv_u_in_csr_ (true)
86  , sptrsv_merge_supernodes_ (false)
87  , sptrsv_use_spmv_ (false)
88 #endif
89  , is_contiguous_(true) // default is set by params
90  , use_triangular_solves_(false) // default is set by params
91  , use_metis_(false)
92  , symmetrize_metis_(true)
93 {
94  // ilu_set_default_options is called later in set parameter list if required.
95  // This is not the ideal way, but the other option to always call
96  // ilu_set_default_options here and assuming it won't have any side effect
97  // in the TPL is more dangerous. It is not a good idea to rely on external
98  // libraries' internal "features".
99  SLU::set_default_options(&(data_.options));
100  // Override some default options
101  data_.options.PrintStat = SLU::NO;
102 
103  SLU::StatInit(&(data_.stat));
104 
105  Kokkos::resize(data_.perm_r, this->globalNumRows_);
106  Kokkos::resize(data_.perm_c, this->globalNumCols_);
107  Kokkos::resize(data_.etree, this->globalNumCols_);
108  Kokkos::resize(data_.R, this->globalNumRows_);
109  Kokkos::resize(data_.C, this->globalNumCols_);
110 
111  data_.relax = SLU::sp_ienv(2); // Query optimal relax param from superlu
112  data_.panel_size = SLU::sp_ienv(1); // Query optimal panel size
113 
114  data_.equed = 'N'; // No equilibration
115  data_.rowequ = false;
116  data_.colequ = false;
117  data_.A.Store = NULL;
118  data_.L.Store = NULL;
119  data_.U.Store = NULL;
120  data_.X.Store = NULL;
121  data_.B.Store = NULL;
122 
123  ILU_Flag_=false; // default: turn off ILU
124 }
125 
126 
127 template <class Matrix, class Vector>
129 {
130  /* Free Superlu data_types
131  * - Matrices
132  * - Vectors
133  * - Stat object
134  */
135  SLU::StatFree( &(data_.stat) ) ;
136 
137  // Storage is initialized in numericFactorization_impl()
138  if ( data_.A.Store != NULL ){
139  SLU::Destroy_SuperMatrix_Store( &(data_.A) );
140  }
141 
142  // only root allocated these SuperMatrices.
143  if ( data_.L.Store != NULL ){ // will only be true for this->root_
144  SLU::Destroy_SuperNode_Matrix( &(data_.L) );
145  SLU::Destroy_CompCol_Matrix( &(data_.U) );
146  }
147 }
148 
149 template <class Matrix, class Vector>
150 std::string
152 {
153  std::ostringstream oss;
154  oss << "SuperLU solver interface";
155  if (ILU_Flag_) {
156  oss << ", \"ILUTP\" : {";
157  oss << "drop tol = " << data_.options.ILU_DropTol;
158  oss << ", fill factor = " << data_.options.ILU_FillFactor;
159  oss << ", fill tol = " << data_.options.ILU_FillTol;
160  switch(data_.options.ILU_MILU) {
161  case SLU::SMILU_1 :
162  oss << ", MILU 1";
163  break;
164  case SLU::SMILU_2 :
165  oss << ", MILU 2";
166  break;
167  case SLU::SMILU_3 :
168  oss << ", MILU 3";
169  break;
170  case SLU::SILU :
171  default:
172  oss << ", regular ILU";
173  }
174  switch(data_.options.ILU_Norm) {
175  case SLU::ONE_NORM :
176  oss << ", 1-norm";
177  break;
178  case SLU::TWO_NORM :
179  oss << ", 2-norm";
180  break;
181  case SLU::INF_NORM :
182  default:
183  oss << ", infinity-norm";
184  }
185  oss << "}";
186  } else {
187  oss << ", direct solve";
188  }
189  return oss.str();
190  /*
191 
192  // ILU parameters
193  if( parameterList->isParameter("RowPerm") ){
194  RCP<const ParameterEntryValidator> rowperm_validator = valid_params->getEntry("RowPerm").validator();
195  parameterList->getEntry("RowPerm").setValidator(rowperm_validator);
196  data_.options.RowPerm = getIntegralValue<SLU::rowperm_t>(*parameterList, "RowPerm");
197  }
198 
199 
200  */
201 }
202 
203 template<class Matrix, class Vector>
204 int
206 {
207  /*
208  * Get column permutation vector perm_c[], according to permc_spec:
209  * permc_spec = NATURAL: natural ordering
210  * permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A
211  * permc_spec = MMD_ATA: minimum degree on structure of A'*A
212  * permc_spec = COLAMD: approximate minimum degree column ordering
213  * permc_spec = MY_PERMC: the ordering already supplied in perm_c[]
214  */
215  int permc_spec = data_.options.ColPerm;
216  if (!use_metis_ && permc_spec != SLU::MY_PERMC && this->root_ ){
217 #ifdef HAVE_AMESOS2_TIMERS
218  Teuchos::TimeMonitor preOrderTimer(this->timers_.preOrderTime_);
219 #endif
220 
221  SLU::get_perm_c(permc_spec, &(data_.A), data_.perm_c.data());
222  }
223 
224  return(0);
225 }
226 
227 
228 template <class Matrix, class Vector>
229 int
231 {
232  /*
233  * SuperLU performs symbolic factorization and numeric factorization
234  * together, but does leave some options for reusing symbolic
235  * structures that have been created on previous factorizations. If
236  * our Amesos2 user calls this function, that is an indication that
237  * the symbolic structure of the matrix is no longer valid, and
238  * SuperLU should do the factorization from scratch.
239  *
240  * This can be accomplished by setting the options.Fact flag to
241  * DOFACT, as well as setting our own internal flag to false.
242  */
243 #ifdef HAVE_AMESOS2_TIMERS
244  Teuchos::TimeMonitor symFactTime( this->timers_.symFactTime_ );
245 #endif
246  same_symbolic_ = false;
247  data_.options.Fact = SLU::DOFACT;
248 
249  if (use_metis_) {
250 #ifdef HAVE_AMESOS2_TIMERS
251  Teuchos::RCP< Teuchos::Time > MetisTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for METIS");
252  Teuchos::TimeMonitor numFactTimer(*MetisTimer_);
253 #endif
254  size_t n = this->globalNumRows_;
255  size_t nz = this->globalNumNonZeros_;
256  host_size_type_array host_rowind_view_ = host_rows_view_;
257  host_ordinal_type_array host_colptr_view_ = host_col_ptr_view_;
258 
259  /* symmetrize the matrix (A + A') if needed */
260  if (symmetrize_metis_) {
261  int new_nz = 0;
262  int *new_col_ptr;
263  int *new_row_ind;
264 
265  // NOTE: both size_type and ordinal_type are defined as int
266  // Consider using "symmetrize_graph" in KokkosKernels, if this becomes significant in time.
267  SLU::at_plus_a(n, nz, host_col_ptr_view_.data(), host_rows_view_.data(),
268  &new_nz, &new_col_ptr, &new_row_ind);
269 
270  // reallocate (so that we won't overwrite the original)
271  // and copy for output
272  nz = new_nz;
273  Kokkos::resize (host_rowind_view_, nz);
274  Kokkos::realloc(host_colptr_view_, n+1);
275  for (size_t i = 0; i <= n; i++) {
276  host_colptr_view_(i) = new_col_ptr[i];
277  }
278  for (size_t i = 0; i < nz; i++) {
279  host_rowind_view_(i) = new_row_ind[i];
280  }
281 
282  // free
283  SLU::SUPERLU_FREE(new_col_ptr);
284  if (new_nz > 0) {
285  SLU::SUPERLU_FREE(new_row_ind);
286  }
287  }
288 
289  // reorder will convert both graph and perm/iperm to the internal METIS integer type
290  using metis_int_array_type = host_ordinal_type_array;
291  metis_int_array_type metis_perm = metis_int_array_type(Kokkos::ViewAllocateWithoutInitializing("metis_perm"), n);
292  metis_int_array_type metis_iperm = metis_int_array_type(Kokkos::ViewAllocateWithoutInitializing("metis_iperm"), n);
293 
294  // call METIS
295  size_t new_nnz = 0;
296  Amesos2::Util::reorder(
297  host_colptr_view_, host_rowind_view_,
298  metis_perm, metis_iperm, new_nnz, false);
299 
300  for (size_t i = 0; i < n; i++) {
301  data_.perm_r(i) = metis_iperm(i);
302  data_.perm_c(i) = metis_iperm(i);
303  }
304 
305  // SLU will use user-specified METIS ordering
306  data_.options.ColPerm = SLU::MY_PERMC;
307  data_.options.RowPerm = SLU::MY_PERMR;
308  // Turn off Equil not to mess up METIS ordering?
309  //data_.options.Equil = SLU::NO;
310  }
311  return(0);
312 }
313 
314 
315 template <class Matrix, class Vector>
316 int
318 {
319  using Teuchos::as;
320 
321  // Cleanup old L and U matrices if we are not reusing a symbolic
322  // factorization. Stores and other data will be allocated in gstrf.
323  // Only rank 0 has valid pointers
324  if ( !same_symbolic_ && data_.L.Store != NULL ){
325  SLU::Destroy_SuperNode_Matrix( &(data_.L) );
326  SLU::Destroy_CompCol_Matrix( &(data_.U) );
327  data_.L.Store = NULL;
328  data_.U.Store = NULL;
329  }
330 
331  if( same_symbolic_ ) data_.options.Fact = SLU::SamePattern_SameRowPerm;
332 
333  int info = 0;
334  if ( this->root_ ){
335 
336 #ifdef HAVE_AMESOS2_DEBUG
337  TEUCHOS_TEST_FOR_EXCEPTION( data_.A.ncol != as<int>(this->globalNumCols_),
338  std::runtime_error,
339  "Error in converting to SuperLU SuperMatrix: wrong number of global columns." );
340  TEUCHOS_TEST_FOR_EXCEPTION( data_.A.nrow != as<int>(this->globalNumRows_),
341  std::runtime_error,
342  "Error in converting to SuperLU SuperMatrix: wrong number of global rows." );
343 #endif
344 
345  if( data_.options.Equil == SLU::YES ){
346  magnitude_type rowcnd, colcnd, amax;
347  int info2 = 0;
348 
349  // calculate row and column scalings
350  function_map::gsequ(&(data_.A), data_.R.data(),
351  data_.C.data(), &rowcnd, &colcnd,
352  &amax, &info2);
353  TEUCHOS_TEST_FOR_EXCEPTION
354  (info2 < 0, std::runtime_error,
355  "SuperLU's gsequ function returned with status " << info2 << " < 0."
356  " This means that argument " << (-info2) << " given to the function"
357  " had an illegal value.");
358  if (info2 > 0) {
359  if (info2 <= data_.A.nrow) {
360  TEUCHOS_TEST_FOR_EXCEPTION
361  (true, std::runtime_error, "SuperLU's gsequ function returned with "
362  "info = " << info2 << " > 0, and info <= A.nrow = " << data_.A.nrow
363  << ". This means that row " << info2 << " of A is exactly zero.");
364  }
365  else if (info2 > data_.A.ncol) {
366  TEUCHOS_TEST_FOR_EXCEPTION
367  (true, std::runtime_error, "SuperLU's gsequ function returned with "
368  "info = " << info2 << " > 0, and info > A.ncol = " << data_.A.ncol
369  << ". This means that column " << (info2 - data_.A.nrow) << " of "
370  "A is exactly zero.");
371  }
372  else {
373  TEUCHOS_TEST_FOR_EXCEPTION
374  (true, std::runtime_error, "SuperLU's gsequ function returned "
375  "with info = " << info2 << " > 0, but its value is not in the "
376  "range permitted by the documentation. This should never happen "
377  "(it appears to be SuperLU's fault).");
378  }
379  }
380 
381  // apply row and column scalings if necessary
382  function_map::laqgs(&(data_.A), data_.R.data(),
383  data_.C.data(), rowcnd, colcnd,
384  amax, &(data_.equed));
385 
386  // check what types of equilibration was actually done
387  data_.rowequ = (data_.equed == 'R') || (data_.equed == 'B');
388  data_.colequ = (data_.equed == 'C') || (data_.equed == 'B');
389  }
390 
391  // Apply the column permutation computed in preOrdering. Place the
392  // column-permuted matrix in AC
393  SLU::sp_preorder(&(data_.options), &(data_.A), data_.perm_c.data(),
394  data_.etree.data(), &(data_.AC));
395 
396  { // Do factorization
397 #ifdef HAVE_AMESOS2_TIMERS
398  Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
399 #endif
400 
401 #ifdef HAVE_AMESOS2_VERBOSE_DEBUG
402  std::cout << "Superlu:: Before numeric factorization" << std::endl;
403  std::cout << "nzvals_ : " << nzvals_.toString() << std::endl;
404  std::cout << "rowind_ : " << rowind_.toString() << std::endl;
405  std::cout << "colptr_ : " << colptr_.toString() << std::endl;
406 #endif
407 
408  if(ILU_Flag_==false) {
409  function_map::gstrf(&(data_.options), &(data_.AC),
410  data_.relax, data_.panel_size, data_.etree.data(),
411  NULL, 0, data_.perm_c.data(), data_.perm_r.data(),
412  &(data_.L), &(data_.U),
413 #ifdef HAVE_AMESOS2_SUPERLU5_API
414  &(data_.lu),
415 #endif
416  &(data_.stat), &info);
417  }
418  else {
419  function_map::gsitrf(&(data_.options), &(data_.AC),
420  data_.relax, data_.panel_size, data_.etree.data(),
421  NULL, 0, data_.perm_c.data(), data_.perm_r.data(),
422  &(data_.L), &(data_.U),
423 #ifdef HAVE_AMESOS2_SUPERLU5_API
424  &(data_.lu),
425 #endif
426  &(data_.stat), &info);
427  }
428 
429  if (data_.options.ConditionNumber == SLU::YES) {
430  char norm[1];
431  if (data_.options.Trans == SLU::NOTRANS) {
432  *(unsigned char *)norm = '1';
433  } else {
434  *(unsigned char *)norm = 'I';
435  }
436 
437  data_.anorm = function_map::langs(norm, &(data_.A));
438  function_map::gscon(norm, &(data_.L), &(data_.U),
439  data_.anorm, &(data_.rcond),
440  &(data_.stat), &info);
441  }
442  }
443  // Cleanup. AC data will be alloc'd again for next factorization (if at all)
444  SLU::Destroy_CompCol_Permuted( &(data_.AC) );
445 
446  // Set the number of non-zero values in the L and U factors
447  this->setNnzLU( as<size_t>(((SLU::SCformat*)data_.L.Store)->nnz +
448  ((SLU::NCformat*)data_.U.Store)->nnz) );
449  }
450 
451  /* All processes should have the same error code */
452  Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
453 
454  global_size_type info_st = as<global_size_type>(info);
455  TEUCHOS_TEST_FOR_EXCEPTION( (info_st > 0) && (info_st <= this->globalNumCols_),
456  std::runtime_error,
457  "Factorization complete, but matrix is singular. Division by zero eminent");
458  TEUCHOS_TEST_FOR_EXCEPTION( (info_st > 0) && (info_st > this->globalNumCols_),
459  std::runtime_error,
460  "Memory allocation failure in Superlu factorization");
461 
462  data_.options.Fact = SLU::FACTORED;
463  same_symbolic_ = true;
464 
465  if(use_triangular_solves_) {
466 #ifdef HAVE_AMESOS2_VERBOSE_DEBUG
467  #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
468  if (this->getComm()->getRank()) {
469  std::cout << " > Metis : " << (use_metis_ ? "YES" : "NO") << std::endl;
470  std::cout << " > Equil : " << (data_.options.Equil == SLU::YES ? "YES" : "NO") << std::endl;
471  std::cout << " > Cond Number : " << (data_.options.ConditionNumber == SLU::YES ? "YES" : "NO") << std::endl;
472  std::cout << " > Invert diag : " << sptrsv_invert_diag_ << std::endl;
473  std::cout << " > Invert off-diag : " << sptrsv_invert_offdiag_ << std::endl;
474  std::cout << " > U in CSR : " << sptrsv_u_in_csr_ << std::endl;
475  std::cout << " > Merge : " << sptrsv_merge_supernodes_ << std::endl;
476  std::cout << " > Use SpMV : " << sptrsv_use_spmv_ << std::endl;
477  }
478  //std::cout << myRank << " : siize(A) " << (data_.A.nrow) << " x " << (data_.A.ncol) << std::endl;
479  //std::cout << myRank << " : nnz(A) " << ((SLU::NCformat*)data_.A.Store)->nnz << std::endl;
480  //std::cout << myRank << " : nnz(L) " << ((SLU::SCformat*)data_.L.Store)->nnz << std::endl;
481  //std::cout << myRank << " : nnz(U) " << ((SLU::SCformat*)data_.U.Store)->nnz << std::endl;
482  #endif
483 #endif
484 #ifdef HAVE_AMESOS2_TIMERS
485  Teuchos::RCP< Teuchos::Time > SpTrsvTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for SpTrsv setup");
486  Teuchos::TimeMonitor numFactTimer(*SpTrsvTimer_);
487 #endif
488  triangular_solve_factor();
489  }
490 
491  return(info);
492 }
493 
494 
495 template <class Matrix, class Vector>
496 int
498  const Teuchos::Ptr<const MultiVecAdapter<Vector> > B) const
499 {
500  using Teuchos::as;
501 #ifdef HAVE_AMESOS2_TIMERS
502  Teuchos::RCP< Teuchos::Time > Amesos2SolveTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for Amesos2");
503  Teuchos::TimeMonitor solveTimer(*Amesos2SolveTimer_);
504 #endif
505 
506  const global_size_type ld_rhs = this->root_ ? X->getGlobalLength() : 0;
507  const size_t nrhs = X->getGlobalNumVectors();
508 
509  bool bDidAssignX = false; // will be set below
510  bool bDidAssignB = false; // will be set below
511  { // Get values from RHS B
512 #ifdef HAVE_AMESOS2_TIMERS
513  Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
514  Teuchos::TimeMonitor redistTimer( this->timers_.vecRedistTime_ );
515 #endif
516 
517  // In general we may want to write directly to the x space without a copy.
518  // So we 'get' x which may be a direct view assignment to the MV.
519  const bool initialize_data = true;
520  const bool do_not_initialize_data = false;
521  if(use_triangular_solves_) { // to device
522 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
523  bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
524  device_solve_array_t>::do_get(do_not_initialize_data, X, device_xValues_,
525  as<size_t>(ld_rhs),
526  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
527  this->rowIndexBase_);
528  bDidAssignB = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
529  device_solve_array_t>::do_get(initialize_data, B, device_bValues_,
530  as<size_t>(ld_rhs),
531  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
532  this->rowIndexBase_);
533 #endif
534  }
535  else { // to host
536  bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
537  host_solve_array_t>::do_get(do_not_initialize_data, X, host_xValues_,
538  as<size_t>(ld_rhs),
539  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
540  this->rowIndexBase_);
541  bDidAssignB = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
542  host_solve_array_t>::do_get(initialize_data, B, host_bValues_,
543  as<size_t>(ld_rhs),
544  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
545  this->rowIndexBase_);
546  }
547  }
548 
549  // If equilibration was applied at numeric, then gssvx and gsisx are going to
550  // modify B, so we can't use the optimized assignment to B since we'll change
551  // the source test vector and then fail the subsequent cycle. We need a copy.
552  // If bDidAssignB is false, then we skip the copy since it was copied already.
553  if(bDidAssignB && data_.equed != 'N') {
554  if(use_triangular_solves_) { // to device
555 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
556  device_solve_array_t copyB(Kokkos::ViewAllocateWithoutInitializing("copyB"),
557  device_bValues_.extent(0), device_bValues_.extent(1));
558  Kokkos::deep_copy(copyB, device_bValues_);
559  device_bValues_ = copyB;
560 #endif
561  }
562  else {
563  host_solve_array_t copyB(Kokkos::ViewAllocateWithoutInitializing("copyB"),
564  host_bValues_.extent(0), host_bValues_.extent(1));
565  Kokkos::deep_copy(copyB, host_bValues_);
566  host_bValues_ = copyB;
567  }
568  }
569 
570  int ierr = 0; // returned error code
571 
572  magnitude_type rpg, rcond;
573  if ( this->root_ ) {
574  // Note: the values of B and X (after solution) are adjusted
575  // appropriately within gssvx for row and column scalings.
576  { // Do solve!
577 #ifdef HAVE_AMESOS2_TIMERS
578  Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
579 #endif
580 
581  if(use_triangular_solves_) {
582  triangular_solve();
583  }
584  else {
585  Kokkos::resize(data_.ferr, nrhs);
586  Kokkos::resize(data_.berr, nrhs);
587 
588  {
589  #ifdef HAVE_AMESOS2_TIMERS
590  Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
591  #endif
592  SLU::Dtype_t dtype = type_map::dtype;
593  int i_ld_rhs = as<int>(ld_rhs);
594  function_map::create_Dense_Matrix(&(data_.B), i_ld_rhs, as<int>(nrhs),
595  convert_bValues_, host_bValues_, i_ld_rhs,
596  SLU::SLU_DN, dtype, SLU::SLU_GE);
597  function_map::create_Dense_Matrix(&(data_.X), i_ld_rhs, as<int>(nrhs),
598  convert_xValues_, host_xValues_, i_ld_rhs,
599  SLU::SLU_DN, dtype, SLU::SLU_GE);
600  }
601 
602  if(ILU_Flag_==false) {
603  function_map::gssvx(&(data_.options), &(data_.A),
604  data_.perm_c.data(), data_.perm_r.data(),
605  data_.etree.data(), &(data_.equed), data_.R.data(),
606  data_.C.data(), &(data_.L), &(data_.U), NULL, 0, &(data_.B),
607  &(data_.X), &rpg, &rcond, data_.ferr.data(),
608  data_.berr.data(),
609  #ifdef HAVE_AMESOS2_SUPERLU5_API
610  &(data_.lu),
611  #endif
612  &(data_.mem_usage), &(data_.stat), &ierr);
613  }
614  else {
615  function_map::gsisx(&(data_.options), &(data_.A),
616  data_.perm_c.data(), data_.perm_r.data(),
617  data_.etree.data(), &(data_.equed), data_.R.data(),
618  data_.C.data(), &(data_.L), &(data_.U), NULL, 0, &(data_.B),
619  &(data_.X), &rpg, &rcond,
620  #ifdef HAVE_AMESOS2_SUPERLU5_API
621  &(data_.lu),
622  #endif
623  &(data_.mem_usage), &(data_.stat), &ierr);
624  }
625 
626  // Cleanup X and B stores
627  SLU::Destroy_SuperMatrix_Store( &(data_.X) );
628  SLU::Destroy_SuperMatrix_Store( &(data_.B) );
629  data_.X.Store = NULL;
630  data_.B.Store = NULL;
631  }
632 
633  } // do solve
634 
635  // convert back to Kokkos views
636  {
637 #ifdef HAVE_AMESOS2_TIMERS
638  Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
639 #endif
640  function_map::convert_back_Dense_Matrix(convert_bValues_, host_bValues_);
641  function_map::convert_back_Dense_Matrix(convert_xValues_, host_xValues_);
642  }
643 
644  // Cleanup X and B stores
645  SLU::Destroy_SuperMatrix_Store( &(data_.X) );
646  SLU::Destroy_SuperMatrix_Store( &(data_.B) );
647  data_.X.Store = NULL;
648  data_.B.Store = NULL;
649  }
650 
651  /* All processes should have the same error code */
652  Teuchos::broadcast(*(this->getComm()), 0, &ierr);
653 
654  global_size_type ierr_st = as<global_size_type>(ierr);
655  TEUCHOS_TEST_FOR_EXCEPTION( ierr < 0,
656  std::invalid_argument,
657  "Argument " << -ierr << " to SuperLU xgssvx had illegal value" );
658  TEUCHOS_TEST_FOR_EXCEPTION( ierr > 0 && ierr_st <= this->globalNumCols_,
659  std::runtime_error,
660  "Factorization complete, but U is exactly singular" );
661  TEUCHOS_TEST_FOR_EXCEPTION( ierr > 0 && ierr_st > this->globalNumCols_ + 1,
662  std::runtime_error,
663  "SuperLU allocated " << ierr - this->globalNumCols_ << " bytes of "
664  "memory before allocation failure occured." );
665 
666  /* Update X's global values */
667 
668  // if bDidAssignX, then we solved straight to the adapter's X memory space without
669  // requiring additional memory allocation, so the x data is already in place.
670  if(!bDidAssignX) {
671 #ifdef HAVE_AMESOS2_TIMERS
672  Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
673 #endif
674 
675  if(use_triangular_solves_) { // to device
676 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
677  Util::put_1d_data_helper_kokkos_view<
678  MultiVecAdapter<Vector>,device_solve_array_t>::do_put(X, device_xValues_,
679  as<size_t>(ld_rhs),
680  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
681  this->rowIndexBase_);
682 #endif
683  }
684  else {
685  Util::put_1d_data_helper_kokkos_view<
686  MultiVecAdapter<Vector>,host_solve_array_t>::do_put(X, host_xValues_,
687  as<size_t>(ld_rhs),
688  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
689  this->rowIndexBase_);
690  }
691  }
692 
693  return(ierr);
694 }
695 
696 
697 template <class Matrix, class Vector>
698 bool
700 {
701  // The Superlu factorization routines can handle square as well as
702  // rectangular matrices, but Superlu can only apply the solve routines to
703  // square matrices, so we check the matrix for squareness.
704  return( this->matrixA_->getGlobalNumRows() == this->matrixA_->getGlobalNumCols() );
705 }
706 
707 
708 template <class Matrix, class Vector>
709 void
710 Superlu<Matrix,Vector>::setParameters_impl(const Teuchos::RCP<Teuchos::ParameterList> & parameterList )
711 {
712  using Teuchos::RCP;
713  using Teuchos::getIntegralValue;
714  using Teuchos::ParameterEntryValidator;
715 
716  RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
717 
718  ILU_Flag_ = parameterList->get<bool>("ILU_Flag",false);
719  if (ILU_Flag_) {
720  SLU::ilu_set_default_options(&(data_.options));
721  // Override some default options
722  data_.options.PrintStat = SLU::NO;
723  }
724 
725  data_.options.Trans = this->control_.useTranspose_ ? SLU::TRANS : SLU::NOTRANS;
726  // The SuperLU transpose option can override the Amesos2 option
727  if( parameterList->isParameter("Trans") ){
728  RCP<const ParameterEntryValidator> trans_validator = valid_params->getEntry("Trans").validator();
729  parameterList->getEntry("Trans").setValidator(trans_validator);
730 
731  data_.options.Trans = getIntegralValue<SLU::trans_t>(*parameterList, "Trans");
732  }
733 
734  if( parameterList->isParameter("IterRefine") ){
735  RCP<const ParameterEntryValidator> refine_validator = valid_params->getEntry("IterRefine").validator();
736  parameterList->getEntry("IterRefine").setValidator(refine_validator);
737 
738  data_.options.IterRefine = getIntegralValue<SLU::IterRefine_t>(*parameterList, "IterRefine");
739  }
740 
741  if( parameterList->isParameter("ColPerm") ){
742  RCP<const ParameterEntryValidator> colperm_validator = valid_params->getEntry("ColPerm").validator();
743  parameterList->getEntry("ColPerm").setValidator(colperm_validator);
744 
745  data_.options.ColPerm = getIntegralValue<SLU::colperm_t>(*parameterList, "ColPerm");
746  }
747 
748  data_.options.DiagPivotThresh = parameterList->get<double>("DiagPivotThresh", 1.0);
749 
750  bool equil = parameterList->get<bool>("Equil", true);
751  data_.options.Equil = equil ? SLU::YES : SLU::NO;
752 
753  bool condNum = parameterList->get<bool>("ConditionNumber", false);
754  data_.options.ConditionNumber = condNum ? SLU::YES : SLU::NO;
755 
756  bool symmetric_mode = parameterList->get<bool>("SymmetricMode", false);
757  data_.options.SymmetricMode = symmetric_mode ? SLU::YES : SLU::NO;
758 
759  // ILU parameters
760  if( parameterList->isParameter("RowPerm") ){
761  RCP<const ParameterEntryValidator> rowperm_validator = valid_params->getEntry("RowPerm").validator();
762  parameterList->getEntry("RowPerm").setValidator(rowperm_validator);
763  data_.options.RowPerm = getIntegralValue<SLU::rowperm_t>(*parameterList, "RowPerm");
764  }
765 
766  /*if( parameterList->isParameter("ILU_DropRule") ) {
767  RCP<const ParameterEntryValidator> droprule_validator = valid_params->getEntry("ILU_DropRule").validator();
768  parameterList->getEntry("ILU_DropRule").setValidator(droprule_validator);
769  data_.options.ILU_DropRule = getIntegralValue<SLU::rule_t>(*parameterList, "ILU_DropRule");
770  }*/
771 
772  data_.options.ILU_DropTol = parameterList->get<double>("ILU_DropTol", 0.0001);
773 
774  data_.options.ILU_FillFactor = parameterList->get<double>("ILU_FillFactor", 10.0);
775 
776  if( parameterList->isParameter("ILU_Norm") ) {
777  RCP<const ParameterEntryValidator> norm_validator = valid_params->getEntry("ILU_Norm").validator();
778  parameterList->getEntry("ILU_Norm").setValidator(norm_validator);
779  data_.options.ILU_Norm = getIntegralValue<SLU::norm_t>(*parameterList, "ILU_Norm");
780  }
781 
782  if( parameterList->isParameter("ILU_MILU") ) {
783  RCP<const ParameterEntryValidator> milu_validator = valid_params->getEntry("ILU_MILU").validator();
784  parameterList->getEntry("ILU_MILU").setValidator(milu_validator);
785  data_.options.ILU_MILU = getIntegralValue<SLU::milu_t>(*parameterList, "ILU_MILU");
786  }
787 
788  data_.options.ILU_FillTol = parameterList->get<double>("ILU_FillTol", 0.01);
789 
790  is_contiguous_ = parameterList->get<bool>("IsContiguous", true);
791 
792  use_metis_ = parameterList->get<bool>("UseMetis", false);
793  symmetrize_metis_ = parameterList->get<bool>("SymmetrizeMetis", true);
794 
795  use_triangular_solves_ = parameterList->get<bool>("Enable_KokkosKernels_TriangularSolves", false);
796  if(use_triangular_solves_) {
797 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
798  // specify whether to invert diagonal blocks
799  sptrsv_invert_diag_ = parameterList->get<bool>("SpTRSV_Invert_Diag", true);
800  // specify whether to apply diagonal-inversion to off-diagonal blocks (optional, default is false)
801  sptrsv_invert_offdiag_ = parameterList->get<bool>("SpTRSV_Invert_OffDiag", false);
802  // specify whether to store U in CSR format
803  sptrsv_u_in_csr_ = parameterList->get<bool>("SpTRSV_U_CSR", true);
804  // specify whether to merge supernodes with similar sparsity structures
805  sptrsv_merge_supernodes_ = parameterList->get<bool>("SpTRSV_Merge_Supernodes", false);
806  // specify whether to use spmv for sptrsv
807  sptrsv_use_spmv_ = parameterList->get<bool>("SpTRSV_Use_SpMV", false);
808 #else
809  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error,
810  "Calling for triangular solves but KokkosKernels_ENABLE_SUPERNODAL_SPTRSV and KokkosKernels_ENABLE_TPL_SUPERLU were not configured." );
811 #endif
812  }
813 }
814 
815 
816 template <class Matrix, class Vector>
817 Teuchos::RCP<const Teuchos::ParameterList>
819 {
820  using std::string;
821  using Teuchos::tuple;
822  using Teuchos::ParameterList;
823  using Teuchos::EnhancedNumberValidator;
824  using Teuchos::setStringToIntegralParameter;
825  using Teuchos::stringToIntegralParameterEntryValidator;
826 
827  static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
828 
829  if( is_null(valid_params) ){
830  Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
831 
832  setStringToIntegralParameter<SLU::trans_t>("Trans", "NOTRANS",
833  "Solve for the transpose system or not",
834  tuple<string>("TRANS","NOTRANS","CONJ"),
835  tuple<string>("Solve with transpose",
836  "Do not solve with transpose",
837  "Solve with the conjugate transpose"),
838  tuple<SLU::trans_t>(SLU::TRANS,
839  SLU::NOTRANS,
840  SLU::CONJ),
841  pl.getRawPtr());
842 
843  setStringToIntegralParameter<SLU::IterRefine_t>("IterRefine", "NOREFINE",
844  "Type of iterative refinement to use",
845  tuple<string>("NOREFINE", "SLU_SINGLE", "SLU_DOUBLE"),
846  tuple<string>("Do not use iterative refinement",
847  "Do single iterative refinement",
848  "Do double iterative refinement"),
849  tuple<SLU::IterRefine_t>(SLU::NOREFINE,
850  SLU::SLU_SINGLE,
851  SLU::SLU_DOUBLE),
852  pl.getRawPtr());
853 
854  // Note: MY_PERMC not yet supported
855  setStringToIntegralParameter<SLU::colperm_t>("ColPerm", "COLAMD",
856  "Specifies how to permute the columns of the "
857  "matrix for sparsity preservation",
858  tuple<string>("NATURAL", "MMD_AT_PLUS_A",
859  "MMD_ATA", "COLAMD"),
860  tuple<string>("Natural ordering",
861  "Minimum degree ordering on A^T + A",
862  "Minimum degree ordering on A^T A",
863  "Approximate minimum degree column ordering"),
864  tuple<SLU::colperm_t>(SLU::NATURAL,
865  SLU::MMD_AT_PLUS_A,
866  SLU::MMD_ATA,
867  SLU::COLAMD),
868  pl.getRawPtr());
869 
870  Teuchos::RCP<EnhancedNumberValidator<double> > diag_pivot_thresh_validator
871  = Teuchos::rcp( new EnhancedNumberValidator<double>(0.0, 1.0) );
872  pl->set("DiagPivotThresh", 1.0,
873  "Specifies the threshold used for a diagonal entry to be an acceptable pivot",
874  diag_pivot_thresh_validator); // partial pivoting
875 
876  pl->set("Equil", true, "Whether to equilibrate the system before solve");
877  pl->set("ConditionNumber", false, "Whether to approximate condition number");
878 
879  pl->set("SymmetricMode", false,
880  "Specifies whether to use the symmetric mode. "
881  "Gives preference to diagonal pivots and uses "
882  "an (A^T + A)-based column permutation.");
883 
884  // ILU parameters
885 
886  setStringToIntegralParameter<SLU::rowperm_t>("RowPerm", "LargeDiag",
887  "Type of row permutation strategy to use",
888  tuple<string>("NOROWPERM","LargeDiag","MY_PERMR"),
889  tuple<string>("Use natural ordering",
890  "Use weighted bipartite matching algorithm",
891  "Use the ordering given in perm_r input"),
892  tuple<SLU::rowperm_t>(SLU::NOROWPERM,
893 #if SUPERLU_MAJOR_VERSION > 5 || (SUPERLU_MAJOR_VERSION == 5 && SUPERLU_MINOR_VERSION > 2) || (SUPERLU_MAJOR_VERSION == 5 && SUPERLU_MINOR_VERSION == 2 && SUPERLU_PATCH_VERSION >= 2)
894  SLU::LargeDiag_MC64,
895 #else
896  SLU::LargeDiag,
897 #endif
898  SLU::MY_PERMR),
899  pl.getRawPtr());
900 
901  /*setStringToIntegralParameter<SLU::rule_t>("ILU_DropRule", "DROP_BASIC",
902  "Type of dropping strategy to use",
903  tuple<string>("DROP_BASIC","DROP_PROWS",
904  "DROP_COLUMN","DROP_AREA",
905  "DROP_DYNAMIC","DROP_INTERP"),
906  tuple<string>("ILUTP(t)","ILUTP(p,t)",
907  "Variant of ILUTP(p,t) for j-th column",
908  "Variant of ILUTP to control memory",
909  "Dynamically adjust threshold",
910  "Compute second dropping threshold by interpolation"),
911  tuple<SLU::rule_t>(SLU::DROP_BASIC,SLU::DROP_PROWS,SLU::DROP_COLUMN,
912  SLU::DROP_AREA,SLU::DROP_DYNAMIC,SLU::DROP_INTERP),
913  pl.getRawPtr());*/
914 
915  pl->set("ILU_DropTol", 0.0001, "ILUT drop tolerance");
916 
917  pl->set("ILU_FillFactor", 10.0, "ILUT fill factor");
918 
919  setStringToIntegralParameter<SLU::norm_t>("ILU_Norm", "INF_NORM",
920  "Type of norm to use",
921  tuple<string>("ONE_NORM","TWO_NORM","INF_NORM"),
922  tuple<string>("1-norm","2-norm","inf-norm"),
923  tuple<SLU::norm_t>(SLU::ONE_NORM,SLU::TWO_NORM,SLU::INF_NORM),
924  pl.getRawPtr());
925 
926  setStringToIntegralParameter<SLU::milu_t>("ILU_MILU", "SILU",
927  "Type of modified ILU to use",
928  tuple<string>("SILU","SMILU_1","SMILU_2","SMILU_3"),
929  tuple<string>("Regular ILU","MILU 1","MILU 2","MILU 3"),
930  tuple<SLU::milu_t>(SLU::SILU,SLU::SMILU_1,SLU::SMILU_2,
931  SLU::SMILU_3),
932  pl.getRawPtr());
933 
934  pl->set("ILU_FillTol", 0.01, "ILUT fill tolerance");
935 
936  pl->set("ILU_Flag", false, "ILU flag: if true, run ILU routines");
937 
938  pl->set("IsContiguous", true, "Whether GIDs contiguous");
939 
940  // call METIS before SuperLU
941  pl->set("UseMetis", false, "Whether to call METIS before SuperLU");
942  pl->set("SymmetrizeMetis", true, "Whether to symmetrize matrix before METIS");
943 
944  pl->set("Enable_KokkosKernels_TriangularSolves", false, "Whether to use triangular solves.");
945 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
946  pl->set("SpTRSV_Invert_Diag", true, "specify whether to invert diagonal blocks for supernodal sparse-trianguular solve");
947  pl->set("SpTRSV_Invert_OffDiag", false, "specify whether to apply diagonal-inversion to off-diagonal blocks for supernodal sparse-trianguular solve");
948  pl->set("SpTRSV_U_CSR", true, "specify whether to store U in CSR forma for supernodal sparse-trianguular solve");
949  pl->set("SpTRSV_Merge_Supernodes", false, "specify whether to merge supernodes with similar sparsity structures for supernodal sparse-trianguular solve");
950  pl->set("SpTRSV_Use_SpMV", false, "specify whether to use spmv for supernodal sparse-trianguular solve");
951 #endif
952 
953  valid_params = pl;
954  }
955 
956  return valid_params;
957 }
958 
959 
960 template <class Matrix, class Vector>
961 bool
963 {
964  using Teuchos::as;
965 
966 #ifdef HAVE_AMESOS2_TIMERS
967  Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
968 #endif
969 
970  // SuperLU does not need the matrix at symbolicFactorization()
971  if( current_phase == SYMBFACT ) return false;
972 
973  // Cleanup old store memory if it's non-NULL (should only ever be non-NULL at root_)
974  if( data_.A.Store != NULL ){
975  SLU::Destroy_SuperMatrix_Store( &(data_.A) );
976  data_.A.Store = NULL;
977  }
978 
979  // Only the root image needs storage allocated
980  if( this->root_ ){
981  Kokkos::resize(host_nzvals_view_, this->globalNumNonZeros_);
982  Kokkos::resize(host_rows_view_, this->globalNumNonZeros_);
983  Kokkos::resize(host_col_ptr_view_, this->globalNumRows_ + 1);
984  }
985 
986  int nnz_ret = 0;
987  {
988 #ifdef HAVE_AMESOS2_TIMERS
989  Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
990 #endif
991 
992  TEUCHOS_TEST_FOR_EXCEPTION( this->rowIndexBase_ != this->columnIndexBase_,
993  std::runtime_error,
994  "Row and column maps have different indexbase ");
995 
997  MatrixAdapter<Matrix>,host_value_type_array,host_ordinal_type_array,
998  host_size_type_array>::do_get(this->matrixA_.ptr(),
999  host_nzvals_view_, host_rows_view_,
1000  host_col_ptr_view_, nnz_ret,
1001  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
1002  ARBITRARY,
1003  this->rowIndexBase_);
1004  }
1005 
1006  // Get the SLU data type for this type of matrix
1007  SLU::Dtype_t dtype = type_map::dtype;
1008 
1009  if( this->root_ ){
1010  TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != as<int>(this->globalNumNonZeros_),
1011  std::runtime_error,
1012  "Did not get the expected number of non-zero vals");
1013 
1014  function_map::template create_CompCol_Matrix<host_value_type_array>( &(data_.A),
1015  this->globalNumRows_, this->globalNumCols_,
1016  nnz_ret,
1017  convert_nzvals_, host_nzvals_view_,
1018  host_rows_view_.data(),
1019  host_col_ptr_view_.data(),
1020  SLU::SLU_NC, dtype, SLU::SLU_GE);
1021  }
1022 
1023  return true;
1024 }
1025 
1026 template <class Matrix, class Vector>
1027 void
1029 {
1030 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
1031  size_t ld_rhs = this->matrixA_->getGlobalNumRows();
1032 
1033  // convert etree to parents
1034  SLU::SCformat * Lstore = (SLU::SCformat*)(data_.L.Store);
1035  int nsuper = 1 + Lstore->nsuper; // # of supernodal columns
1036  Kokkos::resize(data_.parents, nsuper);
1037  for (int s = 0; s < nsuper; s++) {
1038  int j = Lstore->sup_to_col[s+1]-1; // the last column index of this supernode
1039  if (data_.etree[j] == static_cast<int>(ld_rhs)) {
1040  data_.parents(s) = -1;
1041  } else {
1042  data_.parents(s) = Lstore->col_to_sup[data_.etree[j]];
1043  }
1044  }
1045 
1046  deep_copy_or_assign_view(device_trsv_perm_r_, data_.perm_r); // will use device to permute
1047  deep_copy_or_assign_view(device_trsv_perm_c_, data_.perm_c); // will use device to permute
1048  if (data_.options.Equil == SLU::YES) {
1049  deep_copy_or_assign_view(device_trsv_R_, data_.R); // will use device to scale
1050  deep_copy_or_assign_view(device_trsv_C_, data_.C); // will use device to scale
1051  }
1052 
1053  bool condition_flag = false;
1054  if (data_.options.ConditionNumber == SLU::YES) {
1055  using STM = Teuchos::ScalarTraits<magnitude_type>;
1056  const magnitude_type eps = STM::eps ();
1057 
1058  SCformat *Lstore = (SCformat*)(data_.L.Store);
1059  int nsuper = 1 + Lstore->nsuper;
1060  int *nb = Lstore->sup_to_col;
1061  int max_cols = 0;
1062  for (int i = 0; i < nsuper; i++) {
1063  if (nb[i+1] - nb[i] > max_cols) {
1064  max_cols = nb[i+1] - nb[i];
1065  }
1066  }
1067 
1068  // when rcond is small, it is ill-conditioned and flag is true
1069  const magnitude_type multiply_fact (10.0); // larger the value, more likely flag is true, and no invert
1070  condition_flag = (((double)max_cols * nsuper) * eps * multiply_fact >= data_.rcond);
1071 
1072 #ifdef HAVE_AMESOS2_VERBOSE_DEBUG
1073  int n = data_.perm_r.extent(0);
1074  std::cout << this->getComm()->getRank()
1075  << " : anorm = " << data_.anorm << ", rcond = " << data_.rcond << ", n = " << n
1076  << ", num super cols = " << nsuper << ", max super cols = " << max_cols
1077  << " -> " << ((double)max_cols * nsuper) * eps / data_.rcond
1078  << (((double)max_cols * nsuper) * eps * multiply_fact < data_.rcond ? " (okay)" : " (warn)") << std::endl;
1079 #endif
1080  }
1081 
1082  // Create handles for U and U^T solves
1083  if (sptrsv_use_spmv_ && !condition_flag) {
1084  device_khL_.create_sptrsv_handle(
1085  KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_SPMV_DAG, ld_rhs, true);
1086  device_khU_.create_sptrsv_handle(
1087  KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_SPMV_DAG, ld_rhs, false);
1088  } else {
1089  device_khL_.create_sptrsv_handle(
1090  KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_DAG, ld_rhs, true);
1091  device_khU_.create_sptrsv_handle(
1092  KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_DAG, ld_rhs, false);
1093  }
1094 
1095  // specify whether to store U in CSR format
1096  device_khU_.set_sptrsv_column_major (!sptrsv_u_in_csr_);
1097 
1098  // specify whether to merge supernodes with similar sparsity structure
1099  device_khL_.set_sptrsv_merge_supernodes (sptrsv_merge_supernodes_);
1100  device_khU_.set_sptrsv_merge_supernodes (sptrsv_merge_supernodes_);
1101 
1102  // invert only if flag is not on
1103  bool sptrsv_invert_diag = (!condition_flag && sptrsv_invert_diag_);
1104  bool sptrsv_invert_offdiag = (!condition_flag && sptrsv_invert_offdiag_);
1105 
1106  // specify whether to invert diagonal blocks
1107  device_khL_.set_sptrsv_invert_diagonal (sptrsv_invert_diag);
1108  device_khU_.set_sptrsv_invert_diagonal (sptrsv_invert_diag);
1109 
1110  // specify wheather to apply diagonal-inversion to off-diagonal blocks (optional, default is false)
1111  device_khL_.set_sptrsv_invert_offdiagonal (sptrsv_invert_offdiag);
1112  device_khU_.set_sptrsv_invert_offdiagonal (sptrsv_invert_offdiag);
1113 
1114  // set etree
1115  device_khL_.set_sptrsv_etree(data_.parents.data());
1116  device_khU_.set_sptrsv_etree(data_.parents.data());
1117 
1118  // set permutation
1119  device_khL_.set_sptrsv_perm(data_.perm_r.data());
1120  device_khU_.set_sptrsv_perm(data_.perm_c.data());
1121 
1122  int block_size = -1; // TODO: add needed params
1123  if (block_size >= 0) {
1124  std::cout << " Block Size : " << block_size << std::endl;
1125  device_khL_.set_sptrsv_diag_supernode_sizes (block_size, block_size);
1126  device_khU_.set_sptrsv_diag_supernode_sizes (block_size, block_size);
1127  }
1128 
1129  // Do symbolic analysis
1130  {
1131 #ifdef HAVE_AMESOS2_TIMERS
1132  Teuchos::RCP< Teuchos::Time > SymboTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for SpTrsv symbolic");
1133  Teuchos::TimeMonitor numFactTimer(*SymboTimer_);
1134 #endif
1135  KokkosSparse::Experimental::sptrsv_symbolic
1136  (&device_khL_, &device_khU_, data_.L, data_.U);
1137  }
1138 
1139  // Do numerical compute
1140  {
1141 #ifdef HAVE_AMESOS2_TIMERS
1142  Teuchos::RCP< Teuchos::Time > NumerTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for SpTrsv numeric");
1143  Teuchos::TimeMonitor numFactTimer(*NumerTimer_);
1144 #endif
1145  KokkosSparse::Experimental::sptrsv_compute
1146  (&device_khL_, &device_khU_, data_.L, data_.U);
1147  }
1148 #endif // HAVE_AMESOS2_TRIANGULAR_SOLVE
1149 }
1150 
1151 template <class Matrix, class Vector>
1152 void
1153 Superlu<Matrix,Vector>::triangular_solve() const
1154 {
1155 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
1156  size_t ld_rhs = device_xValues_.extent(0);
1157  size_t nrhs = device_xValues_.extent(1);
1158 
1159  Kokkos::resize(device_trsv_rhs_, ld_rhs, nrhs);
1160  Kokkos::resize(device_trsv_sol_, ld_rhs, nrhs);
1161 
1162  // forward pivot
1163  auto local_device_bValues = device_bValues_;
1164  auto local_device_trsv_perm_r = device_trsv_perm_r_;
1165  auto local_device_trsv_rhs = device_trsv_rhs_;
1166 
1167  if (data_.rowequ) {
1168  auto local_device_trsv_R = device_trsv_R_;
1169  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1170  KOKKOS_LAMBDA(size_t j) {
1171  for(size_t k = 0; k < nrhs; ++k) {
1172  local_device_trsv_rhs(local_device_trsv_perm_r(j),k) = local_device_trsv_R(j) * local_device_bValues(j,k);
1173  }
1174  });
1175  } else {
1176  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1177  KOKKOS_LAMBDA(size_t j) {
1178  for(size_t k = 0; k < nrhs; ++k) {
1179  local_device_trsv_rhs(local_device_trsv_perm_r(j),k) = local_device_bValues(j,k);
1180  }
1181  });
1182  }
1183 
1184  for(size_t k = 0; k < nrhs; ++k) { // sptrsv_solve does not batch
1185  auto sub_sol = Kokkos::subview(device_trsv_sol_, Kokkos::ALL, k);
1186  auto sub_rhs = Kokkos::subview(device_trsv_rhs_, Kokkos::ALL, k);
1187 
1188  // do L solve= - numeric (only rhs is modified) on the default device/host space
1189  {
1190 #ifdef HAVE_AMESOS2_TIMERS
1191  Teuchos::RCP< Teuchos::Time > SpLtrsvTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for L solve");
1192  Teuchos::TimeMonitor solveTimer(*SpLtrsvTimer_);
1193 #endif
1194  KokkosSparse::Experimental::sptrsv_solve(&device_khL_, sub_sol, sub_rhs);
1195  }
1196  // do L^T solve - numeric (only rhs is modified) on the default device/host space
1197  {
1198 #ifdef HAVE_AMESOS2_TIMERS
1199  Teuchos::RCP< Teuchos::Time > SpUtrsvTimer_ = Teuchos::TimeMonitor::getNewCounter ("Time for U solve");
1200  Teuchos::TimeMonitor solveTimer(*SpUtrsvTimer_);
1201 #endif
1202  KokkosSparse::Experimental::sptrsv_solve(&device_khU_, sub_rhs, sub_sol);
1203  }
1204  } // end loop over rhs vectors
1205 
1206  // backward pivot
1207  auto local_device_xValues = device_xValues_;
1208  auto local_device_trsv_perm_c = device_trsv_perm_c_;
1209  if (data_.colequ) {
1210  auto local_device_trsv_C = device_trsv_C_;
1211  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1212  KOKKOS_LAMBDA(size_t j) {
1213  for(size_t k = 0; k < nrhs; ++k) {
1214  local_device_xValues(j,k) = local_device_trsv_C(j) * local_device_trsv_rhs(local_device_trsv_perm_c(j),k);
1215  }
1216  });
1217  } else {
1218  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1219  KOKKOS_LAMBDA(size_t j) {
1220  for(size_t k = 0; k < nrhs; ++k) {
1221  local_device_xValues(j,k) = local_device_trsv_rhs(local_device_trsv_perm_c(j),k);
1222  }
1223  });
1224  }
1225 #endif // HAVE_AMESOS2_TRIANGULAR_SOLVE
1226 }
1227 
1228 template<class Matrix, class Vector>
1229 const char* Superlu<Matrix,Vector>::name = "SuperLU";
1230 
1231 
1232 } // end namespace Amesos2
1233 
1234 #endif // AMESOS2_SUPERLU_DEF_HPP
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers...
Definition: Amesos2_SolverCore_decl.hpp:105
A generic helper class for getting a CCS representation of a Matrix.
Definition: Amesos2_Util.hpp:648
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal structures.
Definition: Amesos2_Superlu_def.hpp:962
EPhase
Used to indicate a phase in the direct solution.
Definition: Amesos2_TypeDecl.hpp:65
int numericFactorization_impl()
Superlu specific numeric factorization.
Definition: Amesos2_Superlu_def.hpp:317
~Superlu()
Destructor.
Definition: Amesos2_Superlu_def.hpp:128
Amesos2 Superlu declarations.
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition: Amesos2_Superlu_def.hpp:699
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition: Amesos2_Superlu_def.hpp:818
std::string description() const override
Returns a short description of this Solver.
Definition: Amesos2_Superlu_def.hpp:151
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
Superlu specific solve.
Definition: Amesos2_Superlu_def.hpp:497
A Matrix adapter interface for Amesos2.
Definition: Amesos2_MatrixAdapter_decl.hpp:76
void setParameters_impl(const Teuchos::RCP< Teuchos::ParameterList > &parameterList)
Definition: Amesos2_Superlu_def.hpp:710
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition: Amesos2_Superlu_def.hpp:205
A templated MultiVector class adapter for Amesos2.
Definition: Amesos2_MultiVecAdapter_decl.hpp:176
int symbolicFactorization_impl()
Perform symbolic factorization of the matrix using Superlu.
Definition: Amesos2_Superlu_def.hpp:230
Amesos2 interface to the SuperLU package.
Definition: Amesos2_Superlu_decl.hpp:76