Amesos2 - Direct Sparse Solver Interfaces  Version of the Day
Amesos2_Superlu_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // Amesos2: Templated Direct Sparse Solver Package
6 // Copyright 2011 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39 //
40 // ***********************************************************************
41 //
42 // @HEADER
43 
53 #ifndef AMESOS2_SUPERLU_DEF_HPP
54 #define AMESOS2_SUPERLU_DEF_HPP
55 
56 #include <Teuchos_Tuple.hpp>
57 #include <Teuchos_ParameterList.hpp>
58 #include <Teuchos_StandardParameterEntryValidators.hpp>
59 
61 #include "Amesos2_Superlu_decl.hpp"
62 
63 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
64 // TODO: This 'using namespace SLU' is not a good thing.
65 // It was added because kernels does not namespace SuperLU but Amesos2 does.
66 // Declaring the namespace SLU allows us to reuse that file without duplication.
67 // We need to determine a uniform system to avoid this this but for now, at least
68 // this issue is only present when TRSV is enabled.
69 using namespace SLU;
70 #include "KokkosSparse_sptrsv_superlu.hpp"
71 #endif
72 
73 namespace Amesos2 {
74 
75 
76 template <class Matrix, class Vector>
78  Teuchos::RCP<const Matrix> A,
79  Teuchos::RCP<Vector> X,
80  Teuchos::RCP<const Vector> B )
81  : SolverCore<Amesos2::Superlu,Matrix,Vector>(A, X, B)
82  , is_contiguous_(true) // default is set by params
83  , use_triangular_solves_(false) // default is set by params
84 {
85  // ilu_set_default_options is called later in set parameter list if required.
86  // This is not the ideal way, but the other option to always call
87  // ilu_set_default_options here and assuming it won't have any side effect
88  // in the TPL is more dangerous. It is not a good idea to rely on external
89  // libraries' internal "features".
90  SLU::set_default_options(&(data_.options));
91  // Override some default options
92  data_.options.PrintStat = SLU::NO;
93 
94  SLU::StatInit(&(data_.stat));
95 
96  Kokkos::resize(data_.perm_r, this->globalNumRows_);
97  Kokkos::resize(data_.perm_c, this->globalNumCols_);
98  Kokkos::resize(data_.etree, this->globalNumCols_);
99  Kokkos::resize(data_.R, this->globalNumRows_);
100  Kokkos::resize(data_.C, this->globalNumCols_);
101 
102  data_.relax = SLU::sp_ienv(2); // Query optimal relax param from superlu
103  data_.panel_size = SLU::sp_ienv(1); // Query optimal panel size
104 
105  data_.equed = 'N'; // No equilibration
106  data_.A.Store = NULL;
107  data_.L.Store = NULL;
108  data_.U.Store = NULL;
109  data_.X.Store = NULL;
110  data_.B.Store = NULL;
111 
112  ILU_Flag_=false; // default: turn off ILU
113 }
114 
115 
116 template <class Matrix, class Vector>
118 {
119  /* Free Superlu data_types
120  * - Matrices
121  * - Vectors
122  * - Stat object
123  */
124  SLU::StatFree( &(data_.stat) ) ;
125 
126  // Storage is initialized in numericFactorization_impl()
127  if ( data_.A.Store != NULL ){
128  SLU::Destroy_SuperMatrix_Store( &(data_.A) );
129  }
130 
131  // only root allocated these SuperMatrices.
132  if ( data_.L.Store != NULL ){ // will only be true for this->root_
133  SLU::Destroy_SuperNode_Matrix( &(data_.L) );
134  SLU::Destroy_CompCol_Matrix( &(data_.U) );
135  }
136 }
137 
138 template <class Matrix, class Vector>
139 std::string
141 {
142  std::ostringstream oss;
143  oss << "SuperLU solver interface";
144  if (ILU_Flag_) {
145  oss << ", \"ILUTP\" : {";
146  oss << "drop tol = " << data_.options.ILU_DropTol;
147  oss << ", fill factor = " << data_.options.ILU_FillFactor;
148  oss << ", fill tol = " << data_.options.ILU_FillTol;
149  switch(data_.options.ILU_MILU) {
150  case SLU::SMILU_1 :
151  oss << ", MILU 1";
152  break;
153  case SLU::SMILU_2 :
154  oss << ", MILU 2";
155  break;
156  case SLU::SMILU_3 :
157  oss << ", MILU 3";
158  break;
159  case SLU::SILU :
160  default:
161  oss << ", regular ILU";
162  }
163  switch(data_.options.ILU_Norm) {
164  case SLU::ONE_NORM :
165  oss << ", 1-norm";
166  break;
167  case SLU::TWO_NORM :
168  oss << ", 2-norm";
169  break;
170  case SLU::INF_NORM :
171  default:
172  oss << ", infinity-norm";
173  }
174  oss << "}";
175  } else {
176  oss << ", direct solve";
177  }
178  return oss.str();
179  /*
180 
181  // ILU parameters
182  if( parameterList->isParameter("RowPerm") ){
183  RCP<const ParameterEntryValidator> rowperm_validator = valid_params->getEntry("RowPerm").validator();
184  parameterList->getEntry("RowPerm").setValidator(rowperm_validator);
185  data_.options.RowPerm = getIntegralValue<SLU::rowperm_t>(*parameterList, "RowPerm");
186  }
187 
188 
189  */
190 }
191 
192 template<class Matrix, class Vector>
193 int
195 {
196  /*
197  * Get column permutation vector perm_c[], according to permc_spec:
198  * permc_spec = NATURAL: natural ordering
199  * permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A
200  * permc_spec = MMD_ATA: minimum degree on structure of A'*A
201  * permc_spec = COLAMD: approximate minimum degree column ordering
202  * permc_spec = MY_PERMC: the ordering already supplied in perm_c[]
203  */
204  int permc_spec = data_.options.ColPerm;
205  if ( permc_spec != SLU::MY_PERMC && this->root_ ){
206 #ifdef HAVE_AMESOS2_TIMERS
207  Teuchos::TimeMonitor preOrderTimer(this->timers_.preOrderTime_);
208 #endif
209 
210  SLU::get_perm_c(permc_spec, &(data_.A), data_.perm_c.data());
211  }
212 
213  return(0);
214 }
215 
216 
217 template <class Matrix, class Vector>
218 int
220 {
221  /*
222  * SuperLU performs symbolic factorization and numeric factorization
223  * together, but does leave some options for reusing symbolic
224  * structures that have been created on previous factorizations. If
225  * our Amesos2 user calls this function, that is an indication that
226  * the symbolic structure of the matrix is no longer valid, and
227  * SuperLU should do the factorization from scratch.
228  *
229  * This can be accomplished by setting the options.Fact flag to
230  * DOFACT, as well as setting our own internal flag to false.
231  */
232  same_symbolic_ = false;
233  data_.options.Fact = SLU::DOFACT;
234 
235  return(0);
236 }
237 
238 
239 template <class Matrix, class Vector>
240 int
242 {
243  using Teuchos::as;
244 
245  // Cleanup old L and U matrices if we are not reusing a symbolic
246  // factorization. Stores and other data will be allocated in gstrf.
247  // Only rank 0 has valid pointers
248  if ( !same_symbolic_ && data_.L.Store != NULL ){
249  SLU::Destroy_SuperNode_Matrix( &(data_.L) );
250  SLU::Destroy_CompCol_Matrix( &(data_.U) );
251  data_.L.Store = NULL;
252  data_.U.Store = NULL;
253  }
254 
255  if( same_symbolic_ ) data_.options.Fact = SLU::SamePattern_SameRowPerm;
256 
257  int info = 0;
258  if ( this->root_ ){
259 
260 #ifdef HAVE_AMESOS2_DEBUG
261  TEUCHOS_TEST_FOR_EXCEPTION( data_.A.ncol != as<int>(this->globalNumCols_),
262  std::runtime_error,
263  "Error in converting to SuperLU SuperMatrix: wrong number of global columns." );
264  TEUCHOS_TEST_FOR_EXCEPTION( data_.A.nrow != as<int>(this->globalNumRows_),
265  std::runtime_error,
266  "Error in converting to SuperLU SuperMatrix: wrong number of global rows." );
267 #endif
268 
269  if( data_.options.Equil == SLU::YES ){
270  magnitude_type rowcnd, colcnd, amax;
271  int info2 = 0;
272 
273  // calculate row and column scalings
274  function_map::gsequ(&(data_.A), data_.R.data(),
275  data_.C.data(), &rowcnd, &colcnd,
276  &amax, &info2);
277  TEUCHOS_TEST_FOR_EXCEPTION
278  (info2 < 0, std::runtime_error,
279  "SuperLU's gsequ function returned with status " << info2 << " < 0."
280  " This means that argument " << (-info2) << " given to the function"
281  " had an illegal value.");
282  if (info2 > 0) {
283  if (info2 <= data_.A.nrow) {
284  TEUCHOS_TEST_FOR_EXCEPTION
285  (true, std::runtime_error, "SuperLU's gsequ function returned with "
286  "info = " << info2 << " > 0, and info <= A.nrow = " << data_.A.nrow
287  << ". This means that row " << info2 << " of A is exactly zero.");
288  }
289  else if (info2 > data_.A.ncol) {
290  TEUCHOS_TEST_FOR_EXCEPTION
291  (true, std::runtime_error, "SuperLU's gsequ function returned with "
292  "info = " << info2 << " > 0, and info > A.ncol = " << data_.A.ncol
293  << ". This means that column " << (info2 - data_.A.nrow) << " of "
294  "A is exactly zero.");
295  }
296  else {
297  TEUCHOS_TEST_FOR_EXCEPTION
298  (true, std::runtime_error, "SuperLU's gsequ function returned "
299  "with info = " << info2 << " > 0, but its value is not in the "
300  "range permitted by the documentation. This should never happen "
301  "(it appears to be SuperLU's fault).");
302  }
303  }
304 
305  // apply row and column scalings if necessary
306  function_map::laqgs(&(data_.A), data_.R.data(),
307  data_.C.data(), rowcnd, colcnd,
308  amax, &(data_.equed));
309 
310  // // check what types of equilibration was actually done
311  // data_.rowequ = (data_.equed == 'R') || (data_.equed == 'B');
312  // data_.colequ = (data_.equed == 'C') || (data_.equed == 'B');
313  }
314 
315  // Apply the column permutation computed in preOrdering. Place the
316  // column-permuted matrix in AC
317  SLU::sp_preorder(&(data_.options), &(data_.A), data_.perm_c.data(),
318  data_.etree.data(), &(data_.AC));
319 
320  { // Do factorization
321 #ifdef HAVE_AMESOS2_TIMERS
322  Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
323 #endif
324 
325 #ifdef HAVE_AMESOS2_VERBOSE_DEBUG
326  std::cout << "Superlu:: Before numeric factorization" << std::endl;
327  std::cout << "nzvals_ : " << nzvals_.toString() << std::endl;
328  std::cout << "rowind_ : " << rowind_.toString() << std::endl;
329  std::cout << "colptr_ : " << colptr_.toString() << std::endl;
330 #endif
331 
332  if(ILU_Flag_==false) {
333  function_map::gstrf(&(data_.options), &(data_.AC),
334  data_.relax, data_.panel_size, data_.etree.data(),
335  NULL, 0, data_.perm_c.data(), data_.perm_r.data(),
336  &(data_.L), &(data_.U),
337 #ifdef HAVE_AMESOS2_SUPERLU5_API
338  &(data_.lu),
339 #endif
340  &(data_.stat), &info);
341  }
342  else {
343  function_map::gsitrf(&(data_.options), &(data_.AC),
344  data_.relax, data_.panel_size, data_.etree.data(),
345  NULL, 0, data_.perm_c.data(), data_.perm_r.data(),
346  &(data_.L), &(data_.U),
347 #ifdef HAVE_AMESOS2_SUPERLU5_API
348  &(data_.lu),
349 #endif
350  &(data_.stat), &info);
351  }
352 
353  }
354  // Cleanup. AC data will be alloc'd again for next factorization (if at all)
355  SLU::Destroy_CompCol_Permuted( &(data_.AC) );
356 
357  // Set the number of non-zero values in the L and U factors
358  this->setNnzLU( as<size_t>(((SLU::SCformat*)data_.L.Store)->nnz +
359  ((SLU::NCformat*)data_.U.Store)->nnz) );
360  }
361 
362  /* All processes should have the same error code */
363  Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
364 
365  global_size_type info_st = as<global_size_type>(info);
366  TEUCHOS_TEST_FOR_EXCEPTION( (info_st > 0) && (info_st <= this->globalNumCols_),
367  std::runtime_error,
368  "Factorization complete, but matrix is singular. Division by zero eminent");
369  TEUCHOS_TEST_FOR_EXCEPTION( (info_st > 0) && (info_st > this->globalNumCols_),
370  std::runtime_error,
371  "Memory allocation failure in Superlu factorization");
372 
373  data_.options.Fact = SLU::FACTORED;
374  same_symbolic_ = true;
375 
376  if(use_triangular_solves_) {
377  triangular_solve_factor();
378  }
379 
380  return(info);
381 }
382 
383 
384 template <class Matrix, class Vector>
385 int
387  const Teuchos::Ptr<const MultiVecAdapter<Vector> > B) const
388 {
389  using Teuchos::as;
390 
391  const global_size_type ld_rhs = this->root_ ? X->getGlobalLength() : 0;
392  const size_t nrhs = X->getGlobalNumVectors();
393 
394  { // Get values from RHS B
395 #ifdef HAVE_AMESOS2_TIMERS
396  Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
397  Teuchos::TimeMonitor redistTimer( this->timers_.vecRedistTime_ );
398 #endif
399 
400  // In general we may want to write directly to the x space without a copy.
401  // So we 'get' x which may be a direct view assignment to the MV.
402  if(use_triangular_solves_) { // to device
403 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
404  Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
405  device_solve_array_t>::do_get(X, device_xValues_,
406  as<size_t>(ld_rhs),
407  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
408  this->rowIndexBase_);
409  Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
410  device_solve_array_t>::do_get(B, device_bValues_,
411  as<size_t>(ld_rhs),
412  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
413  this->rowIndexBase_);
414 #endif
415  }
416  else { // to host
417  Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
418  host_solve_array_t>::do_get(X, host_xValues_,
419  as<size_t>(ld_rhs),
420  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
421  this->rowIndexBase_);
422  Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
423  host_solve_array_t>::do_get(B, host_bValues_,
424  as<size_t>(ld_rhs),
425  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
426  this->rowIndexBase_);
427  }
428  }
429 
430  // If equilibration was applied at numeric, then gssvx and gsisx are going to
431  // modify B, so we can't use the optimized assignment to B since we'll change
432  // the source test vector and then fail the subsequent cycle. We need a copy.
433  // TODO: If above get_1d_copy_helper_kokkos_view already copied then we can
434  // skip this. Generally need an API which tells us what happened internally
435  // in above get_1d_copy_helper_kokkos_view - whether is was copy or assign.
436  if(data_.equed != 'N') {
437  if(use_triangular_solves_) { // to device
438 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
439  device_solve_array_t copyB(Kokkos::ViewAllocateWithoutInitializing("copyB"),
440  device_bValues_.extent(0), device_bValues_.extent(1));
441  Kokkos::deep_copy(copyB, device_bValues_);
442  device_bValues_ = copyB;
443 #endif
444  }
445  else {
446  host_solve_array_t copyB(Kokkos::ViewAllocateWithoutInitializing("copyB"),
447  host_bValues_.extent(0), host_bValues_.extent(1));
448  Kokkos::deep_copy(copyB, host_bValues_);
449  host_bValues_ = copyB;
450  }
451  }
452 
453  int ierr = 0; // returned error code
454 
455  magnitude_type rpg, rcond;
456  if ( this->root_ ) {
457  // Note: the values of B and X (after solution) are adjusted
458  // appropriately within gssvx for row and column scalings.
459  { // Do solve!
460 #ifdef HAVE_AMESOS2_TIMERS
461  Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
462 #endif
463 
464  if(use_triangular_solves_) {
465  triangular_solve();
466  }
467  else {
468  Kokkos::resize(data_.ferr, nrhs);
469  Kokkos::resize(data_.berr, nrhs);
470 
471  {
472  #ifdef HAVE_AMESOS2_TIMERS
473  Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
474  #endif
475  SLU::Dtype_t dtype = type_map::dtype;
476  int i_ld_rhs = as<int>(ld_rhs);
477  function_map::create_Dense_Matrix(&(data_.B), i_ld_rhs, as<int>(nrhs),
478  convert_bValues_, host_bValues_, i_ld_rhs,
479  SLU::SLU_DN, dtype, SLU::SLU_GE);
480  function_map::create_Dense_Matrix(&(data_.X), i_ld_rhs, as<int>(nrhs),
481  convert_xValues_, host_xValues_, i_ld_rhs,
482  SLU::SLU_DN, dtype, SLU::SLU_GE);
483  }
484 
485  if(ILU_Flag_==false) {
486  function_map::gssvx(&(data_.options), &(data_.A),
487  data_.perm_c.data(), data_.perm_r.data(),
488  data_.etree.data(), &(data_.equed), data_.R.data(),
489  data_.C.data(), &(data_.L), &(data_.U), NULL, 0, &(data_.B),
490  &(data_.X), &rpg, &rcond, data_.ferr.data(),
491  data_.berr.data(),
492  #ifdef HAVE_AMESOS2_SUPERLU5_API
493  &(data_.lu),
494  #endif
495  &(data_.mem_usage), &(data_.stat), &ierr);
496  }
497  else {
498  function_map::gsisx(&(data_.options), &(data_.A),
499  data_.perm_c.data(), data_.perm_r.data(),
500  data_.etree.data(), &(data_.equed), data_.R.data(),
501  data_.C.data(), &(data_.L), &(data_.U), NULL, 0, &(data_.B),
502  &(data_.X), &rpg, &rcond,
503  #ifdef HAVE_AMESOS2_SUPERLU5_API
504  &(data_.lu),
505  #endif
506  &(data_.mem_usage), &(data_.stat), &ierr);
507  }
508 
509  // Cleanup X and B stores
510  SLU::Destroy_SuperMatrix_Store( &(data_.X) );
511  SLU::Destroy_SuperMatrix_Store( &(data_.B) );
512  data_.X.Store = NULL;
513  data_.B.Store = NULL;
514  }
515 
516  } // do solve
517 
518  // convert back to Kokkos views
519  {
520 #ifdef HAVE_AMESOS2_TIMERS
521  Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
522 #endif
523  function_map::convert_back_Dense_Matrix(convert_bValues_, host_bValues_);
524  function_map::convert_back_Dense_Matrix(convert_xValues_, host_xValues_);
525  }
526 
527  // Cleanup X and B stores
528  SLU::Destroy_SuperMatrix_Store( &(data_.X) );
529  SLU::Destroy_SuperMatrix_Store( &(data_.B) );
530  data_.X.Store = NULL;
531  data_.B.Store = NULL;
532  }
533 
534  /* All processes should have the same error code */
535  Teuchos::broadcast(*(this->getComm()), 0, &ierr);
536 
537  global_size_type ierr_st = as<global_size_type>(ierr);
538  TEUCHOS_TEST_FOR_EXCEPTION( ierr < 0,
539  std::invalid_argument,
540  "Argument " << -ierr << " to SuperLU xgssvx had illegal value" );
541  TEUCHOS_TEST_FOR_EXCEPTION( ierr > 0 && ierr_st <= this->globalNumCols_,
542  std::runtime_error,
543  "Factorization complete, but U is exactly singular" );
544  TEUCHOS_TEST_FOR_EXCEPTION( ierr > 0 && ierr_st > this->globalNumCols_ + 1,
545  std::runtime_error,
546  "SuperLU allocated " << ierr - this->globalNumCols_ << " bytes of "
547  "memory before allocation failure occured." );
548 
549  /* Update X's global values */
550  {
551 #ifdef HAVE_AMESOS2_TIMERS
552  Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
553 #endif
554 
555  if(use_triangular_solves_) { // to device
556 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
557  Util::put_1d_data_helper_kokkos_view<
558  MultiVecAdapter<Vector>,device_solve_array_t>::do_put(X, device_xValues_,
559  as<size_t>(ld_rhs),
560  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
561  this->rowIndexBase_);
562 #endif
563  }
564  else {
565  Util::put_1d_data_helper_kokkos_view<
566  MultiVecAdapter<Vector>,host_solve_array_t>::do_put(X, host_xValues_,
567  as<size_t>(ld_rhs),
568  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
569  this->rowIndexBase_);
570  }
571 
572  }
573 
574 
575  return(ierr);
576 }
577 
578 
579 template <class Matrix, class Vector>
580 bool
582 {
583  // The Superlu factorization routines can handle square as well as
584  // rectangular matrices, but Superlu can only apply the solve routines to
585  // square matrices, so we check the matrix for squareness.
586  return( this->matrixA_->getGlobalNumRows() == this->matrixA_->getGlobalNumCols() );
587 }
588 
589 
590 template <class Matrix, class Vector>
591 void
592 Superlu<Matrix,Vector>::setParameters_impl(const Teuchos::RCP<Teuchos::ParameterList> & parameterList )
593 {
594  using Teuchos::RCP;
595  using Teuchos::getIntegralValue;
596  using Teuchos::ParameterEntryValidator;
597 
598  RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
599 
600  ILU_Flag_ = parameterList->get<bool>("ILU_Flag",false);
601  if (ILU_Flag_) {
602  SLU::ilu_set_default_options(&(data_.options));
603  // Override some default options
604  data_.options.PrintStat = SLU::NO;
605  }
606 
607  data_.options.Trans = this->control_.useTranspose_ ? SLU::TRANS : SLU::NOTRANS;
608  // The SuperLU transpose option can override the Amesos2 option
609  if( parameterList->isParameter("Trans") ){
610  RCP<const ParameterEntryValidator> trans_validator = valid_params->getEntry("Trans").validator();
611  parameterList->getEntry("Trans").setValidator(trans_validator);
612 
613  data_.options.Trans = getIntegralValue<SLU::trans_t>(*parameterList, "Trans");
614  }
615 
616  if( parameterList->isParameter("IterRefine") ){
617  RCP<const ParameterEntryValidator> refine_validator = valid_params->getEntry("IterRefine").validator();
618  parameterList->getEntry("IterRefine").setValidator(refine_validator);
619 
620  data_.options.IterRefine = getIntegralValue<SLU::IterRefine_t>(*parameterList, "IterRefine");
621  }
622 
623  if( parameterList->isParameter("ColPerm") ){
624  RCP<const ParameterEntryValidator> colperm_validator = valid_params->getEntry("ColPerm").validator();
625  parameterList->getEntry("ColPerm").setValidator(colperm_validator);
626 
627  data_.options.ColPerm = getIntegralValue<SLU::colperm_t>(*parameterList, "ColPerm");
628  }
629 
630  data_.options.DiagPivotThresh = parameterList->get<double>("DiagPivotThresh", 1.0);
631 
632  bool equil = parameterList->get<bool>("Equil", true);
633  data_.options.Equil = equil ? SLU::YES : SLU::NO;
634 
635  bool symmetric_mode = parameterList->get<bool>("SymmetricMode", false);
636  data_.options.SymmetricMode = symmetric_mode ? SLU::YES : SLU::NO;
637 
638  // ILU parameters
639  if( parameterList->isParameter("RowPerm") ){
640  RCP<const ParameterEntryValidator> rowperm_validator = valid_params->getEntry("RowPerm").validator();
641  parameterList->getEntry("RowPerm").setValidator(rowperm_validator);
642  data_.options.RowPerm = getIntegralValue<SLU::rowperm_t>(*parameterList, "RowPerm");
643  }
644 
645  /*if( parameterList->isParameter("ILU_DropRule") ) {
646  RCP<const ParameterEntryValidator> droprule_validator = valid_params->getEntry("ILU_DropRule").validator();
647  parameterList->getEntry("ILU_DropRule").setValidator(droprule_validator);
648  data_.options.ILU_DropRule = getIntegralValue<SLU::rule_t>(*parameterList, "ILU_DropRule");
649  }*/
650 
651  data_.options.ILU_DropTol = parameterList->get<double>("ILU_DropTol", 0.0001);
652 
653  data_.options.ILU_FillFactor = parameterList->get<double>("ILU_FillFactor", 10.0);
654 
655  if( parameterList->isParameter("ILU_Norm") ) {
656  RCP<const ParameterEntryValidator> norm_validator = valid_params->getEntry("ILU_Norm").validator();
657  parameterList->getEntry("ILU_Norm").setValidator(norm_validator);
658  data_.options.ILU_Norm = getIntegralValue<SLU::norm_t>(*parameterList, "ILU_Norm");
659  }
660 
661  if( parameterList->isParameter("ILU_MILU") ) {
662  RCP<const ParameterEntryValidator> milu_validator = valid_params->getEntry("ILU_MILU").validator();
663  parameterList->getEntry("ILU_MILU").setValidator(milu_validator);
664  data_.options.ILU_MILU = getIntegralValue<SLU::milu_t>(*parameterList, "ILU_MILU");
665  }
666 
667  data_.options.ILU_FillTol = parameterList->get<double>("ILU_FillTol", 0.01);
668 
669  is_contiguous_ = parameterList->get<bool>("IsContiguous", true);
670  use_triangular_solves_ = parameterList->get<bool>("Enable_KokkosKernels_TriangularSolves", false);
671 
672  if(use_triangular_solves_) {
673 #if not defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) || not defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
674  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error,
675  "Calling for triangular solves but KokkosKernels_ENABLE_SUPERNODAL_SPTRSV and KokkosKernels_ENABLE_TPL_SUPERLU were not configured." );
676 #endif
677  }
678 }
679 
680 
681 template <class Matrix, class Vector>
682 Teuchos::RCP<const Teuchos::ParameterList>
684 {
685  using std::string;
686  using Teuchos::tuple;
687  using Teuchos::ParameterList;
688  using Teuchos::EnhancedNumberValidator;
689  using Teuchos::setStringToIntegralParameter;
690  using Teuchos::stringToIntegralParameterEntryValidator;
691 
692  static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
693 
694  if( is_null(valid_params) ){
695  Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
696 
697  setStringToIntegralParameter<SLU::trans_t>("Trans", "NOTRANS",
698  "Solve for the transpose system or not",
699  tuple<string>("TRANS","NOTRANS","CONJ"),
700  tuple<string>("Solve with transpose",
701  "Do not solve with transpose",
702  "Solve with the conjugate transpose"),
703  tuple<SLU::trans_t>(SLU::TRANS,
704  SLU::NOTRANS,
705  SLU::CONJ),
706  pl.getRawPtr());
707 
708  setStringToIntegralParameter<SLU::IterRefine_t>("IterRefine", "NOREFINE",
709  "Type of iterative refinement to use",
710  tuple<string>("NOREFINE", "SLU_SINGLE", "SLU_DOUBLE"),
711  tuple<string>("Do not use iterative refinement",
712  "Do single iterative refinement",
713  "Do double iterative refinement"),
714  tuple<SLU::IterRefine_t>(SLU::NOREFINE,
715  SLU::SLU_SINGLE,
716  SLU::SLU_DOUBLE),
717  pl.getRawPtr());
718 
719  // Note: MY_PERMC not yet supported
720  setStringToIntegralParameter<SLU::colperm_t>("ColPerm", "COLAMD",
721  "Specifies how to permute the columns of the "
722  "matrix for sparsity preservation",
723  tuple<string>("NATURAL", "MMD_AT_PLUS_A",
724  "MMD_ATA", "COLAMD"),
725  tuple<string>("Natural ordering",
726  "Minimum degree ordering on A^T + A",
727  "Minimum degree ordering on A^T A",
728  "Approximate minimum degree column ordering"),
729  tuple<SLU::colperm_t>(SLU::NATURAL,
730  SLU::MMD_AT_PLUS_A,
731  SLU::MMD_ATA,
732  SLU::COLAMD),
733  pl.getRawPtr());
734 
735  Teuchos::RCP<EnhancedNumberValidator<double> > diag_pivot_thresh_validator
736  = Teuchos::rcp( new EnhancedNumberValidator<double>(0.0, 1.0) );
737  pl->set("DiagPivotThresh", 1.0,
738  "Specifies the threshold used for a diagonal entry to be an acceptable pivot",
739  diag_pivot_thresh_validator); // partial pivoting
740 
741  pl->set("Equil", true, "Whether to equilibrate the system before solve");
742 
743  pl->set("SymmetricMode", false,
744  "Specifies whether to use the symmetric mode. "
745  "Gives preference to diagonal pivots and uses "
746  "an (A^T + A)-based column permutation.");
747 
748  // ILU parameters
749 
750  setStringToIntegralParameter<SLU::rowperm_t>("RowPerm", "LargeDiag",
751  "Type of row permutation strategy to use",
752  tuple<string>("NOROWPERM","LargeDiag","MY_PERMR"),
753  tuple<string>("Use natural ordering",
754  "Use weighted bipartite matching algorithm",
755  "Use the ordering given in perm_r input"),
756  tuple<SLU::rowperm_t>(SLU::NOROWPERM,
757 #if SUPERLU_MAJOR_VERSION > 5 || (SUPERLU_MAJOR_VERSION == 5 && SUPERLU_MINOR_VERSION > 2) || (SUPERLU_MAJOR_VERSION == 5 && SUPERLU_MINOR_VERSION == 2 && SUPERLU_PATCH_VERSION >= 2)
758  SLU::LargeDiag_MC64,
759 #else
760  SLU::LargeDiag,
761 #endif
762  SLU::MY_PERMR),
763  pl.getRawPtr());
764 
765  /*setStringToIntegralParameter<SLU::rule_t>("ILU_DropRule", "DROP_BASIC",
766  "Type of dropping strategy to use",
767  tuple<string>("DROP_BASIC","DROP_PROWS",
768  "DROP_COLUMN","DROP_AREA",
769  "DROP_DYNAMIC","DROP_INTERP"),
770  tuple<string>("ILUTP(t)","ILUTP(p,t)",
771  "Variant of ILUTP(p,t) for j-th column",
772  "Variant of ILUTP to control memory",
773  "Dynamically adjust threshold",
774  "Compute second dropping threshold by interpolation"),
775  tuple<SLU::rule_t>(SLU::DROP_BASIC,SLU::DROP_PROWS,SLU::DROP_COLUMN,
776  SLU::DROP_AREA,SLU::DROP_DYNAMIC,SLU::DROP_INTERP),
777  pl.getRawPtr());*/
778 
779  pl->set("ILU_DropTol", 0.0001, "ILUT drop tolerance");
780 
781  pl->set("ILU_FillFactor", 10.0, "ILUT fill factor");
782 
783  setStringToIntegralParameter<SLU::norm_t>("ILU_Norm", "INF_NORM",
784  "Type of norm to use",
785  tuple<string>("ONE_NORM","TWO_NORM","INF_NORM"),
786  tuple<string>("1-norm","2-norm","inf-norm"),
787  tuple<SLU::norm_t>(SLU::ONE_NORM,SLU::TWO_NORM,SLU::INF_NORM),
788  pl.getRawPtr());
789 
790  setStringToIntegralParameter<SLU::milu_t>("ILU_MILU", "SILU",
791  "Type of modified ILU to use",
792  tuple<string>("SILU","SMILU_1","SMILU_2","SMILU_3"),
793  tuple<string>("Regular ILU","MILU 1","MILU 2","MILU 3"),
794  tuple<SLU::milu_t>(SLU::SILU,SLU::SMILU_1,SLU::SMILU_2,
795  SLU::SMILU_3),
796  pl.getRawPtr());
797 
798  pl->set("ILU_FillTol", 0.01, "ILUT fill tolerance");
799 
800  pl->set("ILU_Flag", false, "ILU flag: if true, run ILU routines");
801 
802  pl->set("Enable_KokkosKernels_TriangularSolves", false, "Whether to use triangular solves.");
803 
804  pl->set("IsContiguous", true, "Whether GIDs contiguous");
805 
806  valid_params = pl;
807  }
808 
809  return valid_params;
810 }
811 
812 
813 template <class Matrix, class Vector>
814 bool
816 {
817  using Teuchos::as;
818 
819 #ifdef HAVE_AMESOS2_TIMERS
820  Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
821 #endif
822 
823  // SuperLU does not need the matrix at symbolicFactorization()
824  if( current_phase == SYMBFACT ) return false;
825 
826  // Cleanup old store memory if it's non-NULL (should only ever be non-NULL at root_)
827  if( data_.A.Store != NULL ){
828  SLU::Destroy_SuperMatrix_Store( &(data_.A) );
829  data_.A.Store = NULL;
830  }
831 
832  // Only the root image needs storage allocated
833  if( this->root_ ){
834  Kokkos::resize(host_nzvals_view_, this->globalNumNonZeros_);
835  Kokkos::resize(host_rows_view_, this->globalNumNonZeros_);
836  Kokkos::resize(host_col_ptr_view_, this->globalNumRows_ + 1);
837  }
838 
839  int nnz_ret = 0;
840  {
841 #ifdef HAVE_AMESOS2_TIMERS
842  Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
843 #endif
844 
845  TEUCHOS_TEST_FOR_EXCEPTION( this->rowIndexBase_ != this->columnIndexBase_,
846  std::runtime_error,
847  "Row and column maps have different indexbase ");
848 
849  if ( is_contiguous_ == true ) {
850  Util::get_ccs_helper_kokkos_view<
851  MatrixAdapter<Matrix>,host_value_type_array,host_ordinal_type_array,
852  host_size_type_array>::do_get(this->matrixA_.ptr(),
853  host_nzvals_view_, host_rows_view_,
854  host_col_ptr_view_, nnz_ret, ROOTED,
855  ARBITRARY,
856  this->rowIndexBase_);
857  }
858  else {
859  Util::get_ccs_helper_kokkos_view<
860  MatrixAdapter<Matrix>,host_value_type_array,host_ordinal_type_array,
861  host_size_type_array>::do_get(this->matrixA_.ptr(),
862  host_nzvals_view_, host_rows_view_,
863  host_col_ptr_view_, nnz_ret, CONTIGUOUS_AND_ROOTED,
864  ARBITRARY,
865  this->rowIndexBase_);
866  }
867  }
868 
869  // Get the SLU data type for this type of matrix
870  SLU::Dtype_t dtype = type_map::dtype;
871 
872  if( this->root_ ){
873  TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != as<int>(this->globalNumNonZeros_),
874  std::runtime_error,
875  "Did not get the expected number of non-zero vals");
876 
877  function_map::template create_CompCol_Matrix<host_value_type_array>( &(data_.A),
878  this->globalNumRows_, this->globalNumCols_,
879  nnz_ret,
880  convert_nzvals_, host_nzvals_view_,
881  host_rows_view_.data(),
882  host_col_ptr_view_.data(),
883  SLU::SLU_NC, dtype, SLU::SLU_GE);
884  }
885 
886  return true;
887 }
888 
889 template <class Matrix, class Vector>
890 void
892 {
893 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
894  size_t ld_rhs = this->matrixA_->getGlobalNumRows();
895 
896  // convert etree to parents
897  SLU::SCformat * Lstore = (SLU::SCformat*)(data_.L.Store);
898  int nsuper = 1 + Lstore->nsuper; // # of supernodal columns
899  Kokkos::resize(data_.parents, nsuper);
900  for (int s = 0; s < nsuper; s++) {
901  int j = Lstore->sup_to_col[s+1]-1; // the last column index of this supernode
902  if (data_.etree[j] == static_cast<int>(ld_rhs)) {
903  data_.parents(s) = -1;
904  } else {
905  data_.parents(s) = Lstore->col_to_sup[data_.etree[j]];
906  }
907  }
908 
909  deep_copy_or_assign_view(device_trsv_perm_r_, data_.perm_r); // will use device to permute
910  deep_copy_or_assign_view(device_trsv_perm_c_, data_.perm_c); // will use device to permute
911 
912  // Create handles for U and U^T solves
913  device_khL_.create_sptrsv_handle(
914  KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_ETREE, ld_rhs, true);
915  device_khU_.create_sptrsv_handle(
916  KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_ETREE, ld_rhs, false);
917 
918  const bool u_in_csr = true; // TODO: add needed params
919  device_khU_.set_sptrsv_column_major (!u_in_csr);
920 
921  const bool merge = false; // TODO: add needed params
922  device_khL_.set_sptrsv_merge_supernodes (merge);
923  device_khU_.set_sptrsv_merge_supernodes (merge);
924 
925  // specify whether to invert diagonal blocks
926  const bool invert_diag = true; // TODO: add needed params
927  device_khL_.set_sptrsv_invert_diagonal (invert_diag);
928  device_khU_.set_sptrsv_invert_diagonal (invert_diag);
929 
930  // specify wheather to apply diagonal-inversion to off-diagonal blocks (optional, default is false)
931  const bool invert_offdiag = false; // TODO: add needed params
932  device_khL_.set_sptrsv_invert_offdiagonal (invert_offdiag);
933  device_khU_.set_sptrsv_invert_offdiagonal (invert_offdiag);
934 
935  // set etree
936  device_khL_.set_sptrsv_etree(data_.parents.data());
937  device_khU_.set_sptrsv_etree(data_.parents.data());
938 
939  // set permutation
940  device_khL_.set_sptrsv_perm(data_.perm_r.data());
941  device_khU_.set_sptrsv_perm(data_.perm_c.data());
942 
943  int block_size = -1; // TODO: add needed params
944  if (block_size >= 0) {
945  std::cout << " Block Size : " << block_size << std::endl;
946  device_khL_.set_sptrsv_diag_supernode_sizes (block_size, block_size);
947  device_khU_.set_sptrsv_diag_supernode_sizes (block_size, block_size);
948  }
949 
950  // Do symbolic analysis
951  KokkosSparse::Experimental::sptrsv_symbolic
952  (&device_khL_, &device_khU_, data_.L, data_.U);
953 
954  // Do numerical compute
955  KokkosSparse::Experimental::sptrsv_compute
956  (&device_khL_, &device_khU_, data_.L, data_.U);
957 #endif // HAVE_AMESOS2_TRIANGULAR_SOLVE
958 }
959 
960 template <class Matrix, class Vector>
961 void
962 Superlu<Matrix,Vector>::triangular_solve() const
963 {
964 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
965  size_t ld_rhs = device_xValues_.extent(0);
966  size_t nrhs = device_xValues_.extent(1);
967 
968  Kokkos::resize(device_trsv_rhs_, ld_rhs, nrhs);
969  Kokkos::resize(device_trsv_sol_, ld_rhs, nrhs);
970 
971  // forward pivot
972  auto local_device_bValues = device_bValues_;
973  auto local_device_trsv_perm_r = device_trsv_perm_r_;
974  auto local_device_trsv_rhs = device_trsv_rhs_;
975  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
976  KOKKOS_LAMBDA(size_t j) {
977  for(size_t k = 0; k < nrhs; ++k) {
978  local_device_trsv_rhs(local_device_trsv_perm_r(j),k) = local_device_bValues(j,k);
979  }
980  });
981 
982  for(size_t k = 0; k < nrhs; ++k) { // sptrsv_solve does not batch
983  auto sub_sol = Kokkos::subview(device_trsv_sol_, Kokkos::ALL, k);
984  auto sub_rhs = Kokkos::subview(device_trsv_rhs_, Kokkos::ALL, k);
985 
986  // do L solve= - numeric (only rhs is modified) on the default device/host space
987  KokkosSparse::Experimental::sptrsv_solve(&device_khL_, sub_sol, sub_rhs);
988 
989  // do L^T solve - numeric (only rhs is modified) on the default device/host space
990  KokkosSparse::Experimental::sptrsv_solve(&device_khU_, sub_rhs, sub_sol);
991  } // end loop over rhs vectors
992 
993  // backward pivot
994  auto local_device_xValues = device_xValues_;
995  auto local_device_trsv_perm_c = device_trsv_perm_c_;
996  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
997  KOKKOS_LAMBDA(size_t j) {
998  for(size_t k = 0; k < nrhs; ++k) {
999  local_device_xValues(j,k) = local_device_trsv_rhs(local_device_trsv_perm_c(j),k);
1000  }
1001  });
1002 #endif // HAVE_AMESOS2_TRIANGULAR_SOLVE
1003 }
1004 
1005 template<class Matrix, class Vector>
1006 const char* Superlu<Matrix,Vector>::name = "SuperLU";
1007 
1008 
1009 } // end namespace Amesos2
1010 
1011 #endif // AMESOS2_SUPERLU_DEF_HPP
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers...
Definition: Amesos2_SolverCore_decl.hpp:105
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal structures.
Definition: Amesos2_Superlu_def.hpp:815
EPhase
Used to indicate a phase in the direct solution.
Definition: Amesos2_TypeDecl.hpp:65
int numericFactorization_impl()
Superlu specific numeric factorization.
Definition: Amesos2_Superlu_def.hpp:241
~Superlu()
Destructor.
Definition: Amesos2_Superlu_def.hpp:117
Amesos2 Superlu declarations.
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition: Amesos2_Superlu_def.hpp:581
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition: Amesos2_Superlu_def.hpp:683
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
Superlu specific solve.
Definition: Amesos2_Superlu_def.hpp:386
A Matrix adapter interface for Amesos2.
Definition: Amesos2_MatrixAdapter_decl.hpp:76
void setParameters_impl(const Teuchos::RCP< Teuchos::ParameterList > &parameterList)
Definition: Amesos2_Superlu_def.hpp:592
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition: Amesos2_Superlu_def.hpp:194
A templated MultiVector class adapter for Amesos2.
Definition: Amesos2_MultiVecAdapter_decl.hpp:176
std::string description() const
Returns a short description of this Solver.
Definition: Amesos2_Superlu_def.hpp:140
int symbolicFactorization_impl()
Perform symbolic factorization of the matrix using Superlu.
Definition: Amesos2_Superlu_def.hpp:219
Amesos2 interface to the SuperLU package.
Definition: Amesos2_Superlu_decl.hpp:76