Amesos2 - Direct Sparse Solver Interfaces  Version of the Day
Amesos2_ShyLUBasker_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Amesos2: Templated Direct Sparse Solver Package
4 //
5 // Copyright 2011 NTESS and the Amesos2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
20 #ifndef AMESOS2_SHYLUBASKER_DEF_HPP
21 #define AMESOS2_SHYLUBASKER_DEF_HPP
22 
23 #include <Teuchos_Tuple.hpp>
24 #include <Teuchos_ParameterList.hpp>
25 #include <Teuchos_StandardParameterEntryValidators.hpp>
26 
29 
30 namespace Amesos2 {
31 
32 
33 template <class Matrix, class Vector>
34 ShyLUBasker<Matrix,Vector>::ShyLUBasker(
35  Teuchos::RCP<const Matrix> A,
36  Teuchos::RCP<Vector> X,
37  Teuchos::RCP<const Vector> B )
38  : SolverCore<Amesos2::ShyLUBasker,Matrix,Vector>(A, X, B)
39  , is_contiguous_(true)
40  , use_gather_(true)
41 {
42 
43  //Nothing
44 
45  // Override some default options
46  // TODO: use data_ here to init
47 #if defined(HAVE_AMESOS2_KOKKOS) && defined(KOKKOS_ENABLE_OPENMP)
48  /*
49  static_assert(std::is_same<kokkos_exe,Kokkos::OpenMP>::value,
50  "Kokkos node type not supported by experimental ShyLUBasker Amesos2");
51  */
52  ShyLUbasker = new ::BaskerNS::BaskerTrilinosInterface<local_ordinal_type, shylubasker_dtype, Exe_Space>();
53  ShyLUbasker->Options.no_pivot = BASKER_FALSE;
54  ShyLUbasker->Options.static_delayed_pivot = 0;
55  ShyLUbasker->Options.symmetric = BASKER_FALSE;
56  ShyLUbasker->Options.realloc = BASKER_TRUE;
57  ShyLUbasker->Options.verbose = BASKER_FALSE;
58  ShyLUbasker->Options.prune = BASKER_TRUE;
59  ShyLUbasker->Options.btf_matching = 2; // use cardinary matching from Trilinos, globally
60  ShyLUbasker->Options.blk_matching = 1; // use max-weight matching from Basker on each diagonal block
61  ShyLUbasker->Options.matrix_scaling = 0; // use matrix scaling on a big A block
62  ShyLUbasker->Options.min_block_size = 0; // no merging small blocks
63  ShyLUbasker->Options.amd_dom = BASKER_TRUE; // use block-wise AMD
64  ShyLUbasker->Options.use_metis = BASKER_TRUE; // use scotch/metis for ND (TODO: should METIS optional?)
65  ShyLUbasker->Options.use_nodeNDP = BASKER_TRUE; // use nodeNDP to compute ND partition
66  ShyLUbasker->Options.run_nd_on_leaves = BASKER_TRUE; // run ND on the final leaf-nodes
67  ShyLUbasker->Options.run_amd_on_leaves = BASKER_FALSE; // run AMD on the final leaf-nodes
68  ShyLUbasker->Options.transpose = BASKER_FALSE;
69  ShyLUbasker->Options.threaded_solve = BASKER_FALSE;
70  ShyLUbasker->Options.replace_zero_pivot = BASKER_TRUE;
71  ShyLUbasker->Options.replace_tiny_pivot = BASKER_FALSE;
72  ShyLUbasker->Options.verbose_matrix_out = BASKER_FALSE;
73 
74  ShyLUbasker->Options.user_fill = (double)BASKER_FILL_USER;
75  ShyLUbasker->Options.use_sequential_diag_facto = BASKER_FALSE;
76 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE
77  num_threads = Kokkos::OpenMP::max_hardware_threads();
78 #else
79  num_threads = Kokkos::OpenMP::impl_max_hardware_threads();
80 #endif
81 
82 #else
83  TEUCHOS_TEST_FOR_EXCEPTION(1 != 0,
84  std::runtime_error,
85  "Amesos2_ShyLUBasker Exception: Do not have supported Kokkos node type (OpenMP) enabled for ShyLUBasker");
86 #endif
87 }
88 
89 
90 template <class Matrix, class Vector>
91 ShyLUBasker<Matrix,Vector>::~ShyLUBasker( )
92 {
93  /* ShyLUBasker will cleanup its own internal memory*/
94 #if defined(HAVE_AMESOS2_KOKKOS) && defined(KOKKOS_ENABLE_OPENMP)
95  ShyLUbasker->Finalize();
96  delete ShyLUbasker;
97 #endif
98 }
99 
100 template <class Matrix, class Vector>
101 bool
103  return (this->root_ && (this->matrixA_->getComm()->getSize() == 1) && is_contiguous_);
104 }
105 
106 template<class Matrix, class Vector>
107 int
109 {
110  /* TODO: Define what it means for ShyLUBasker
111  */
112 #ifdef HAVE_AMESOS2_TIMERS
113  Teuchos::TimeMonitor preOrderTimer(this->timers_.preOrderTime_);
114 #endif
115 
116  return(0);
117 }
118 
119 
120 template <class Matrix, class Vector>
121 int
123 {
124 
125  int info = 0;
126  if(this->root_)
127  {
128  ShyLUbasker->SetThreads(num_threads);
129 
130 
131  // NDE: Special case
132  // Rather than going through the Amesos2 machinery to convert the matrixA_ CRS pointer data to CCS and store in Teuchos::Arrays,
133  // in this special case we pass the CRS raw pointers directly to ShyLUBasker which copies+transposes+sorts the data for CCS format
134  // loadA_impl is essentially an empty function in this case, as the raw pointers are handled here and similarly in Symbolic
135 
136  if ( single_proc_optimization() ) {
137 
138  host_ordinal_type_array sp_rowptr;
139  host_ordinal_type_array sp_colind;
140  // this needs to be checked during loadA_impl...
141  this->matrixA_->returnRowPtr_kokkos_view(sp_rowptr);
142  TEUCHOS_TEST_FOR_EXCEPTION(sp_rowptr.data() == nullptr,
143  std::runtime_error, "Amesos2 Runtime Error: sp_rowptr returned null ");
144  this->matrixA_->returnColInd_kokkos_view(sp_colind);
145  TEUCHOS_TEST_FOR_EXCEPTION(sp_colind.data() == nullptr,
146  std::runtime_error, "Amesos2 Runtime Error: sp_colind returned null ");
147 
148  host_value_type_array hsp_values;
149  this->matrixA_->returnValues_kokkos_view(hsp_values);
150  shylubasker_dtype * sp_values = function_map::convert_scalar(hsp_values.data());
151  //shylubasker_dtype * sp_values = function_map::convert_scalar(nzvals_view_.data());
152  TEUCHOS_TEST_FOR_EXCEPTION(sp_values == nullptr,
153  std::runtime_error, "Amesos2 Runtime Error: sp_values returned null ");
154 
155  // In this case, colptr_, rowind_, nzvals_ are invalid
156  info = ShyLUbasker->Symbolic(this->globalNumRows_,
157  this->globalNumCols_,
158  this->globalNumNonZeros_,
159  sp_rowptr.data(),
160  sp_colind.data(),
161  sp_values,
162  true);
163 
164  TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
165  std::runtime_error, "Error in ShyLUBasker Symbolic");
166  }
167  else
168  { //follow original code path if conditions not met
169  // In this case, loadA_impl updates colptr_, rowind_, nzvals_
170  shylubasker_dtype * sp_values = function_map::convert_scalar(nzvals_view_.data());
171  info = ShyLUbasker->Symbolic(this->globalNumRows_,
172  this->globalNumCols_,
173  this->globalNumNonZeros_,
174  colptr_view_.data(),
175  rowind_view_.data(),
176  sp_values);
177 
178  TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
179  std::runtime_error, "Error in ShyLUBasker Symbolic");
180  }
181  } // end if (this->root_)
182  /*No symbolic factoriztion*/
183 
184  /* All processes should have the same error code */
185  Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
186  return(info);
187 }
188 
189 
190 template <class Matrix, class Vector>
191 int
193 {
194  using Teuchos::as;
195 
196  int info = 0;
197  if ( this->root_ ){
198  { // Do factorization
199 #ifdef HAVE_AMESOS2_TIMERS
200  Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
201 #endif
202 
203  // NDE: Special case
204  // Rather than going through the Amesos2 machinery to convert the matrixA_ CRS pointer data to CCS and store in Teuchos::Arrays,
205  // in this special case we pass the CRS raw pointers directly to ShyLUBasker which copies+transposes+sorts the data for CCS format
206  // loadA_impl is essentially an empty function in this case, as the raw pointers are handled here and similarly in Symbolic
207 
208  if ( single_proc_optimization() ) {
209 
210  host_ordinal_type_array sp_rowptr;
211  host_ordinal_type_array sp_colind;
212  this->matrixA_->returnRowPtr_kokkos_view(sp_rowptr);
213  TEUCHOS_TEST_FOR_EXCEPTION(sp_rowptr.data() == nullptr,
214  std::runtime_error, "Amesos2 Runtime Error: sp_rowptr returned null ");
215  this->matrixA_->returnColInd_kokkos_view(sp_colind);
216  TEUCHOS_TEST_FOR_EXCEPTION(sp_colind.data() == nullptr,
217  std::runtime_error, "Amesos2 Runtime Error: sp_colind returned null ");
218 
219  host_value_type_array hsp_values;
220  this->matrixA_->returnValues_kokkos_view(hsp_values);
221  shylubasker_dtype * sp_values = function_map::convert_scalar(hsp_values.data());
222  //shylubasker_dtype * sp_values = function_map::convert_scalar(nzvals_view_.data());
223 
224  TEUCHOS_TEST_FOR_EXCEPTION(sp_values == nullptr,
225  std::runtime_error, "Amesos2 Runtime Error: sp_values returned null ");
226 
227  // In this case, colptr_, rowind_, nzvals_ are invalid
228  info = ShyLUbasker->Factor( this->globalNumRows_,
229  this->globalNumCols_,
230  this->globalNumNonZeros_,
231  sp_rowptr.data(),
232  sp_colind.data(),
233  sp_values);
234  }
235  else
236  {
237  // In this case, loadA_impl updates colptr_, rowind_, nzvals_
238  shylubasker_dtype * sp_values = function_map::convert_scalar(nzvals_view_.data());
239  info = ShyLUbasker->Factor(this->globalNumRows_,
240  this->globalNumCols_,
241  this->globalNumNonZeros_,
242  colptr_view_.data(),
243  rowind_view_.data(),
244  sp_values);
245  }
246 
247  //ShyLUbasker->DEBUG_PRINT();
248 
249  local_ordinal_type blnnz = local_ordinal_type(0);
250  local_ordinal_type bunnz = local_ordinal_type(0);
251  ShyLUbasker->GetLnnz(blnnz); // Add exception handling?
252  ShyLUbasker->GetUnnz(bunnz);
253 
254  // This is set after numeric factorization complete as pivoting can be used;
255  // In this case, a discrepancy between symbolic and numeric nnz total can occur.
256  this->setNnzLU( as<size_t>( blnnz + bunnz ) );
257 
258  } // end scope for timer
259  } // end if (this->root_)
260 
261  /* All processes should have the same error code */
262  Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
263 
264  //global_size_type info_st = as<global_size_type>(info);
265  TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
266  std::runtime_error, " ShyLUBasker::numericFactorization failed.");
267 
268  return(info);
269 }
270 
271 
272 template <class Matrix, class Vector>
273 int
275  const Teuchos::Ptr<MultiVecAdapter<Vector> > X,
276  const Teuchos::Ptr<const MultiVecAdapter<Vector> > B) const
277 {
278  int ierr = 0; // returned error code
279 
280  using Teuchos::as;
281 
282  const global_size_type ld_rhs = this->root_ ? X->getGlobalLength() : 0;
283  const size_t nrhs = X->getGlobalNumVectors();
284 
285  const bool ShyluBaskerTransposeRequest = this->control_.useTranspose_;
286  const bool initialize_data = true;
287  const bool do_not_initialize_data = false;
288  bool use_gather = use_gather_; // user param
289  use_gather = (use_gather && this->matrixA_->getComm()->getSize() > 1); // only with multiple MPIs
290  use_gather = (use_gather && (std::is_same<scalar_type, float>::value || std::is_same<scalar_type, double>::value)); // only for double or float
291  {
292 #ifdef HAVE_AMESOS2_TIMERS
293  Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
294 #endif
295  if ( single_proc_optimization() && nrhs == 1 ) {
296 
297  // no msp creation
298  Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
299  host_solve_array_t>::do_get(initialize_data, B, bValues_, as<size_t>(ld_rhs));
300 
301  Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
302  host_solve_array_t>::do_get(do_not_initialize_data, X, xValues_, as<size_t>(ld_rhs));
303 
304  } // end if ( single_proc_optimization() && nrhs == 1 )
305  else {
306  if (use_gather) {
307  int rval = B->gather(bValues_, this->perm_g2l, this->recvCountRows, this->recvDisplRows,
308  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED);
309  if (rval == 0) {
310  X->gather(xValues_, this->perm_g2l, this->recvCountRows, this->recvDisplRows,
311  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED);
312  } else {
313  use_gather = false;
314  }
315  }
316  if (!use_gather) {
317  Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
318  host_solve_array_t>::do_get(initialize_data, B, bValues_,
319  as<size_t>(ld_rhs),
320  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
321  this->rowIndexBase_);
322 
323  // See Amesos2_Tacho_def.hpp for notes on why we 'get' x here.
324  Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
325  host_solve_array_t>::do_get(do_not_initialize_data, X, xValues_,
326  as<size_t>(ld_rhs),
327  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
328  this->rowIndexBase_);
329  }
330  }
331  }
332 
333  if ( this->root_ ) { // do solve
334 #ifdef HAVE_AMESOS2_TIMERS
335  Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
336 #endif
337 
338  shylubasker_dtype * pxValues = function_map::convert_scalar(xValues_.data());
339  shylubasker_dtype * pbValues = function_map::convert_scalar(bValues_.data());
340  if (!ShyluBaskerTransposeRequest)
341  ierr = ShyLUbasker->Solve(nrhs, pbValues, pxValues);
342  else
343  ierr = ShyLUbasker->Solve(nrhs, pbValues, pxValues, true);
344  }
345  /* All processes should have the same error code */
346  Teuchos::broadcast(*(this->getComm()), 0, &ierr);
347 
348  TEUCHOS_TEST_FOR_EXCEPTION( ierr > 0,
349  std::runtime_error,
350  "Encountered zero diag element at: " << ierr);
351  TEUCHOS_TEST_FOR_EXCEPTION( ierr == -1,
352  std::runtime_error,
353  "Could not alloc needed working memory for solve" );
354  {
355 #ifdef HAVE_AMESOS2_TIMERS
356  Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
357 #endif
358  if (use_gather) {
359  int rval = X->scatter(xValues_, this->perm_g2l, this->recvCountRows, this->recvDisplRows,
360  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED);
361  if (rval != 0) use_gather = false;
362  }
363  if (!use_gather) {
364  Util::put_1d_data_helper_kokkos_view<
365  MultiVecAdapter<Vector>,host_solve_array_t>::do_put(X, xValues_,
366  as<size_t>(ld_rhs),
367  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED);
368  }
369  }
370  return(ierr);
371 }
372 
373 
374 template <class Matrix, class Vector>
375 bool
377 {
378  // The ShyLUBasker can only handle square for right now
379  return( this->globalNumRows_ == this->globalNumCols_ );
380 }
381 
382 
383 template <class Matrix, class Vector>
384 void
385 ShyLUBasker<Matrix,Vector>::setParameters_impl(const Teuchos::RCP<Teuchos::ParameterList> & parameterList )
386 {
387  using Teuchos::RCP;
388  using Teuchos::getIntegralValue;
389  using Teuchos::ParameterEntryValidator;
390 
391  RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
392 
393  if(parameterList->isParameter("IsContiguous"))
394  {
395  is_contiguous_ = parameterList->get<bool>("IsContiguous");
396  }
397 
398  if(parameterList->isParameter("UseCustomGather"))
399  {
400  use_gather_ = parameterList->get<bool>("UseCustomGather");
401  }
402 
403  if(parameterList->isParameter("num_threads"))
404  {
405  num_threads = parameterList->get<int>("num_threads");
406  }
407  if(parameterList->isParameter("pivot"))
408  {
409  ShyLUbasker->Options.no_pivot = (!parameterList->get<bool>("pivot"));
410  }
411  if(parameterList->isParameter("delayed pivot"))
412  {
413  ShyLUbasker->Options.static_delayed_pivot = (parameterList->get<int>("delayed pivot"));
414  }
415  if(parameterList->isParameter("pivot_tol"))
416  {
417  ShyLUbasker->Options.pivot_tol = parameterList->get<double>("pivot_tol");
418  }
419  if(parameterList->isParameter("symmetric"))
420  {
421  ShyLUbasker->Options.symmetric = parameterList->get<bool>("symmetric");
422  }
423  if(parameterList->isParameter("realloc"))
424  {
425  ShyLUbasker->Options.realloc = parameterList->get<bool>("realloc");
426  }
427  if(parameterList->isParameter("verbose"))
428  {
429  ShyLUbasker->Options.verbose = parameterList->get<bool>("verbose");
430  }
431  if(parameterList->isParameter("verbose_matrix"))
432  {
433  ShyLUbasker->Options.verbose_matrix_out = parameterList->get<bool>("verbose_matrix");
434  }
435  if(parameterList->isParameter("btf"))
436  {
437  ShyLUbasker->Options.btf = parameterList->get<bool>("btf");
438  }
439  if(parameterList->isParameter("use_metis"))
440  {
441  ShyLUbasker->Options.use_metis = parameterList->get<bool>("use_metis");
442  }
443  if(parameterList->isParameter("use_nodeNDP"))
444  {
445  ShyLUbasker->Options.use_nodeNDP = parameterList->get<bool>("use_nodeNDP");
446  }
447  if(parameterList->isParameter("run_nd_on_leaves"))
448  {
449  ShyLUbasker->Options.run_nd_on_leaves = parameterList->get<bool>("run_nd_on_leaves");
450  }
451  if(parameterList->isParameter("run_amd_on_leaves"))
452  {
453  ShyLUbasker->Options.run_amd_on_leaves = parameterList->get<bool>("run_amd_on_leaves");
454  }
455  if(parameterList->isParameter("amd_on_blocks"))
456  {
457  ShyLUbasker->Options.amd_dom = parameterList->get<bool>("amd_on_blocks");
458  }
459  if(parameterList->isParameter("transpose"))
460  {
461  // NDE: set transpose vs non-transpose mode as bool; track separate shylubasker objects
462  const auto transpose = parameterList->get<bool>("transpose");
463  if (transpose == true)
464  this->control_.useTranspose_ = true;
465  }
466  if(parameterList->isParameter("threaded_solve"))
467  {
468  ShyLUbasker->Options.threaded_solve = parameterList->get<bool>("threaded_solve");
469  }
470  if(parameterList->isParameter("use_sequential_diag_facto"))
471  {
472  ShyLUbasker->Options.use_sequential_diag_facto = parameterList->get<bool>("use_sequential_diag_facto");
473  }
474  if(parameterList->isParameter("user_fill"))
475  {
476  ShyLUbasker->Options.user_fill = parameterList->get<double>("user_fill");
477  }
478  if(parameterList->isParameter("prune"))
479  {
480  ShyLUbasker->Options.prune = parameterList->get<bool>("prune");
481  }
482  if(parameterList->isParameter("replace_zero_pivot"))
483  {
484  ShyLUbasker->Options.replace_zero_pivot = parameterList->get<bool>("replace_zero_pivot");
485  }
486  if(parameterList->isParameter("replace_tiny_pivot"))
487  {
488  ShyLUbasker->Options.replace_tiny_pivot = parameterList->get<bool>("replace_tiny_pivot");
489  }
490  if(parameterList->isParameter("btf_matching"))
491  {
492  ShyLUbasker->Options.btf_matching = parameterList->get<int>("btf_matching");
493  if (ShyLUbasker->Options.btf_matching == 1 || ShyLUbasker->Options.btf_matching == 2) {
494  ShyLUbasker->Options.matching = true;
495  } else {
496  ShyLUbasker->Options.matching = false;
497  }
498  }
499  if(parameterList->isParameter("blk_matching"))
500  {
501  ShyLUbasker->Options.blk_matching = parameterList->get<int>("blk_matching");
502  }
503  if(parameterList->isParameter("matrix_scaling"))
504  {
505  ShyLUbasker->Options.matrix_scaling = parameterList->get<int>("matrix_scaling");
506  }
507  if(parameterList->isParameter("min_block_size"))
508  {
509  ShyLUbasker->Options.min_block_size = parameterList->get<int>("min_block_size");
510  }
511 }
512 
513 template <class Matrix, class Vector>
514 Teuchos::RCP<const Teuchos::ParameterList>
516 {
517  using Teuchos::ParameterList;
518 
519  static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
520 
521  if( is_null(valid_params) )
522  {
523  Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
524  pl->set("IsContiguous", true,
525  "Are GIDs contiguous");
526  pl->set("UseCustomGather", true,
527  "Use Matrix-gather routine");
528  pl->set("num_threads", 1,
529  "Number of threads");
530  pl->set("pivot", false,
531  "Should not pivot");
532  pl->set("delayed pivot", 0,
533  "Apply static delayed pivot on a big block");
534  pl->set("pivot_tol", .0001,
535  "Tolerance before pivot, currently not used");
536  pl->set("symmetric", false,
537  "Should Symbolic assume symmetric nonzero pattern");
538  pl->set("realloc" , false,
539  "Should realloc space if not enough");
540  pl->set("verbose", false,
541  "Information about factoring");
542  pl->set("verbose_matrix", false,
543  "Give Permuted Matrices");
544  pl->set("btf", true,
545  "Use BTF ordering");
546  pl->set("prune", false,
547  "Use prune on BTF blocks (Not Supported)");
548  pl->set("btf_matching", 2,
549  "Matching option for BTF: 0 = none, 1 = Basker, 2 = Trilinos (default), (3 = MC64 if enabled)");
550  pl->set("blk_matching", 1,
551  "Matching optioon for block: 0 = none, 1 or anything else = Basker (default), (2 = MC64 if enabled)");
552  pl->set("matrix_scaling", 0,
553  "Use matrix scaling to biig A BTF block: 0 = no-scaling, 1 = symmetric diagonal scaling, 2 = row-max, and then col-max scaling");
554  pl->set("min_block_size", 0,
555  "Size of the minimum diagonal blocks");
556  pl->set("replace_zero_pivot", true,
557  "Replace zero pivots during the numerical factorization");
558  pl->set("replace_tiny_pivot", false,
559  "Replace tiny pivots during the numerical factorization");
560  pl->set("use_metis", true,
561  "Use METIS for ND");
562  pl->set("use_nodeNDP", true,
563  "Use nodeND to compute ND partition");
564  pl->set("run_nd_on_leaves", false,
565  "Run ND on the final leaf-nodes for ND factorization");
566  pl->set("run_amd_on_leaves", false,
567  "Run AMD on the final leaf-nodes for ND factorization");
568  pl->set("amd_on_blocks", true,
569  "Run AMD on each diagonal blocks");
570  pl->set("transpose", false,
571  "Solve the transpose A");
572  pl->set("threaded_solve", false,
573  "Use threads for forward/backward solves");
574  pl->set("use_sequential_diag_facto", false,
575  "Use sequential algorithm to factor each diagonal block");
576  pl->set("user_fill", (double)BASKER_FILL_USER,
577  "User-provided padding for the fill ratio");
578  valid_params = pl;
579  }
580  return valid_params;
581 }
582 
583 
584 template <class Matrix, class Vector>
585 bool
587 {
588  using Teuchos::as;
589  if(current_phase == SOLVE || current_phase == PREORDERING ) return( false );
590 
591  #ifdef HAVE_AMESOS2_TIMERS
592  Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
593  #endif
594 
595 
596  // NDE: Can clean up duplicated code with the #ifdef guards
597  if ( single_proc_optimization() ) {
598  // NDE: Nothing is done in this special case - CRS raw pointers are passed to SHYLUBASKER and transpose of copies handled there
599  // In this case, colptr_, rowind_, nzvals_ are invalid
600  }
601  else
602  {
603 
604  // Only the root image needs storage allocated
605  if( this->root_ && current_phase == SYMBFACT )
606  {
607  Kokkos::resize(nzvals_view_, this->globalNumNonZeros_);
608  Kokkos::resize(rowind_view_, this->globalNumNonZeros_);
609  Kokkos::resize(colptr_view_, this->globalNumCols_ + 1); //this will be wrong for case of gapped col ids, e.g. 0,2,4,9; num_cols = 10 ([0,10)) but num GIDs = 4...
610  }
611 
612  local_ordinal_type nnz_ret = -1;
613  bool use_gather = use_gather_; // user param
614  use_gather = (use_gather && this->matrixA_->getComm()->getSize() > 1); // only with multiple MPIs
615  use_gather = (use_gather && (std::is_same<scalar_type, float>::value || std::is_same<scalar_type, double>::value)); // only for double or float
616  {
617  #ifdef HAVE_AMESOS2_TIMERS
618  Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
619  #endif
620  if (use_gather) {
621  bool column_major = true;
622  if (!is_contiguous_) {
623  auto contig_mat = this->matrixA_->reindex(this->contig_rowmap_, this->contig_colmap_, current_phase);
624  nnz_ret = contig_mat->gather(nzvals_view_, rowind_view_, colptr_view_, this->perm_g2l, this->recvCountRows, this->recvDisplRows, this->recvCounts, this->recvDispls,
625  this->transpose_map, this->nzvals_t, column_major, current_phase);
626  } else {
627  nnz_ret = this->matrixA_->gather(nzvals_view_, rowind_view_, colptr_view_, this->perm_g2l, this->recvCountRows, this->recvDisplRows, this->recvCounts, this->recvDispls,
628  this->transpose_map, this->nzvals_t, column_major, current_phase);
629  }
630  // gather failed (e.g., not implemened for KokkosCrsMatrix)
631  // in case of the failure, it falls back to the original "do_get"
632  if (nnz_ret < 0) use_gather = false;
633  }
634  if (!use_gather) {
636  MatrixAdapter<Matrix>, host_value_type_array, host_ordinal_type_array, host_ordinal_type_array>
637  ::do_get(this->matrixA_.ptr(), nzvals_view_, rowind_view_, colptr_view_, nnz_ret,
638  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
639  ARBITRARY,
640  this->rowIndexBase_); // copies from matrixA_ to ShyLUBasker ConcreteSolver cp, ri, nzval members
641  }
642  }
643 
644  // gather return the total nnz_ret on every MPI process
645  if (use_gather || this->root_) {
646  TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != as<local_ordinal_type>(this->globalNumNonZeros_),
647  std::runtime_error,
648  "Amesos2_ShyLUBasker loadA_impl: Did not get the expected number of non-zero vals("
649  +std::to_string(nnz_ret)+" vs "+std::to_string(this->globalNumNonZeros_)+")");
650  }
651  } //end alternative path
652  return true;
653 }
654 
655 
656 template<class Matrix, class Vector>
657 const char* ShyLUBasker<Matrix,Vector>::name = "ShyLUBasker";
658 
659 
660 } // end namespace Amesos2
661 
662 #endif // AMESOS2_SHYLUBASKER_DEF_HPP
A generic helper class for getting a CCS representation of a Matrix.
Definition: Amesos2_Util.hpp:618
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
ShyLUBasker specific solve.
Definition: Amesos2_ShyLUBasker_def.hpp:274
EPhase
Used to indicate a phase in the direct solution.
Definition: Amesos2_TypeDecl.hpp:31
Amesos2 interface to the Baker package.
Definition: Amesos2_ShyLUBasker_decl.hpp:42
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition: Amesos2_ShyLUBasker_def.hpp:515
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition: Amesos2_ShyLUBasker_def.hpp:108
void transpose(ArrayView< Scalar > vals, ArrayView< GlobalOrdinal > indices, ArrayView< GlobalSizeT > ptr, ArrayView< Scalar > trans_vals, ArrayView< GlobalOrdinal > trans_indices, ArrayView< GlobalSizeT > trans_ptr)
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition: Amesos2_ShyLUBasker_def.hpp:376
Amesos2 ShyLUBasker declarations.
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal structures.
Definition: Amesos2_ShyLUBasker_def.hpp:586
A Matrix adapter interface for Amesos2.
Definition: Amesos2_MatrixAdapter_decl.hpp:42
int numericFactorization_impl()
ShyLUBasker specific numeric factorization.
Definition: Amesos2_ShyLUBasker_def.hpp:192
A templated MultiVector class adapter for Amesos2.
Definition: Amesos2_MultiVecAdapter_decl.hpp:142
bool single_proc_optimization() const
can we optimize size_type and ordinal_type for straight pass through, also check that is_contiguous_ ...
Definition: Amesos2_ShyLUBasker_def.hpp:102