Amesos2 - Direct Sparse Solver Interfaces  Version of the Day
Amesos2_ShyLUBasker_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Amesos2: Templated Direct Sparse Solver Package
4 //
5 // Copyright 2011 NTESS and the Amesos2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
20 #ifndef AMESOS2_SHYLUBASKER_DEF_HPP
21 #define AMESOS2_SHYLUBASKER_DEF_HPP
22 
23 #include <Teuchos_Tuple.hpp>
24 #include <Teuchos_ParameterList.hpp>
25 #include <Teuchos_StandardParameterEntryValidators.hpp>
26 
29 
30 namespace Amesos2 {
31 
32 
33 template <class Matrix, class Vector>
34 ShyLUBasker<Matrix,Vector>::ShyLUBasker(
35  Teuchos::RCP<const Matrix> A,
36  Teuchos::RCP<Vector> X,
37  Teuchos::RCP<const Vector> B )
38  : SolverCore<Amesos2::ShyLUBasker,Matrix,Vector>(A, X, B)
39  , is_contiguous_(true)
40  , use_gather_(true)
41 {
42 
43  //Nothing
44 
45  // Override some default options
46  // TODO: use data_ here to init
47 #if defined(HAVE_AMESOS2_KOKKOS) && defined(KOKKOS_ENABLE_OPENMP)
48  /*
49  static_assert(std::is_same<kokkos_exe,Kokkos::OpenMP>::value,
50  "Kokkos node type not supported by experimental ShyLUBasker Amesos2");
51  */
52  ShyLUbasker = new ::BaskerNS::BaskerTrilinosInterface<local_ordinal_type, shylubasker_dtype, Exe_Space>();
53  ShyLUbasker->Options.no_pivot = BASKER_FALSE;
54  ShyLUbasker->Options.static_delayed_pivot = 0;
55  ShyLUbasker->Options.symmetric = BASKER_FALSE;
56  ShyLUbasker->Options.realloc = BASKER_TRUE;
57  ShyLUbasker->Options.verbose = BASKER_FALSE;
58  ShyLUbasker->Options.prune = BASKER_TRUE;
59  ShyLUbasker->Options.btf_matching = 2; // use cardinary matching from Trilinos, globally
60  ShyLUbasker->Options.blk_matching = 1; // use max-weight matching from Basker on each diagonal block
61  ShyLUbasker->Options.matrix_scaling = 0; // use matrix scaling on a big A block
62  ShyLUbasker->Options.min_block_size = 0; // no merging small blocks
63  ShyLUbasker->Options.amd_dom = BASKER_TRUE; // use block-wise AMD
64  ShyLUbasker->Options.use_metis = BASKER_TRUE; // use scotch/metis for ND (TODO: should METIS optional?)
65  ShyLUbasker->Options.use_nodeNDP = BASKER_TRUE; // use nodeNDP to compute ND partition
66  ShyLUbasker->Options.run_nd_on_leaves = BASKER_TRUE; // run ND on the final leaf-nodes
67  ShyLUbasker->Options.run_amd_on_leaves = BASKER_FALSE; // run AMD on the final leaf-nodes
68  ShyLUbasker->Options.transpose = BASKER_FALSE;
69  ShyLUbasker->Options.replace_zero_pivot = BASKER_TRUE;
70  ShyLUbasker->Options.replace_tiny_pivot = BASKER_FALSE;
71  ShyLUbasker->Options.verbose_matrix_out = BASKER_FALSE;
72 
73  ShyLUbasker->Options.user_fill = (double)BASKER_FILL_USER;
74  ShyLUbasker->Options.use_sequential_diag_facto = BASKER_FALSE;
75 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE
76  num_threads = Kokkos::OpenMP::max_hardware_threads();
77 #else
78  num_threads = Kokkos::OpenMP::impl_max_hardware_threads();
79 #endif
80 
81 #else
82  TEUCHOS_TEST_FOR_EXCEPTION(1 != 0,
83  std::runtime_error,
84  "Amesos2_ShyLUBasker Exception: Do not have supported Kokkos node type (OpenMP) enabled for ShyLUBasker");
85 #endif
86 }
87 
88 
89 template <class Matrix, class Vector>
90 ShyLUBasker<Matrix,Vector>::~ShyLUBasker( )
91 {
92  /* ShyLUBasker will cleanup its own internal memory*/
93 #if defined(HAVE_AMESOS2_KOKKOS) && defined(KOKKOS_ENABLE_OPENMP)
94  ShyLUbasker->Finalize();
95  delete ShyLUbasker;
96 #endif
97 }
98 
99 template <class Matrix, class Vector>
100 bool
102  return (this->root_ && (this->matrixA_->getComm()->getSize() == 1) && is_contiguous_);
103 }
104 
105 template<class Matrix, class Vector>
106 int
108 {
109  /* TODO: Define what it means for ShyLUBasker
110  */
111 #ifdef HAVE_AMESOS2_TIMERS
112  Teuchos::TimeMonitor preOrderTimer(this->timers_.preOrderTime_);
113 #endif
114 
115  return(0);
116 }
117 
118 
119 template <class Matrix, class Vector>
120 int
122 {
123 
124  int info = 0;
125  if(this->root_)
126  {
127  ShyLUbasker->SetThreads(num_threads);
128 
129 
130  // NDE: Special case
131  // Rather than going through the Amesos2 machinery to convert the matrixA_ CRS pointer data to CCS and store in Teuchos::Arrays,
132  // in this special case we pass the CRS raw pointers directly to ShyLUBasker which copies+transposes+sorts the data for CCS format
133  // loadA_impl is essentially an empty function in this case, as the raw pointers are handled here and similarly in Symbolic
134 
135  if ( single_proc_optimization() ) {
136 
137  host_ordinal_type_array sp_rowptr;
138  host_ordinal_type_array sp_colind;
139  // this needs to be checked during loadA_impl...
140  this->matrixA_->returnRowPtr_kokkos_view(sp_rowptr);
141  TEUCHOS_TEST_FOR_EXCEPTION(sp_rowptr.data() == nullptr,
142  std::runtime_error, "Amesos2 Runtime Error: sp_rowptr returned null ");
143  this->matrixA_->returnColInd_kokkos_view(sp_colind);
144  TEUCHOS_TEST_FOR_EXCEPTION(sp_colind.data() == nullptr,
145  std::runtime_error, "Amesos2 Runtime Error: sp_colind returned null ");
146 
147  host_value_type_array hsp_values;
148  this->matrixA_->returnValues_kokkos_view(hsp_values);
149  shylubasker_dtype * sp_values = function_map::convert_scalar(hsp_values.data());
150  //shylubasker_dtype * sp_values = function_map::convert_scalar(nzvals_view_.data());
151  TEUCHOS_TEST_FOR_EXCEPTION(sp_values == nullptr,
152  std::runtime_error, "Amesos2 Runtime Error: sp_values returned null ");
153 
154  // In this case, colptr_, rowind_, nzvals_ are invalid
155  info = ShyLUbasker->Symbolic(this->globalNumRows_,
156  this->globalNumCols_,
157  this->globalNumNonZeros_,
158  sp_rowptr.data(),
159  sp_colind.data(),
160  sp_values,
161  true);
162 
163  TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
164  std::runtime_error, "Error in ShyLUBasker Symbolic");
165  }
166  else
167  { //follow original code path if conditions not met
168  // In this case, loadA_impl updates colptr_, rowind_, nzvals_
169  shylubasker_dtype * sp_values = function_map::convert_scalar(nzvals_view_.data());
170  info = ShyLUbasker->Symbolic(this->globalNumRows_,
171  this->globalNumCols_,
172  this->globalNumNonZeros_,
173  colptr_view_.data(),
174  rowind_view_.data(),
175  sp_values);
176 
177  TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
178  std::runtime_error, "Error in ShyLUBasker Symbolic");
179  }
180  } // end if (this->root_)
181  /*No symbolic factoriztion*/
182 
183  /* All processes should have the same error code */
184  Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
185  return(info);
186 }
187 
188 
189 template <class Matrix, class Vector>
190 int
192 {
193  using Teuchos::as;
194 
195  int info = 0;
196  if ( this->root_ ){
197  { // Do factorization
198 #ifdef HAVE_AMESOS2_TIMERS
199  Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
200 #endif
201 
202  // NDE: Special case
203  // Rather than going through the Amesos2 machinery to convert the matrixA_ CRS pointer data to CCS and store in Teuchos::Arrays,
204  // in this special case we pass the CRS raw pointers directly to ShyLUBasker which copies+transposes+sorts the data for CCS format
205  // loadA_impl is essentially an empty function in this case, as the raw pointers are handled here and similarly in Symbolic
206 
207  if ( single_proc_optimization() ) {
208 
209  host_ordinal_type_array sp_rowptr;
210  host_ordinal_type_array sp_colind;
211  this->matrixA_->returnRowPtr_kokkos_view(sp_rowptr);
212  TEUCHOS_TEST_FOR_EXCEPTION(sp_rowptr.data() == nullptr,
213  std::runtime_error, "Amesos2 Runtime Error: sp_rowptr returned null ");
214  this->matrixA_->returnColInd_kokkos_view(sp_colind);
215  TEUCHOS_TEST_FOR_EXCEPTION(sp_colind.data() == nullptr,
216  std::runtime_error, "Amesos2 Runtime Error: sp_colind returned null ");
217 
218  host_value_type_array hsp_values;
219  this->matrixA_->returnValues_kokkos_view(hsp_values);
220  shylubasker_dtype * sp_values = function_map::convert_scalar(hsp_values.data());
221  //shylubasker_dtype * sp_values = function_map::convert_scalar(nzvals_view_.data());
222 
223  TEUCHOS_TEST_FOR_EXCEPTION(sp_values == nullptr,
224  std::runtime_error, "Amesos2 Runtime Error: sp_values returned null ");
225 
226  // In this case, colptr_, rowind_, nzvals_ are invalid
227  info = ShyLUbasker->Factor( this->globalNumRows_,
228  this->globalNumCols_,
229  this->globalNumNonZeros_,
230  sp_rowptr.data(),
231  sp_colind.data(),
232  sp_values);
233  }
234  else
235  {
236  // In this case, loadA_impl updates colptr_, rowind_, nzvals_
237  shylubasker_dtype * sp_values = function_map::convert_scalar(nzvals_view_.data());
238  info = ShyLUbasker->Factor(this->globalNumRows_,
239  this->globalNumCols_,
240  this->globalNumNonZeros_,
241  colptr_view_.data(),
242  rowind_view_.data(),
243  sp_values);
244  }
245 
246  //ShyLUbasker->DEBUG_PRINT();
247 
248  local_ordinal_type blnnz = local_ordinal_type(0);
249  local_ordinal_type bunnz = local_ordinal_type(0);
250  ShyLUbasker->GetLnnz(blnnz); // Add exception handling?
251  ShyLUbasker->GetUnnz(bunnz);
252 
253  // This is set after numeric factorization complete as pivoting can be used;
254  // In this case, a discrepancy between symbolic and numeric nnz total can occur.
255  this->setNnzLU( as<size_t>( blnnz + bunnz ) );
256 
257  } // end scope for timer
258  } // end if (this->root_)
259 
260  /* All processes should have the same error code */
261  Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
262 
263  //global_size_type info_st = as<global_size_type>(info);
264  TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
265  std::runtime_error, " ShyLUBasker::numericFactorization failed.");
266 
267  return(info);
268 }
269 
270 
271 template <class Matrix, class Vector>
272 int
274  const Teuchos::Ptr<MultiVecAdapter<Vector> > X,
275  const Teuchos::Ptr<const MultiVecAdapter<Vector> > B) const
276 {
277  int ierr = 0; // returned error code
278 
279  using Teuchos::as;
280 
281  const global_size_type ld_rhs = this->root_ ? X->getGlobalLength() : 0;
282  const size_t nrhs = X->getGlobalNumVectors();
283 
284  const bool ShyluBaskerTransposeRequest = this->control_.useTranspose_;
285  const bool initialize_data = true;
286  const bool do_not_initialize_data = false;
287  bool use_gather = use_gather_; // user param
288  use_gather = (use_gather && this->matrixA_->getComm()->getSize() > 1); // only with multiple MPIs
289  use_gather = (use_gather && (std::is_same<scalar_type, float>::value || std::is_same<scalar_type, double>::value)); // only for double or float
290  {
291 #ifdef HAVE_AMESOS2_TIMERS
292  Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
293 #endif
294  if ( single_proc_optimization() && nrhs == 1 ) {
295 
296  // no msp creation
297  Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
298  host_solve_array_t>::do_get(initialize_data, B, bValues_, as<size_t>(ld_rhs));
299 
300  Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
301  host_solve_array_t>::do_get(do_not_initialize_data, X, xValues_, as<size_t>(ld_rhs));
302 
303  } // end if ( single_proc_optimization() && nrhs == 1 )
304  else {
305  if (use_gather) {
306  int rval = B->gather(bValues_, this->perm_g2l, this->recvCountRows, this->recvDisplRows,
307  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED);
308  if (rval == 0) {
309  X->gather(xValues_, this->perm_g2l, this->recvCountRows, this->recvDisplRows,
310  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED);
311  } else {
312  use_gather = false;
313  }
314  }
315  if (!use_gather) {
316  Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
317  host_solve_array_t>::do_get(initialize_data, B, bValues_,
318  as<size_t>(ld_rhs),
319  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
320  this->rowIndexBase_);
321 
322  // See Amesos2_Tacho_def.hpp for notes on why we 'get' x here.
323  Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
324  host_solve_array_t>::do_get(do_not_initialize_data, X, xValues_,
325  as<size_t>(ld_rhs),
326  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
327  this->rowIndexBase_);
328  }
329  }
330  }
331 
332  if ( this->root_ ) { // do solve
333 #ifdef HAVE_AMESOS2_TIMERS
334  Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
335 #endif
336 
337  shylubasker_dtype * pxValues = function_map::convert_scalar(xValues_.data());
338  shylubasker_dtype * pbValues = function_map::convert_scalar(bValues_.data());
339  if (!ShyluBaskerTransposeRequest)
340  ierr = ShyLUbasker->Solve(nrhs, pbValues, pxValues);
341  else
342  ierr = ShyLUbasker->Solve(nrhs, pbValues, pxValues, true);
343  }
344  /* All processes should have the same error code */
345  Teuchos::broadcast(*(this->getComm()), 0, &ierr);
346 
347  TEUCHOS_TEST_FOR_EXCEPTION( ierr > 0,
348  std::runtime_error,
349  "Encountered zero diag element at: " << ierr);
350  TEUCHOS_TEST_FOR_EXCEPTION( ierr == -1,
351  std::runtime_error,
352  "Could not alloc needed working memory for solve" );
353  {
354 #ifdef HAVE_AMESOS2_TIMERS
355  Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
356 #endif
357  if (use_gather) {
358  int rval = X->scatter(xValues_, this->perm_g2l, this->recvCountRows, this->recvDisplRows,
359  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED);
360  if (rval != 0) use_gather = false;
361  }
362  if (!use_gather) {
363  Util::put_1d_data_helper_kokkos_view<
364  MultiVecAdapter<Vector>,host_solve_array_t>::do_put(X, xValues_,
365  as<size_t>(ld_rhs),
366  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED);
367  }
368  }
369  return(ierr);
370 }
371 
372 
373 template <class Matrix, class Vector>
374 bool
376 {
377  // The ShyLUBasker can only handle square for right now
378  return( this->globalNumRows_ == this->globalNumCols_ );
379 }
380 
381 
382 template <class Matrix, class Vector>
383 void
384 ShyLUBasker<Matrix,Vector>::setParameters_impl(const Teuchos::RCP<Teuchos::ParameterList> & parameterList )
385 {
386  using Teuchos::RCP;
387  using Teuchos::getIntegralValue;
388  using Teuchos::ParameterEntryValidator;
389 
390  RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
391 
392  if(parameterList->isParameter("IsContiguous"))
393  {
394  is_contiguous_ = parameterList->get<bool>("IsContiguous");
395  }
396 
397  if(parameterList->isParameter("UseCustomGather"))
398  {
399  use_gather_ = parameterList->get<bool>("UseCustomGather");
400  }
401 
402  if(parameterList->isParameter("num_threads"))
403  {
404  num_threads = parameterList->get<int>("num_threads");
405  }
406  if(parameterList->isParameter("pivot"))
407  {
408  ShyLUbasker->Options.no_pivot = (!parameterList->get<bool>("pivot"));
409  }
410  if(parameterList->isParameter("delayed pivot"))
411  {
412  ShyLUbasker->Options.static_delayed_pivot = (parameterList->get<int>("delayed pivot"));
413  }
414  if(parameterList->isParameter("pivot_tol"))
415  {
416  ShyLUbasker->Options.pivot_tol = parameterList->get<double>("pivot_tol");
417  }
418  if(parameterList->isParameter("symmetric"))
419  {
420  ShyLUbasker->Options.symmetric = parameterList->get<bool>("symmetric");
421  }
422  if(parameterList->isParameter("realloc"))
423  {
424  ShyLUbasker->Options.realloc = parameterList->get<bool>("realloc");
425  }
426  if(parameterList->isParameter("verbose"))
427  {
428  ShyLUbasker->Options.verbose = parameterList->get<bool>("verbose");
429  }
430  if(parameterList->isParameter("verbose_matrix"))
431  {
432  ShyLUbasker->Options.verbose_matrix_out = parameterList->get<bool>("verbose_matrix");
433  }
434  if(parameterList->isParameter("btf"))
435  {
436  ShyLUbasker->Options.btf = parameterList->get<bool>("btf");
437  }
438  if(parameterList->isParameter("use_metis"))
439  {
440  ShyLUbasker->Options.use_metis = parameterList->get<bool>("use_metis");
441  }
442  if(parameterList->isParameter("use_nodeNDP"))
443  {
444  ShyLUbasker->Options.use_nodeNDP = parameterList->get<bool>("use_nodeNDP");
445  }
446  if(parameterList->isParameter("run_nd_on_leaves"))
447  {
448  ShyLUbasker->Options.run_nd_on_leaves = parameterList->get<bool>("run_nd_on_leaves");
449  }
450  if(parameterList->isParameter("run_amd_on_leaves"))
451  {
452  ShyLUbasker->Options.run_amd_on_leaves = parameterList->get<bool>("run_amd_on_leaves");
453  }
454  if(parameterList->isParameter("amd_on_blocks"))
455  {
456  ShyLUbasker->Options.amd_dom = parameterList->get<bool>("amd_on_blocks");
457  }
458  if(parameterList->isParameter("transpose"))
459  {
460  // NDE: set transpose vs non-transpose mode as bool; track separate shylubasker objects
461  const auto transpose = parameterList->get<bool>("transpose");
462  if (transpose == true)
463  this->control_.useTranspose_ = true;
464  }
465  if(parameterList->isParameter("use_sequential_diag_facto"))
466  {
467  ShyLUbasker->Options.use_sequential_diag_facto = parameterList->get<bool>("use_sequential_diag_facto");
468  }
469  if(parameterList->isParameter("user_fill"))
470  {
471  ShyLUbasker->Options.user_fill = parameterList->get<double>("user_fill");
472  }
473  if(parameterList->isParameter("prune"))
474  {
475  ShyLUbasker->Options.prune = parameterList->get<bool>("prune");
476  }
477  if(parameterList->isParameter("replace_zero_pivot"))
478  {
479  ShyLUbasker->Options.replace_zero_pivot = parameterList->get<bool>("replace_zero_pivot");
480  }
481  if(parameterList->isParameter("replace_tiny_pivot"))
482  {
483  ShyLUbasker->Options.replace_tiny_pivot = parameterList->get<bool>("replace_tiny_pivot");
484  }
485  if(parameterList->isParameter("btf_matching"))
486  {
487  ShyLUbasker->Options.btf_matching = parameterList->get<int>("btf_matching");
488  if (ShyLUbasker->Options.btf_matching == 1 || ShyLUbasker->Options.btf_matching == 2) {
489  ShyLUbasker->Options.matching = true;
490  } else {
491  ShyLUbasker->Options.matching = false;
492  }
493  }
494  if(parameterList->isParameter("blk_matching"))
495  {
496  ShyLUbasker->Options.blk_matching = parameterList->get<int>("blk_matching");
497  }
498  if(parameterList->isParameter("matrix_scaling"))
499  {
500  ShyLUbasker->Options.matrix_scaling = parameterList->get<int>("matrix_scaling");
501  }
502  if(parameterList->isParameter("min_block_size"))
503  {
504  ShyLUbasker->Options.min_block_size = parameterList->get<int>("min_block_size");
505  }
506 }
507 
508 template <class Matrix, class Vector>
509 Teuchos::RCP<const Teuchos::ParameterList>
511 {
512  using Teuchos::ParameterList;
513 
514  static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
515 
516  if( is_null(valid_params) )
517  {
518  Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
519  pl->set("IsContiguous", true,
520  "Are GIDs contiguous");
521  pl->set("UseCustomGather", true,
522  "Use Matrix-gather routine");
523  pl->set("num_threads", 1,
524  "Number of threads");
525  pl->set("pivot", false,
526  "Should not pivot");
527  pl->set("delayed pivot", 0,
528  "Apply static delayed pivot on a big block");
529  pl->set("pivot_tol", .0001,
530  "Tolerance before pivot, currently not used");
531  pl->set("symmetric", false,
532  "Should Symbolic assume symmetric nonzero pattern");
533  pl->set("realloc" , false,
534  "Should realloc space if not enough");
535  pl->set("verbose", false,
536  "Information about factoring");
537  pl->set("verbose_matrix", false,
538  "Give Permuted Matrices");
539  pl->set("btf", true,
540  "Use BTF ordering");
541  pl->set("prune", false,
542  "Use prune on BTF blocks (Not Supported)");
543  pl->set("btf_matching", 2,
544  "Matching option for BTF: 0 = none, 1 = Basker, 2 = Trilinos (default), (3 = MC64 if enabled)");
545  pl->set("blk_matching", 1,
546  "Matching optioon for block: 0 = none, 1 or anything else = Basker (default), (2 = MC64 if enabled)");
547  pl->set("matrix_scaling", 0,
548  "Use matrix scaling to biig A BTF block: 0 = no-scaling, 1 = symmetric diagonal scaling, 2 = row-max, and then col-max scaling");
549  pl->set("min_block_size", 0,
550  "Size of the minimum diagonal blocks");
551  pl->set("replace_zero_pivot", true,
552  "Replace zero pivots during the numerical factorization");
553  pl->set("replace_tiny_pivot", false,
554  "Replace tiny pivots during the numerical factorization");
555  pl->set("use_metis", true,
556  "Use METIS for ND");
557  pl->set("use_nodeNDP", true,
558  "Use nodeND to compute ND partition");
559  pl->set("run_nd_on_leaves", false,
560  "Run ND on the final leaf-nodes for ND factorization");
561  pl->set("run_amd_on_leaves", false,
562  "Run AMD on the final leaf-nodes for ND factorization");
563  pl->set("amd_on_blocks", true,
564  "Run AMD on each diagonal blocks");
565  pl->set("transpose", false,
566  "Solve the transpose A");
567  pl->set("use_sequential_diag_facto", false,
568  "Use sequential algorithm to factor each diagonal block");
569  pl->set("user_fill", (double)BASKER_FILL_USER,
570  "User-provided padding for the fill ratio");
571  valid_params = pl;
572  }
573  return valid_params;
574 }
575 
576 
577 template <class Matrix, class Vector>
578 bool
580 {
581  using Teuchos::as;
582  if(current_phase == SOLVE || current_phase == PREORDERING ) return( false );
583 
584  #ifdef HAVE_AMESOS2_TIMERS
585  Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
586  #endif
587 
588 
589  // NDE: Can clean up duplicated code with the #ifdef guards
590  if ( single_proc_optimization() ) {
591  // NDE: Nothing is done in this special case - CRS raw pointers are passed to SHYLUBASKER and transpose of copies handled there
592  // In this case, colptr_, rowind_, nzvals_ are invalid
593  }
594  else
595  {
596 
597  // Only the root image needs storage allocated
598  if( this->root_ && current_phase == SYMBFACT )
599  {
600  Kokkos::resize(nzvals_view_, this->globalNumNonZeros_);
601  Kokkos::resize(rowind_view_, this->globalNumNonZeros_);
602  Kokkos::resize(colptr_view_, this->globalNumCols_ + 1); //this will be wrong for case of gapped col ids, e.g. 0,2,4,9; num_cols = 10 ([0,10)) but num GIDs = 4...
603  }
604 
605  local_ordinal_type nnz_ret = -1;
606  bool use_gather = use_gather_; // user param
607  use_gather = (use_gather && this->matrixA_->getComm()->getSize() > 1); // only with multiple MPIs
608  use_gather = (use_gather && (std::is_same<scalar_type, float>::value || std::is_same<scalar_type, double>::value)); // only for double or float
609  {
610  #ifdef HAVE_AMESOS2_TIMERS
611  Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
612  #endif
613  if (use_gather) {
614  bool column_major = true;
615  if (!is_contiguous_) {
616  auto contig_mat = this->matrixA_->reindex(this->contig_rowmap_, this->contig_colmap_, current_phase);
617  nnz_ret = contig_mat->gather(nzvals_view_, rowind_view_, colptr_view_, this->perm_g2l, this->recvCountRows, this->recvDisplRows, this->recvCounts, this->recvDispls,
618  this->transpose_map, this->nzvals_t, column_major, current_phase);
619  } else {
620  nnz_ret = this->matrixA_->gather(nzvals_view_, rowind_view_, colptr_view_, this->perm_g2l, this->recvCountRows, this->recvDisplRows, this->recvCounts, this->recvDispls,
621  this->transpose_map, this->nzvals_t, column_major, current_phase);
622  }
623  // gather failed (e.g., not implemened for KokkosCrsMatrix)
624  // in case of the failure, it falls back to the original "do_get"
625  if (nnz_ret < 0) use_gather = false;
626  }
627  if (!use_gather) {
629  MatrixAdapter<Matrix>, host_value_type_array, host_ordinal_type_array, host_ordinal_type_array>
630  ::do_get(this->matrixA_.ptr(), nzvals_view_, rowind_view_, colptr_view_, nnz_ret,
631  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
632  ARBITRARY,
633  this->rowIndexBase_); // copies from matrixA_ to ShyLUBasker ConcreteSolver cp, ri, nzval members
634  }
635  }
636 
637  // gather return the total nnz_ret on every MPI process
638  if (use_gather || this->root_) {
639  TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != as<local_ordinal_type>(this->globalNumNonZeros_),
640  std::runtime_error,
641  "Amesos2_ShyLUBasker loadA_impl: Did not get the expected number of non-zero vals("
642  +std::to_string(nnz_ret)+" vs "+std::to_string(this->globalNumNonZeros_)+")");
643  }
644  } //end alternative path
645  return true;
646 }
647 
648 
649 template<class Matrix, class Vector>
650 const char* ShyLUBasker<Matrix,Vector>::name = "ShyLUBasker";
651 
652 
653 } // end namespace Amesos2
654 
655 #endif // AMESOS2_SHYLUBASKER_DEF_HPP
A generic helper class for getting a CCS representation of a Matrix.
Definition: Amesos2_Util.hpp:614
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
ShyLUBasker specific solve.
Definition: Amesos2_ShyLUBasker_def.hpp:273
EPhase
Used to indicate a phase in the direct solution.
Definition: Amesos2_TypeDecl.hpp:31
Amesos2 interface to the Baker package.
Definition: Amesos2_ShyLUBasker_decl.hpp:42
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition: Amesos2_ShyLUBasker_def.hpp:510
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition: Amesos2_ShyLUBasker_def.hpp:107
void transpose(ArrayView< Scalar > vals, ArrayView< GlobalOrdinal > indices, ArrayView< GlobalSizeT > ptr, ArrayView< Scalar > trans_vals, ArrayView< GlobalOrdinal > trans_indices, ArrayView< GlobalSizeT > trans_ptr)
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition: Amesos2_ShyLUBasker_def.hpp:375
Amesos2 ShyLUBasker declarations.
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal structures.
Definition: Amesos2_ShyLUBasker_def.hpp:579
A Matrix adapter interface for Amesos2.
Definition: Amesos2_MatrixAdapter_decl.hpp:42
int numericFactorization_impl()
ShyLUBasker specific numeric factorization.
Definition: Amesos2_ShyLUBasker_def.hpp:191
A templated MultiVector class adapter for Amesos2.
Definition: Amesos2_MultiVecAdapter_decl.hpp:142
bool single_proc_optimization() const
can we optimize size_type and ordinal_type for straight pass through, also check that is_contiguous_ ...
Definition: Amesos2_ShyLUBasker_def.hpp:101