Amesos2 - Direct Sparse Solver Interfaces  Version of the Day
Amesos2_ShyLUBasker_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Amesos2: Templated Direct Sparse Solver Package
4 //
5 // Copyright 2011 NTESS and the Amesos2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
20 #ifndef AMESOS2_SHYLUBASKER_DEF_HPP
21 #define AMESOS2_SHYLUBASKER_DEF_HPP
22 
23 #include <Teuchos_Tuple.hpp>
24 #include <Teuchos_ParameterList.hpp>
25 #include <Teuchos_StandardParameterEntryValidators.hpp>
26 
29 
30 namespace Amesos2 {
31 
32 
33 template <class Matrix, class Vector>
34 ShyLUBasker<Matrix,Vector>::ShyLUBasker(
35  Teuchos::RCP<const Matrix> A,
36  Teuchos::RCP<Vector> X,
37  Teuchos::RCP<const Vector> B )
38  : SolverCore<Amesos2::ShyLUBasker,Matrix,Vector>(A, X, B)
39  , is_contiguous_(true)
40  , use_gather_(true)
41 {
42 
43  //Nothing
44 
45  // Override some default options
46  // TODO: use data_ here to init
47 #if defined(HAVE_AMESOS2_KOKKOS)
48  /*
49  static_assert(std::is_same<kokkos_exe,Kokkos::OpenMP>::value,
50  "Kokkos node type not supported by experimental ShyLUBasker Amesos2");
51  */
52  ShyLUbasker = new ::BaskerNS::BaskerTrilinosInterface<local_ordinal_type, shylubasker_dtype, Exe_Space>();
53  ShyLUbasker->Options.no_pivot = BASKER_FALSE;
54  ShyLUbasker->Options.static_delayed_pivot = 0;
55  ShyLUbasker->Options.symmetric = BASKER_FALSE;
56  ShyLUbasker->Options.realloc = BASKER_TRUE;
57  ShyLUbasker->Options.verbose = BASKER_FALSE;
58  ShyLUbasker->Options.prune = BASKER_TRUE;
59  ShyLUbasker->Options.btf_matching = 2; // use cardinary matching from Trilinos, globally
60  ShyLUbasker->Options.blk_matching = 0; // NOT use max-weight matching from Basker on each diagonal block
61  ShyLUbasker->Options.matrix_scaling = 0; // use matrix scaling on a big A block
62  ShyLUbasker->Options.min_block_size = 0; // no merging small blocks
63  ShyLUbasker->Options.amd_dom = BASKER_TRUE; // use block-wise AMD
64  ShyLUbasker->Options.use_metis = BASKER_TRUE; // use scotch/metis for ND (TODO: should METIS optional?)
65  ShyLUbasker->Options.use_nodeNDP = BASKER_TRUE; // use nodeNDP to compute ND partition
66  ShyLUbasker->Options.run_nd_on_leaves = BASKER_TRUE; // run ND on the final leaf-nodes
67  ShyLUbasker->Options.run_amd_on_leaves = BASKER_FALSE; // run AMD on the final leaf-nodes
68  ShyLUbasker->Options.transpose = BASKER_FALSE;
69  ShyLUbasker->Options.threaded_solve = BASKER_FALSE;
70  ShyLUbasker->Options.replace_zero_pivot = BASKER_TRUE;
71  ShyLUbasker->Options.replace_tiny_pivot = BASKER_FALSE;
72  ShyLUbasker->Options.verbose_matrix_out = BASKER_FALSE;
73 
74  ShyLUbasker->Options.user_fill = (double)BASKER_FILL_USER;
75  ShyLUbasker->Options.use_sequential_diag_facto = BASKER_FALSE;
76 #ifdef KOKKOS_ENABLE_OPENMP // TODO: check for KOKKOS_ENABLE_THREADS when ready
77  num_threads = Kokkos::OpenMP::impl_max_hardware_threads();
78 #else
79  num_threads = 1;
80 #endif
81  ShyLUbasker->Options.worker_threads = false;
82 
83 #else
84  TEUCHOS_TEST_FOR_EXCEPTION(1 != 0,
85  std::runtime_error,
86  "Amesos2_ShyLUBasker Exception: Do not have Kokkos enabled for ShyLUBasker");
87 #endif
88 }
89 
90 
91 template <class Matrix, class Vector>
92 ShyLUBasker<Matrix,Vector>::~ShyLUBasker( )
93 {
94  /* ShyLUBasker will cleanup its own internal memory*/
95 #if defined(HAVE_AMESOS2_KOKKOS)
96  ShyLUbasker->Finalize();
97  delete ShyLUbasker;
98 #endif
99 }
100 
101 template <class Matrix, class Vector>
102 bool
104  return (this->root_ && (this->matrixA_->getComm()->getSize() == 1) && is_contiguous_);
105 }
106 
107 template<class Matrix, class Vector>
108 int
110 {
111  /* TODO: Define what it means for ShyLUBasker
112  */
113 #ifdef HAVE_AMESOS2_TIMERS
114  Teuchos::TimeMonitor preOrderTimer(this->timers_.preOrderTime_);
115 #endif
116 
117  return(0);
118 }
119 
120 
121 template <class Matrix, class Vector>
122 int
124 {
125 
126  int info = 0;
127  if(this->root_)
128  {
129  int nthreads = num_threads;
130  if (ShyLUbasker->Options.worker_threads) {
131  if (nthreads > 1) {
132  // keep one worker-thread / subdomain (where originally subdomain = num_threads)
133  nthreads /= 2;
134  } else {
135  // turn off worker threads if one thread
136  ShyLUbasker->Options.worker_threads = false;
137  }
138  }
139  ShyLUbasker->SetThreads(nthreads);
140 
141 
142  // NDE: Special case
143  // Rather than going through the Amesos2 machinery to convert the matrixA_ CRS pointer data to CCS and store in Teuchos::Arrays,
144  // in this special case we pass the CRS raw pointers directly to ShyLUBasker which copies+transposes+sorts the data for CCS format
145  // loadA_impl is essentially an empty function in this case, as the raw pointers are handled here and similarly in Symbolic
146 
147  if ( single_proc_optimization() ) {
148 
149  host_ordinal_type_array sp_rowptr;
150  host_ordinal_type_array sp_colind;
151  // this needs to be checked during loadA_impl...
152  this->matrixA_->returnRowPtr_kokkos_view(sp_rowptr);
153  TEUCHOS_TEST_FOR_EXCEPTION(sp_rowptr.data() == nullptr,
154  std::runtime_error, "Amesos2 Runtime Error: sp_rowptr returned null ");
155  this->matrixA_->returnColInd_kokkos_view(sp_colind);
156  TEUCHOS_TEST_FOR_EXCEPTION(sp_colind.data() == nullptr,
157  std::runtime_error, "Amesos2 Runtime Error: sp_colind returned null ");
158 
159  host_value_type_array hsp_values;
160  this->matrixA_->returnValues_kokkos_view(hsp_values);
161  shylubasker_dtype * sp_values = function_map::convert_scalar(hsp_values.data());
162  //shylubasker_dtype * sp_values = function_map::convert_scalar(nzvals_view_.data());
163  TEUCHOS_TEST_FOR_EXCEPTION(sp_values == nullptr,
164  std::runtime_error, "Amesos2 Runtime Error: sp_values returned null ");
165 
166  // In this case, colptr_, rowind_, nzvals_ are invalid
167  info = ShyLUbasker->Symbolic(this->globalNumRows_,
168  this->globalNumCols_,
169  this->globalNumNonZeros_,
170  sp_rowptr.data(),
171  sp_colind.data(),
172  sp_values,
173  true); // true = _crs_transpose_needed
174 
175  TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
176  std::runtime_error, "Error in ShyLUBasker Symbolic");
177  }
178  else
179  { //follow original code path if conditions not met
180  // In this case, loadA_impl updates colptr_, rowind_, nzvals_
181  shylubasker_dtype * sp_values = function_map::convert_scalar(nzvals_view_.data());
182  info = ShyLUbasker->Symbolic(this->globalNumRows_,
183  this->globalNumCols_,
184  this->globalNumNonZeros_,
185  colptr_view_.data(),
186  rowind_view_.data(),
187  sp_values);
188 
189  TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
190  std::runtime_error, "Error in ShyLUBasker Symbolic");
191  }
192  } // end if (this->root_)
193  /*No symbolic factoriztion*/
194 
195  /* All processes should have the same error code */
196  Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
197  return(info);
198 }
199 
200 
201 template <class Matrix, class Vector>
202 int
204 {
205  using Teuchos::as;
206 
207  int info = 0;
208  if ( this->root_ ){
209  { // Do factorization
210 #ifdef HAVE_AMESOS2_TIMERS
211  Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
212 #endif
213 
214  // NDE: Special case
215  // Rather than going through the Amesos2 machinery to convert the matrixA_ CRS pointer data to CCS and store in Teuchos::Arrays,
216  // in this special case we pass the CRS raw pointers directly to ShyLUBasker which copies+transposes+sorts the data for CCS format
217  // loadA_impl is essentially an empty function in this case, as the raw pointers are handled here and similarly in Symbolic
218 
219  if ( single_proc_optimization() ) {
220 
221  host_ordinal_type_array sp_rowptr;
222  host_ordinal_type_array sp_colind;
223  this->matrixA_->returnRowPtr_kokkos_view(sp_rowptr);
224  TEUCHOS_TEST_FOR_EXCEPTION(sp_rowptr.data() == nullptr,
225  std::runtime_error, "Amesos2 Runtime Error: sp_rowptr returned null ");
226  this->matrixA_->returnColInd_kokkos_view(sp_colind);
227  TEUCHOS_TEST_FOR_EXCEPTION(sp_colind.data() == nullptr,
228  std::runtime_error, "Amesos2 Runtime Error: sp_colind returned null ");
229 
230  host_value_type_array hsp_values;
231  this->matrixA_->returnValues_kokkos_view(hsp_values);
232  shylubasker_dtype * sp_values = function_map::convert_scalar(hsp_values.data());
233  //shylubasker_dtype * sp_values = function_map::convert_scalar(nzvals_view_.data());
234 
235  TEUCHOS_TEST_FOR_EXCEPTION(sp_values == nullptr,
236  std::runtime_error, "Amesos2 Runtime Error: sp_values returned null ");
237 
238  // In this case, colptr_, rowind_, nzvals_ are invalid
239  info = ShyLUbasker->Factor( this->globalNumRows_,
240  this->globalNumCols_,
241  this->globalNumNonZeros_,
242  sp_rowptr.data(),
243  sp_colind.data(),
244  sp_values);
245  }
246  else
247  {
248  // In this case, loadA_impl updates colptr_, rowind_, nzvals_
249  shylubasker_dtype * sp_values = function_map::convert_scalar(nzvals_view_.data());
250  info = ShyLUbasker->Factor(this->globalNumRows_,
251  this->globalNumCols_,
252  this->globalNumNonZeros_,
253  colptr_view_.data(),
254  rowind_view_.data(),
255  sp_values);
256  }
257 
258  //ShyLUbasker->DEBUG_PRINT();
259 
260  local_ordinal_type blnnz = local_ordinal_type(0);
261  local_ordinal_type bunnz = local_ordinal_type(0);
262  ShyLUbasker->GetLnnz(blnnz); // Add exception handling?
263  ShyLUbasker->GetUnnz(bunnz);
264 
265  // This is set after numeric factorization complete as pivoting can be used;
266  // In this case, a discrepancy between symbolic and numeric nnz total can occur.
267  this->setNnzLU( as<size_t>( blnnz + bunnz ) );
268 
269  } // end scope for timer
270  } // end if (this->root_)
271 
272  /* All processes should have the same error code */
273  Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
274 
275  //global_size_type info_st = as<global_size_type>(info);
276  TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
277  std::runtime_error, " ShyLUBasker::numericFactorization failed.");
278 
279  return(info);
280 }
281 
282 
283 template <class Matrix, class Vector>
284 int
286  const Teuchos::Ptr<MultiVecAdapter<Vector> > X,
287  const Teuchos::Ptr<const MultiVecAdapter<Vector> > B) const
288 {
289  int ierr = 0; // returned error code
290 
291  using Teuchos::as;
292 
293  const global_size_type ld_rhs = this->root_ ? X->getGlobalLength() : 0;
294  const size_t nrhs = X->getGlobalNumVectors();
295 
296  const bool ShyluBaskerTransposeRequest = this->control_.useTranspose_;
297  const bool initialize_data = true;
298  const bool do_not_initialize_data = false;
299  bool use_gather = use_gather_; // user param
300  use_gather = (use_gather && this->matrixA_->getComm()->getSize() > 1); // only with multiple MPIs
301  use_gather = (use_gather && (std::is_same<vector_scalar_type, float>::value ||
302  std::is_same<vector_scalar_type, double>::value)); // only for double or float vectors
303  {
304 #ifdef HAVE_AMESOS2_TIMERS
305  Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
306 #endif
307  if ( single_proc_optimization() && nrhs == 1 ) {
308 
309  // no msp creation
310  Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
311  host_solve_array_t>::do_get(initialize_data, B, bValues_, as<size_t>(ld_rhs));
312 
313  Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
314  host_solve_array_t>::do_get(do_not_initialize_data, X, xValues_, as<size_t>(ld_rhs));
315 
316  } // end if ( single_proc_optimization() && nrhs == 1 )
317  else {
318  if (use_gather) {
319  int rval = B->gather(bValues_, this->perm_g2l, this->recvCountRows, this->recvDisplRows,
320  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED);
321  if (rval == 0) {
322  X->gather(xValues_, this->perm_g2l, this->recvCountRows, this->recvDisplRows,
323  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED);
324  } else {
325  use_gather = false;
326  }
327  }
328  if (!use_gather) {
329  Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
330  host_solve_array_t>::do_get(initialize_data, B, bValues_,
331  as<size_t>(ld_rhs),
332  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
333  this->rowIndexBase_);
334 
335  // See Amesos2_Tacho_def.hpp for notes on why we 'get' x here.
336  Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
337  host_solve_array_t>::do_get(do_not_initialize_data, X, xValues_,
338  as<size_t>(ld_rhs),
339  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
340  this->rowIndexBase_);
341  }
342  }
343  }
344 
345  if ( this->root_ ) { // do solve
346 #ifdef HAVE_AMESOS2_TIMERS
347  Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
348 #endif
349 
350  shylubasker_dtype * pxValues = function_map::convert_scalar(xValues_.data());
351  shylubasker_dtype * pbValues = function_map::convert_scalar(bValues_.data());
352  if (!ShyluBaskerTransposeRequest)
353  ierr = ShyLUbasker->Solve(nrhs, pbValues, pxValues);
354  else
355  ierr = ShyLUbasker->Solve(nrhs, pbValues, pxValues, true);
356  }
357  /* All processes should have the same error code */
358  Teuchos::broadcast(*(this->getComm()), 0, &ierr);
359 
360  TEUCHOS_TEST_FOR_EXCEPTION( ierr > 0,
361  std::runtime_error,
362  "Encountered zero diag element at: " << ierr);
363  TEUCHOS_TEST_FOR_EXCEPTION( ierr == -1,
364  std::runtime_error,
365  "Could not alloc needed working memory for solve" );
366  {
367 #ifdef HAVE_AMESOS2_TIMERS
368  Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
369 #endif
370  if (use_gather) {
371  int rval = X->scatter(xValues_, this->perm_g2l, this->recvCountRows, this->recvDisplRows,
372  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED);
373  if (rval != 0) use_gather = false;
374  }
375  if (!use_gather) {
376  Util::put_1d_data_helper_kokkos_view<
377  MultiVecAdapter<Vector>,host_solve_array_t>::do_put(X, xValues_,
378  as<size_t>(ld_rhs),
379  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED);
380  }
381  }
382  return(ierr);
383 }
384 
385 
386 template <class Matrix, class Vector>
387 bool
389 {
390  // The ShyLUBasker can only handle square for right now
391  return( this->globalNumRows_ == this->globalNumCols_ );
392 }
393 
394 
395 template <class Matrix, class Vector>
396 void
397 ShyLUBasker<Matrix,Vector>::setParameters_impl(const Teuchos::RCP<Teuchos::ParameterList> & parameterList )
398 {
399  using Teuchos::RCP;
400  using Teuchos::getIntegralValue;
401  using Teuchos::ParameterEntryValidator;
402 
403  RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
404 
405  if(parameterList->isParameter("IsContiguous"))
406  {
407  is_contiguous_ = parameterList->get<bool>("IsContiguous");
408  }
409 
410  if(parameterList->isParameter("UseCustomGather"))
411  {
412  use_gather_ = parameterList->get<bool>("UseCustomGather");
413  }
414 
415  if(parameterList->isParameter("num_threads"))
416  {
417  num_threads = parameterList->get<int>("num_threads");
418  }
419  if(parameterList->isParameter("worker_threads"))
420  {
421  ShyLUbasker->Options.worker_threads = parameterList->get<bool>("worker_threads");
422  }
423  if(parameterList->isParameter("pivot"))
424  {
425  ShyLUbasker->Options.no_pivot = (!parameterList->get<bool>("pivot"));
426  }
427  if(parameterList->isParameter("delayed pivot"))
428  {
429  ShyLUbasker->Options.static_delayed_pivot = (parameterList->get<int>("delayed pivot"));
430  }
431  if(parameterList->isParameter("pivot_tol"))
432  {
433  ShyLUbasker->Options.pivot_tol = parameterList->get<double>("pivot_tol");
434  }
435  if(parameterList->isParameter("symmetric"))
436  {
437  ShyLUbasker->Options.symmetric = parameterList->get<bool>("symmetric");
438  }
439  if(parameterList->isParameter("realloc"))
440  {
441  ShyLUbasker->Options.realloc = parameterList->get<bool>("realloc");
442  }
443  if(parameterList->isParameter("verbose"))
444  {
445  ShyLUbasker->Options.verbose = parameterList->get<bool>("verbose");
446  }
447  if(parameterList->isParameter("verbose_matrix"))
448  {
449  ShyLUbasker->Options.verbose_matrix_out = parameterList->get<bool>("verbose_matrix");
450  }
451  if(parameterList->isParameter("btf"))
452  {
453  ShyLUbasker->Options.btf = parameterList->get<bool>("btf");
454  }
455  if(parameterList->isParameter("use_metis"))
456  {
457  ShyLUbasker->Options.use_metis = parameterList->get<bool>("use_metis");
458  }
459  if(parameterList->isParameter("use_nodeNDP"))
460  {
461  ShyLUbasker->Options.use_nodeNDP = parameterList->get<bool>("use_nodeNDP");
462  }
463  if(parameterList->isParameter("run_nd_on_leaves"))
464  {
465  ShyLUbasker->Options.run_nd_on_leaves = parameterList->get<bool>("run_nd_on_leaves");
466  }
467  if(parameterList->isParameter("run_amd_on_leaves"))
468  {
469  ShyLUbasker->Options.run_amd_on_leaves = parameterList->get<bool>("run_amd_on_leaves");
470  }
471  if(parameterList->isParameter("amd_on_blocks"))
472  {
473  ShyLUbasker->Options.amd_dom = parameterList->get<bool>("amd_on_blocks");
474  }
475  if(parameterList->isParameter("transpose"))
476  {
477  // NDE: set transpose vs non-transpose mode as bool; track separate shylubasker objects
478  const auto transpose = parameterList->get<bool>("transpose");
479  if (transpose == true)
480  this->control_.useTranspose_ = true;
481  }
482  if(parameterList->isParameter("threaded_solve"))
483  {
484  ShyLUbasker->Options.threaded_solve = parameterList->get<bool>("threaded_solve");
485  }
486  if(parameterList->isParameter("use_sequential_diag_facto"))
487  {
488  ShyLUbasker->Options.use_sequential_diag_facto = parameterList->get<bool>("use_sequential_diag_facto");
489  }
490  if(parameterList->isParameter("user_fill"))
491  {
492  ShyLUbasker->Options.user_fill = parameterList->get<double>("user_fill");
493  }
494  if(parameterList->isParameter("prune"))
495  {
496  ShyLUbasker->Options.prune = parameterList->get<bool>("prune");
497  }
498  if(parameterList->isParameter("replace_zero_pivot"))
499  {
500  ShyLUbasker->Options.replace_zero_pivot = parameterList->get<bool>("replace_zero_pivot");
501  }
502  if(parameterList->isParameter("replace_tiny_pivot"))
503  {
504  ShyLUbasker->Options.replace_tiny_pivot = parameterList->get<bool>("replace_tiny_pivot");
505  }
506  if(parameterList->isParameter("btf_matching"))
507  {
508  ShyLUbasker->Options.btf_matching = parameterList->get<int>("btf_matching");
509  if (ShyLUbasker->Options.btf_matching == 1 || ShyLUbasker->Options.btf_matching == 2) {
510  ShyLUbasker->Options.matching = true;
511  } else {
512  ShyLUbasker->Options.matching = false;
513  }
514  }
515  if(parameterList->isParameter("blk_matching"))
516  {
517  ShyLUbasker->Options.blk_matching = parameterList->get<int>("blk_matching");
518  }
519  if(parameterList->isParameter("matrix_scaling"))
520  {
521  ShyLUbasker->Options.matrix_scaling = parameterList->get<int>("matrix_scaling");
522  }
523  if(parameterList->isParameter("min_block_size"))
524  {
525  ShyLUbasker->Options.min_block_size = parameterList->get<int>("min_block_size");
526  }
527 }
528 
529 template <class Matrix, class Vector>
530 Teuchos::RCP<const Teuchos::ParameterList>
532 {
533  using Teuchos::ParameterList;
534 
535  static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
536 
537  if( is_null(valid_params) )
538  {
539  Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
540  pl->set("IsContiguous", true,
541  "Are GIDs contiguous");
542  pl->set("UseCustomGather", true,
543  "Use Matrix-gather routine");
544  pl->set("num_threads", 1,
545  "Number of threads");
546  pl->set("pivot", false,
547  "Should not pivot");
548  pl->set("delayed pivot", 0,
549  "Apply static delayed pivot on a big block");
550  pl->set("pivot_tol", .0001,
551  "Tolerance before pivot, currently not used");
552  pl->set("symmetric", false,
553  "Should Symbolic assume symmetric nonzero pattern");
554  pl->set("realloc" , false,
555  "Should realloc space if not enough");
556  pl->set("verbose", false,
557  "Information about factoring");
558  pl->set("verbose_matrix", false,
559  "Give Permuted Matrices");
560  pl->set("btf", true,
561  "Use BTF ordering");
562  pl->set("prune", false,
563  "Use prune on BTF blocks (Not Supported)");
564  pl->set("btf_matching", 2,
565  "Matching option for BTF: 0 = none, 1 = Basker, 2 = Trilinos (default), (3 = MC64 if enabled)");
566  pl->set("blk_matching", 0,
567  "Matching optioon for block: 0 = none, 1 or anything else = Basker (default), (2 = MC64 if enabled)");
568  pl->set("matrix_scaling", 0,
569  "Use matrix scaling to biig A BTF block: 0 = no-scaling, 1 = symmetric diagonal scaling, 2 = row-max, and then col-max scaling");
570  pl->set("min_block_size", 0,
571  "Size of the minimum diagonal blocks");
572  pl->set("replace_zero_pivot", true,
573  "Replace zero pivots during the numerical factorization");
574  pl->set("replace_tiny_pivot", false,
575  "Replace tiny pivots during the numerical factorization");
576  pl->set("use_metis", true,
577  "Use METIS for ND");
578  pl->set("use_nodeNDP", true,
579  "Use nodeND to compute ND partition");
580  pl->set("run_nd_on_leaves", false,
581  "Run ND on the final leaf-nodes for ND factorization");
582  pl->set("run_amd_on_leaves", false,
583  "Run AMD on the final leaf-nodes for ND factorization");
584  pl->set("amd_on_blocks", true,
585  "Run AMD on each diagonal blocks");
586  pl->set("transpose", false,
587  "Solve the transpose A");
588  pl->set("threaded_solve", false,
589  "Use threads for forward/backward solves");
590  pl->set("worker_threads", false,
591  "Use worker thread for ND factorization");
592  pl->set("use_sequential_diag_facto", false,
593  "Use sequential algorithm to factor each diagonal block");
594  pl->set("user_fill", (double)BASKER_FILL_USER,
595  "User-provided padding for the fill ratio");
596  valid_params = pl;
597  }
598  return valid_params;
599 }
600 
601 
602 template <class Matrix, class Vector>
603 bool
605 {
606  using Teuchos::as;
607  if(current_phase == SOLVE || current_phase == PREORDERING ) return( false );
608 
609  #ifdef HAVE_AMESOS2_TIMERS
610  Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
611  #endif
612 
613 
614  // NDE: Can clean up duplicated code with the #ifdef guards
615  if ( single_proc_optimization() ) {
616  // NDE: Nothing is done in this special case - CRS raw pointers are passed to SHYLUBASKER and transpose of copies handled there
617  // In this case, colptr_, rowind_, nzvals_ are invalid
618  }
619  else
620  {
621 
622  // Only the root image needs storage allocated
623  if( this->root_ && current_phase == SYMBFACT )
624  {
625  Kokkos::resize(nzvals_view_, this->globalNumNonZeros_);
626  Kokkos::resize(rowind_view_, this->globalNumNonZeros_);
627  Kokkos::resize(colptr_view_, this->globalNumCols_ + 1); //this will be wrong for case of gapped col ids, e.g. 0,2,4,9; num_cols = 10 ([0,10)) but num GIDs = 4...
628  }
629 
630  local_ordinal_type nnz_ret = -1;
631  bool use_gather = use_gather_; // user param
632  use_gather = (use_gather && this->matrixA_->getComm()->getSize() > 1); // only with multiple MPIs
633  use_gather = (use_gather && (std::is_same<scalar_type, float>::value || std::is_same<scalar_type, double>::value)); // only for double or float
634  {
635  #ifdef HAVE_AMESOS2_TIMERS
636  Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
637  #endif
638  if (use_gather) {
639  bool column_major = true;
640  if (!is_contiguous_) {
641  auto contig_mat = this->matrixA_->reindex(this->contig_rowmap_, this->contig_colmap_, current_phase);
642  nnz_ret = contig_mat->gather(nzvals_view_, rowind_view_, colptr_view_, this->perm_g2l, this->recvCountRows, this->recvDisplRows, this->recvCounts, this->recvDispls,
643  this->transpose_map, this->nzvals_t, column_major, current_phase);
644  } else {
645  nnz_ret = this->matrixA_->gather(nzvals_view_, rowind_view_, colptr_view_, this->perm_g2l, this->recvCountRows, this->recvDisplRows, this->recvCounts, this->recvDispls,
646  this->transpose_map, this->nzvals_t, column_major, current_phase);
647  }
648  // gather failed (e.g., not implemened for KokkosCrsMatrix)
649  // in case of the failure, it falls back to the original "do_get"
650  if (nnz_ret < 0) use_gather = false;
651  }
652  if (!use_gather) {
654  MatrixAdapter<Matrix>, host_value_type_array, host_ordinal_type_array, host_ordinal_type_array>
655  ::do_get(this->matrixA_.ptr(), nzvals_view_, rowind_view_, colptr_view_, nnz_ret,
656  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
657  ARBITRARY,
658  this->rowIndexBase_); // copies from matrixA_ to ShyLUBasker ConcreteSolver cp, ri, nzval members
659  }
660  }
661 
662  // gather return the total nnz_ret on every MPI process
663  if (use_gather || this->root_) {
664  TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != as<local_ordinal_type>(this->globalNumNonZeros_),
665  std::runtime_error,
666  "Amesos2_ShyLUBasker loadA_impl: Did not get the expected number of non-zero vals("
667  +std::to_string(nnz_ret)+" vs "+std::to_string(this->globalNumNonZeros_)+")");
668  }
669  } //end alternative path
670  return true;
671 }
672 
673 
674 template<class Matrix, class Vector>
675 const char* ShyLUBasker<Matrix,Vector>::name = "ShyLUBasker";
676 
677 
678 } // end namespace Amesos2
679 
680 #endif // AMESOS2_SHYLUBASKER_DEF_HPP
A generic helper class for getting a CCS representation of a Matrix.
Definition: Amesos2_Util.hpp:618
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
ShyLUBasker specific solve.
Definition: Amesos2_ShyLUBasker_def.hpp:285
EPhase
Used to indicate a phase in the direct solution.
Definition: Amesos2_TypeDecl.hpp:31
Amesos2 interface to the Baker package.
Definition: Amesos2_ShyLUBasker_decl.hpp:42
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition: Amesos2_ShyLUBasker_def.hpp:531
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition: Amesos2_ShyLUBasker_def.hpp:109
void transpose(ArrayView< Scalar > vals, ArrayView< GlobalOrdinal > indices, ArrayView< GlobalSizeT > ptr, ArrayView< Scalar > trans_vals, ArrayView< GlobalOrdinal > trans_indices, ArrayView< GlobalSizeT > trans_ptr)
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition: Amesos2_ShyLUBasker_def.hpp:388
Amesos2 ShyLUBasker declarations.
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal structures.
Definition: Amesos2_ShyLUBasker_def.hpp:604
A Matrix adapter interface for Amesos2.
Definition: Amesos2_MatrixAdapter_decl.hpp:42
int numericFactorization_impl()
ShyLUBasker specific numeric factorization.
Definition: Amesos2_ShyLUBasker_def.hpp:203
A templated MultiVector class adapter for Amesos2.
Definition: Amesos2_MultiVecAdapter_decl.hpp:142
bool single_proc_optimization() const
can we optimize size_type and ordinal_type for straight pass through, also check that is_contiguous_ ...
Definition: Amesos2_ShyLUBasker_def.hpp:103