20 #ifndef AMESOS2_SHYLUBASKER_DEF_HPP
21 #define AMESOS2_SHYLUBASKER_DEF_HPP
23 #include <Teuchos_Tuple.hpp>
24 #include <Teuchos_ParameterList.hpp>
25 #include <Teuchos_StandardParameterEntryValidators.hpp>
33 template <
class Matrix,
class Vector>
34 ShyLUBasker<Matrix,Vector>::ShyLUBasker(
35 Teuchos::RCP<const Matrix> A,
36 Teuchos::RCP<Vector> X,
37 Teuchos::RCP<const Vector> B )
38 : SolverCore<Amesos2::ShyLUBasker,Matrix,Vector>(A, X, B)
39 , is_contiguous_(true)
47 #if defined(HAVE_AMESOS2_KOKKOS)
52 ShyLUbasker = new ::BaskerNS::BaskerTrilinosInterface<local_ordinal_type, shylubasker_dtype, Exe_Space>();
53 ShyLUbasker->Options.no_pivot = BASKER_FALSE;
54 ShyLUbasker->Options.static_delayed_pivot = 0;
55 ShyLUbasker->Options.symmetric = BASKER_FALSE;
56 ShyLUbasker->Options.realloc = BASKER_TRUE;
57 ShyLUbasker->Options.verbose = BASKER_FALSE;
58 ShyLUbasker->Options.prune = BASKER_TRUE;
59 ShyLUbasker->Options.btf_matching = 2;
60 ShyLUbasker->Options.blk_matching = 0;
61 ShyLUbasker->Options.matrix_scaling = 0;
62 ShyLUbasker->Options.min_block_size = 0;
63 ShyLUbasker->Options.amd_dom = BASKER_TRUE;
64 ShyLUbasker->Options.use_metis = BASKER_TRUE;
65 ShyLUbasker->Options.use_nodeNDP = BASKER_TRUE;
66 ShyLUbasker->Options.run_nd_on_leaves = BASKER_TRUE;
67 ShyLUbasker->Options.run_amd_on_leaves = BASKER_FALSE;
68 ShyLUbasker->Options.transpose = BASKER_FALSE;
69 ShyLUbasker->Options.threaded_solve = BASKER_FALSE;
70 ShyLUbasker->Options.replace_zero_pivot = BASKER_TRUE;
71 ShyLUbasker->Options.replace_tiny_pivot = BASKER_FALSE;
72 ShyLUbasker->Options.verbose_matrix_out = BASKER_FALSE;
74 ShyLUbasker->Options.user_fill = (double)BASKER_FILL_USER;
75 ShyLUbasker->Options.use_sequential_diag_facto = BASKER_FALSE;
76 #ifdef KOKKOS_ENABLE_OPENMP // TODO: check for KOKKOS_ENABLE_THREADS when ready
77 num_threads = Kokkos::OpenMP::impl_max_hardware_threads();
81 ShyLUbasker->Options.worker_threads =
false;
84 TEUCHOS_TEST_FOR_EXCEPTION(1 != 0,
86 "Amesos2_ShyLUBasker Exception: Do not have Kokkos enabled for ShyLUBasker");
91 template <
class Matrix,
class Vector>
92 ShyLUBasker<Matrix,Vector>::~ShyLUBasker( )
95 #if defined(HAVE_AMESOS2_KOKKOS)
96 ShyLUbasker->Finalize();
101 template <
class Matrix,
class Vector>
104 return (this->root_ && (this->matrixA_->getComm()->getSize() == 1) && is_contiguous_);
107 template<
class Matrix,
class Vector>
113 #ifdef HAVE_AMESOS2_TIMERS
114 Teuchos::TimeMonitor preOrderTimer(this->timers_.preOrderTime_);
121 template <
class Matrix,
class Vector>
129 int nthreads = num_threads;
130 if (ShyLUbasker->Options.worker_threads) {
136 ShyLUbasker->Options.worker_threads =
false;
139 ShyLUbasker->SetThreads(nthreads);
147 if ( single_proc_optimization() ) {
149 host_ordinal_type_array sp_rowptr;
150 host_ordinal_type_array sp_colind;
152 this->matrixA_->returnRowPtr_kokkos_view(sp_rowptr);
153 TEUCHOS_TEST_FOR_EXCEPTION(sp_rowptr.data() ==
nullptr,
154 std::runtime_error,
"Amesos2 Runtime Error: sp_rowptr returned null ");
155 this->matrixA_->returnColInd_kokkos_view(sp_colind);
156 TEUCHOS_TEST_FOR_EXCEPTION(sp_colind.data() ==
nullptr,
157 std::runtime_error,
"Amesos2 Runtime Error: sp_colind returned null ");
159 host_value_type_array hsp_values;
160 this->matrixA_->returnValues_kokkos_view(hsp_values);
161 shylubasker_dtype * sp_values = function_map::convert_scalar(hsp_values.data());
163 TEUCHOS_TEST_FOR_EXCEPTION(sp_values ==
nullptr,
164 std::runtime_error,
"Amesos2 Runtime Error: sp_values returned null ");
167 info = ShyLUbasker->Symbolic(this->globalNumRows_,
168 this->globalNumCols_,
169 this->globalNumNonZeros_,
175 TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
176 std::runtime_error,
"Error in ShyLUBasker Symbolic");
181 shylubasker_dtype * sp_values = function_map::convert_scalar(nzvals_view_.data());
182 info = ShyLUbasker->Symbolic(this->globalNumRows_,
183 this->globalNumCols_,
184 this->globalNumNonZeros_,
189 TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
190 std::runtime_error,
"Error in ShyLUBasker Symbolic");
196 Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
201 template <
class Matrix,
class Vector>
210 #ifdef HAVE_AMESOS2_TIMERS
211 Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
219 if ( single_proc_optimization() ) {
221 host_ordinal_type_array sp_rowptr;
222 host_ordinal_type_array sp_colind;
223 this->matrixA_->returnRowPtr_kokkos_view(sp_rowptr);
224 TEUCHOS_TEST_FOR_EXCEPTION(sp_rowptr.data() ==
nullptr,
225 std::runtime_error,
"Amesos2 Runtime Error: sp_rowptr returned null ");
226 this->matrixA_->returnColInd_kokkos_view(sp_colind);
227 TEUCHOS_TEST_FOR_EXCEPTION(sp_colind.data() ==
nullptr,
228 std::runtime_error,
"Amesos2 Runtime Error: sp_colind returned null ");
230 host_value_type_array hsp_values;
231 this->matrixA_->returnValues_kokkos_view(hsp_values);
232 shylubasker_dtype * sp_values = function_map::convert_scalar(hsp_values.data());
235 TEUCHOS_TEST_FOR_EXCEPTION(sp_values ==
nullptr,
236 std::runtime_error,
"Amesos2 Runtime Error: sp_values returned null ");
239 info = ShyLUbasker->Factor( this->globalNumRows_,
240 this->globalNumCols_,
241 this->globalNumNonZeros_,
249 shylubasker_dtype * sp_values = function_map::convert_scalar(nzvals_view_.data());
250 info = ShyLUbasker->Factor(this->globalNumRows_,
251 this->globalNumCols_,
252 this->globalNumNonZeros_,
260 local_ordinal_type blnnz = local_ordinal_type(0);
261 local_ordinal_type bunnz = local_ordinal_type(0);
262 ShyLUbasker->GetLnnz(blnnz);
263 ShyLUbasker->GetUnnz(bunnz);
267 this->setNnzLU( as<size_t>( blnnz + bunnz ) );
273 Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
276 TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
277 std::runtime_error,
" ShyLUBasker::numericFactorization failed.");
283 template <
class Matrix,
class Vector>
293 const global_size_type ld_rhs = this->root_ ? X->getGlobalLength() : 0;
294 const size_t nrhs = X->getGlobalNumVectors();
296 const bool ShyluBaskerTransposeRequest = this->control_.useTranspose_;
297 const bool initialize_data =
true;
298 const bool do_not_initialize_data =
false;
299 bool use_gather = use_gather_;
300 use_gather = (use_gather && this->matrixA_->getComm()->getSize() > 1);
301 use_gather = (use_gather && (std::is_same<vector_scalar_type, float>::value ||
302 std::is_same<vector_scalar_type, double>::value));
304 #ifdef HAVE_AMESOS2_TIMERS
305 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
307 if ( single_proc_optimization() && nrhs == 1 ) {
310 Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
311 host_solve_array_t>::do_get(initialize_data, B, bValues_, as<size_t>(ld_rhs));
313 Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
314 host_solve_array_t>::do_get(do_not_initialize_data, X, xValues_, as<size_t>(ld_rhs));
319 int rval = B->gather(bValues_, this->perm_g2l, this->recvCountRows, this->recvDisplRows,
320 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED);
322 X->gather(xValues_, this->perm_g2l, this->recvCountRows, this->recvDisplRows,
323 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED);
329 Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
330 host_solve_array_t>::do_get(initialize_data, B, bValues_,
332 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
333 this->rowIndexBase_);
336 Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
337 host_solve_array_t>::do_get(do_not_initialize_data, X, xValues_,
339 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
340 this->rowIndexBase_);
346 #ifdef HAVE_AMESOS2_TIMERS
347 Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
350 shylubasker_dtype * pxValues = function_map::convert_scalar(xValues_.data());
351 shylubasker_dtype * pbValues = function_map::convert_scalar(bValues_.data());
352 if (!ShyluBaskerTransposeRequest)
353 ierr = ShyLUbasker->Solve(nrhs, pbValues, pxValues);
355 ierr = ShyLUbasker->Solve(nrhs, pbValues, pxValues,
true);
358 Teuchos::broadcast(*(this->getComm()), 0, &ierr);
360 TEUCHOS_TEST_FOR_EXCEPTION( ierr > 0,
362 "Encountered zero diag element at: " << ierr);
363 TEUCHOS_TEST_FOR_EXCEPTION( ierr == -1,
365 "Could not alloc needed working memory for solve" );
367 #ifdef HAVE_AMESOS2_TIMERS
368 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
371 int rval = X->scatter(xValues_, this->perm_g2l, this->recvCountRows, this->recvDisplRows,
372 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED);
373 if (rval != 0) use_gather =
false;
376 Util::put_1d_data_helper_kokkos_view<
379 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED);
386 template <
class Matrix,
class Vector>
391 return( this->globalNumRows_ == this->globalNumCols_ );
395 template <
class Matrix,
class Vector>
400 using Teuchos::getIntegralValue;
401 using Teuchos::ParameterEntryValidator;
403 RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
405 if(parameterList->isParameter(
"IsContiguous"))
407 is_contiguous_ = parameterList->get<
bool>(
"IsContiguous");
410 if(parameterList->isParameter(
"UseCustomGather"))
412 use_gather_ = parameterList->get<
bool>(
"UseCustomGather");
415 if(parameterList->isParameter(
"num_threads"))
417 num_threads = parameterList->get<
int>(
"num_threads");
419 if(parameterList->isParameter(
"worker_threads"))
421 ShyLUbasker->Options.worker_threads = parameterList->get<
bool>(
"worker_threads");
423 if(parameterList->isParameter(
"pivot"))
425 ShyLUbasker->Options.no_pivot = (!parameterList->get<
bool>(
"pivot"));
427 if(parameterList->isParameter(
"delayed pivot"))
429 ShyLUbasker->Options.static_delayed_pivot = (parameterList->get<
int>(
"delayed pivot"));
431 if(parameterList->isParameter(
"pivot_tol"))
433 ShyLUbasker->Options.pivot_tol = parameterList->get<
double>(
"pivot_tol");
435 if(parameterList->isParameter(
"symmetric"))
437 ShyLUbasker->Options.symmetric = parameterList->get<
bool>(
"symmetric");
439 if(parameterList->isParameter(
"realloc"))
441 ShyLUbasker->Options.realloc = parameterList->get<
bool>(
"realloc");
443 if(parameterList->isParameter(
"verbose"))
445 ShyLUbasker->Options.verbose = parameterList->get<
bool>(
"verbose");
447 if(parameterList->isParameter(
"verbose_matrix"))
449 ShyLUbasker->Options.verbose_matrix_out = parameterList->get<
bool>(
"verbose_matrix");
451 if(parameterList->isParameter(
"btf"))
453 ShyLUbasker->Options.btf = parameterList->get<
bool>(
"btf");
455 if(parameterList->isParameter(
"use_metis"))
457 ShyLUbasker->Options.use_metis = parameterList->get<
bool>(
"use_metis");
459 if(parameterList->isParameter(
"use_nodeNDP"))
461 ShyLUbasker->Options.use_nodeNDP = parameterList->get<
bool>(
"use_nodeNDP");
463 if(parameterList->isParameter(
"run_nd_on_leaves"))
465 ShyLUbasker->Options.run_nd_on_leaves = parameterList->get<
bool>(
"run_nd_on_leaves");
467 if(parameterList->isParameter(
"run_amd_on_leaves"))
469 ShyLUbasker->Options.run_amd_on_leaves = parameterList->get<
bool>(
"run_amd_on_leaves");
471 if(parameterList->isParameter(
"amd_on_blocks"))
473 ShyLUbasker->Options.amd_dom = parameterList->get<
bool>(
"amd_on_blocks");
475 if(parameterList->isParameter(
"transpose"))
478 const auto transpose = parameterList->get<
bool>(
"transpose");
480 this->control_.useTranspose_ =
true;
482 if(parameterList->isParameter(
"threaded_solve"))
484 ShyLUbasker->Options.threaded_solve = parameterList->get<
bool>(
"threaded_solve");
486 if(parameterList->isParameter(
"use_sequential_diag_facto"))
488 ShyLUbasker->Options.use_sequential_diag_facto = parameterList->get<
bool>(
"use_sequential_diag_facto");
490 if(parameterList->isParameter(
"user_fill"))
492 ShyLUbasker->Options.user_fill = parameterList->get<
double>(
"user_fill");
494 if(parameterList->isParameter(
"prune"))
496 ShyLUbasker->Options.prune = parameterList->get<
bool>(
"prune");
498 if(parameterList->isParameter(
"replace_zero_pivot"))
500 ShyLUbasker->Options.replace_zero_pivot = parameterList->get<
bool>(
"replace_zero_pivot");
502 if(parameterList->isParameter(
"replace_tiny_pivot"))
504 ShyLUbasker->Options.replace_tiny_pivot = parameterList->get<
bool>(
"replace_tiny_pivot");
506 if(parameterList->isParameter(
"btf_matching"))
508 ShyLUbasker->Options.btf_matching = parameterList->get<
int>(
"btf_matching");
509 if (ShyLUbasker->Options.btf_matching == 1 || ShyLUbasker->Options.btf_matching == 2) {
510 ShyLUbasker->Options.matching =
true;
512 ShyLUbasker->Options.matching =
false;
515 if(parameterList->isParameter(
"blk_matching"))
517 ShyLUbasker->Options.blk_matching = parameterList->get<
int>(
"blk_matching");
519 if(parameterList->isParameter(
"matrix_scaling"))
521 ShyLUbasker->Options.matrix_scaling = parameterList->get<
int>(
"matrix_scaling");
523 if(parameterList->isParameter(
"min_block_size"))
525 ShyLUbasker->Options.min_block_size = parameterList->get<
int>(
"min_block_size");
529 template <
class Matrix,
class Vector>
530 Teuchos::RCP<const Teuchos::ParameterList>
533 using Teuchos::ParameterList;
535 static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
537 if( is_null(valid_params) )
539 Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
540 pl->set(
"IsContiguous",
true,
541 "Are GIDs contiguous");
542 pl->set(
"UseCustomGather",
true,
543 "Use Matrix-gather routine");
544 pl->set(
"num_threads", 1,
545 "Number of threads");
546 pl->set(
"pivot",
false,
548 pl->set(
"delayed pivot", 0,
549 "Apply static delayed pivot on a big block");
550 pl->set(
"pivot_tol", .0001,
551 "Tolerance before pivot, currently not used");
552 pl->set(
"symmetric",
false,
553 "Should Symbolic assume symmetric nonzero pattern");
554 pl->set(
"realloc" ,
false,
555 "Should realloc space if not enough");
556 pl->set(
"verbose",
false,
557 "Information about factoring");
558 pl->set(
"verbose_matrix",
false,
559 "Give Permuted Matrices");
562 pl->set(
"prune",
false,
563 "Use prune on BTF blocks (Not Supported)");
564 pl->set(
"btf_matching", 2,
565 "Matching option for BTF: 0 = none, 1 = Basker, 2 = Trilinos (default), (3 = MC64 if enabled)");
566 pl->set(
"blk_matching", 0,
567 "Matching optioon for block: 0 = none, 1 or anything else = Basker (default), (2 = MC64 if enabled)");
568 pl->set(
"matrix_scaling", 0,
569 "Use matrix scaling to biig A BTF block: 0 = no-scaling, 1 = symmetric diagonal scaling, 2 = row-max, and then col-max scaling");
570 pl->set(
"min_block_size", 0,
571 "Size of the minimum diagonal blocks");
572 pl->set(
"replace_zero_pivot",
true,
573 "Replace zero pivots during the numerical factorization");
574 pl->set(
"replace_tiny_pivot",
false,
575 "Replace tiny pivots during the numerical factorization");
576 pl->set(
"use_metis",
true,
578 pl->set(
"use_nodeNDP",
true,
579 "Use nodeND to compute ND partition");
580 pl->set(
"run_nd_on_leaves",
false,
581 "Run ND on the final leaf-nodes for ND factorization");
582 pl->set(
"run_amd_on_leaves",
false,
583 "Run AMD on the final leaf-nodes for ND factorization");
584 pl->set(
"amd_on_blocks",
true,
585 "Run AMD on each diagonal blocks");
586 pl->set(
"transpose",
false,
587 "Solve the transpose A");
588 pl->set(
"threaded_solve",
false,
589 "Use threads for forward/backward solves");
590 pl->set(
"worker_threads",
false,
591 "Use worker thread for ND factorization");
592 pl->set(
"use_sequential_diag_facto",
false,
593 "Use sequential algorithm to factor each diagonal block");
594 pl->set(
"user_fill", (
double)BASKER_FILL_USER,
595 "User-provided padding for the fill ratio");
602 template <
class Matrix,
class Vector>
607 if(current_phase == SOLVE || current_phase == PREORDERING )
return(
false );
609 #ifdef HAVE_AMESOS2_TIMERS
610 Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
615 if ( single_proc_optimization() ) {
623 if( this->root_ && current_phase == SYMBFACT )
625 Kokkos::resize(nzvals_view_, this->globalNumNonZeros_);
626 Kokkos::resize(rowind_view_, this->globalNumNonZeros_);
627 Kokkos::resize(colptr_view_, this->globalNumCols_ + 1);
630 local_ordinal_type nnz_ret = -1;
631 bool use_gather = use_gather_;
632 use_gather = (use_gather && this->matrixA_->getComm()->getSize() > 1);
633 use_gather = (use_gather && (std::is_same<scalar_type, float>::value || std::is_same<scalar_type, double>::value));
635 #ifdef HAVE_AMESOS2_TIMERS
636 Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
639 bool column_major =
true;
640 if (!is_contiguous_) {
641 auto contig_mat = this->matrixA_->reindex(this->contig_rowmap_, this->contig_colmap_, current_phase);
642 nnz_ret = contig_mat->gather(nzvals_view_, rowind_view_, colptr_view_, this->perm_g2l, this->recvCountRows, this->recvDisplRows, this->recvCounts, this->recvDispls,
643 this->transpose_map, this->nzvals_t, column_major, current_phase);
645 nnz_ret = this->matrixA_->gather(nzvals_view_, rowind_view_, colptr_view_, this->perm_g2l, this->recvCountRows, this->recvDisplRows, this->recvCounts, this->recvDispls,
646 this->transpose_map, this->nzvals_t, column_major, current_phase);
650 if (nnz_ret < 0) use_gather =
false;
655 ::do_get(this->matrixA_.ptr(), nzvals_view_, rowind_view_, colptr_view_, nnz_ret,
656 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
658 this->rowIndexBase_);
663 if (use_gather || this->root_) {
664 TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != as<local_ordinal_type>(this->globalNumNonZeros_),
666 "Amesos2_ShyLUBasker loadA_impl: Did not get the expected number of non-zero vals("
667 +std::to_string(nnz_ret)+
" vs "+std::to_string(this->globalNumNonZeros_)+
")");
674 template<
class Matrix,
class Vector>
680 #endif // AMESOS2_SHYLUBASKER_DEF_HPP
A generic helper class for getting a CCS representation of a Matrix.
Definition: Amesos2_Util.hpp:618
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
ShyLUBasker specific solve.
Definition: Amesos2_ShyLUBasker_def.hpp:285
EPhase
Used to indicate a phase in the direct solution.
Definition: Amesos2_TypeDecl.hpp:31
Amesos2 interface to the Baker package.
Definition: Amesos2_ShyLUBasker_decl.hpp:42
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition: Amesos2_ShyLUBasker_def.hpp:531
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition: Amesos2_ShyLUBasker_def.hpp:109
void transpose(ArrayView< Scalar > vals, ArrayView< GlobalOrdinal > indices, ArrayView< GlobalSizeT > ptr, ArrayView< Scalar > trans_vals, ArrayView< GlobalOrdinal > trans_indices, ArrayView< GlobalSizeT > trans_ptr)
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition: Amesos2_ShyLUBasker_def.hpp:388
Amesos2 ShyLUBasker declarations.
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal structures.
Definition: Amesos2_ShyLUBasker_def.hpp:604
A Matrix adapter interface for Amesos2.
Definition: Amesos2_MatrixAdapter_decl.hpp:42
int numericFactorization_impl()
ShyLUBasker specific numeric factorization.
Definition: Amesos2_ShyLUBasker_def.hpp:203
A templated MultiVector class adapter for Amesos2.
Definition: Amesos2_MultiVecAdapter_decl.hpp:142
bool single_proc_optimization() const
can we optimize size_type and ordinal_type for straight pass through, also check that is_contiguous_ ...
Definition: Amesos2_ShyLUBasker_def.hpp:103