54 #ifndef AMESOS2_SHYLUBASKER_DEF_HPP
55 #define AMESOS2_SHYLUBASKER_DEF_HPP
57 #include <Teuchos_Tuple.hpp>
58 #include <Teuchos_ParameterList.hpp>
59 #include <Teuchos_StandardParameterEntryValidators.hpp>
67 template <
class Matrix,
class Vector>
68 ShyLUBasker<Matrix,Vector>::ShyLUBasker(
69 Teuchos::RCP<const Matrix> A,
70 Teuchos::RCP<Vector> X,
71 Teuchos::RCP<const Vector> B )
72 : SolverCore<Amesos2::ShyLUBasker,Matrix,Vector>(A, X, B)
73 , is_contiguous_(true)
80 #if defined(HAVE_AMESOS2_KOKKOS) && defined(KOKKOS_ENABLE_OPENMP)
85 typedef Kokkos::OpenMP Exe_Space;
87 ShyLUbasker = new ::BaskerNS::BaskerTrilinosInterface<local_ordinal_type, shylubasker_dtype, Exe_Space>();
88 ShyLUbasker->Options.no_pivot = BASKER_FALSE;
89 ShyLUbasker->Options.static_delayed_pivot = 0;
90 ShyLUbasker->Options.symmetric = BASKER_FALSE;
91 ShyLUbasker->Options.realloc = BASKER_TRUE;
92 ShyLUbasker->Options.verbose = BASKER_FALSE;
93 ShyLUbasker->Options.prune = BASKER_TRUE;
94 ShyLUbasker->Options.btf_matching = 2;
95 ShyLUbasker->Options.blk_matching = 1;
96 ShyLUbasker->Options.matrix_scaling = 0;
97 ShyLUbasker->Options.min_block_size = 0;
98 ShyLUbasker->Options.amd_dom = BASKER_TRUE;
99 ShyLUbasker->Options.use_metis = BASKER_TRUE;
100 ShyLUbasker->Options.use_nodeNDP = BASKER_TRUE;
101 ShyLUbasker->Options.run_nd_on_leaves = BASKER_TRUE;
102 ShyLUbasker->Options.run_amd_on_leaves = BASKER_FALSE;
103 ShyLUbasker->Options.transpose = BASKER_FALSE;
104 ShyLUbasker->Options.replace_tiny_pivot = BASKER_FALSE;
105 ShyLUbasker->Options.verbose_matrix_out = BASKER_FALSE;
107 ShyLUbasker->Options.user_fill = (double)BASKER_FILL_USER;
108 ShyLUbasker->Options.use_sequential_diag_facto = BASKER_FALSE;
109 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE
110 num_threads = Kokkos::OpenMP::max_hardware_threads();
112 num_threads = Kokkos::OpenMP::impl_max_hardware_threads();
116 TEUCHOS_TEST_FOR_EXCEPTION(1 != 0,
118 "Amesos2_ShyLUBasker Exception: Do not have supported Kokkos node type (OpenMP) enabled for ShyLUBasker");
123 template <
class Matrix,
class Vector>
124 ShyLUBasker<Matrix,Vector>::~ShyLUBasker( )
127 #if defined(HAVE_AMESOS2_KOKKOS) && defined(KOKKOS_ENABLE_OPENMP)
128 ShyLUbasker->Finalize();
133 template <
class Matrix,
class Vector>
136 return (this->root_ && (this->matrixA_->getComm()->getSize() == 1) && is_contiguous_);
139 template<
class Matrix,
class Vector>
145 #ifdef HAVE_AMESOS2_TIMERS
146 Teuchos::TimeMonitor preOrderTimer(this->timers_.preOrderTime_);
153 template <
class Matrix,
class Vector>
161 ShyLUbasker->SetThreads(num_threads);
169 if ( single_proc_optimization() ) {
171 host_ordinal_type_array sp_rowptr;
172 host_ordinal_type_array sp_colind;
174 this->matrixA_->returnRowPtr_kokkos_view(sp_rowptr);
175 TEUCHOS_TEST_FOR_EXCEPTION(sp_rowptr.data() ==
nullptr,
176 std::runtime_error,
"Amesos2 Runtime Error: sp_rowptr returned null ");
177 this->matrixA_->returnColInd_kokkos_view(sp_colind);
178 TEUCHOS_TEST_FOR_EXCEPTION(sp_colind.data() ==
nullptr,
179 std::runtime_error,
"Amesos2 Runtime Error: sp_colind returned null ");
181 host_value_type_array hsp_values;
182 this->matrixA_->returnValues_kokkos_view(hsp_values);
183 shylubasker_dtype * sp_values = function_map::convert_scalar(hsp_values.data());
185 TEUCHOS_TEST_FOR_EXCEPTION(sp_values ==
nullptr,
186 std::runtime_error,
"Amesos2 Runtime Error: sp_values returned null ");
189 info = ShyLUbasker->Symbolic(this->globalNumRows_,
190 this->globalNumCols_,
191 this->globalNumNonZeros_,
197 TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
198 std::runtime_error,
"Error in ShyLUBasker Symbolic");
203 shylubasker_dtype * sp_values = function_map::convert_scalar(nzvals_view_.data());
204 info = ShyLUbasker->Symbolic(this->globalNumRows_,
205 this->globalNumCols_,
206 this->globalNumNonZeros_,
211 TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
212 std::runtime_error,
"Error in ShyLUBasker Symbolic");
218 Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
223 template <
class Matrix,
class Vector>
232 #ifdef HAVE_AMESOS2_TIMERS
233 Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
242 if ( single_proc_optimization() ) {
244 host_ordinal_type_array sp_rowptr;
245 host_ordinal_type_array sp_colind;
246 this->matrixA_->returnRowPtr_kokkos_view(sp_rowptr);
247 TEUCHOS_TEST_FOR_EXCEPTION(sp_rowptr.data() ==
nullptr,
248 std::runtime_error,
"Amesos2 Runtime Error: sp_rowptr returned null ");
249 this->matrixA_->returnColInd_kokkos_view(sp_colind);
250 TEUCHOS_TEST_FOR_EXCEPTION(sp_colind.data() ==
nullptr,
251 std::runtime_error,
"Amesos2 Runtime Error: sp_colind returned null ");
253 host_value_type_array hsp_values;
254 this->matrixA_->returnValues_kokkos_view(hsp_values);
255 shylubasker_dtype * sp_values = function_map::convert_scalar(hsp_values.data());
258 TEUCHOS_TEST_FOR_EXCEPTION(sp_values ==
nullptr,
259 std::runtime_error,
"Amesos2 Runtime Error: sp_values returned null ");
262 info = ShyLUbasker->Factor( this->globalNumRows_,
263 this->globalNumCols_,
264 this->globalNumNonZeros_,
269 TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
270 std::runtime_error,
"Error ShyLUBasker Factor");
275 shylubasker_dtype * sp_values = function_map::convert_scalar(nzvals_view_.data());
276 info = ShyLUbasker->Factor(this->globalNumRows_,
277 this->globalNumCols_,
278 this->globalNumNonZeros_,
283 TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
284 std::runtime_error,
"Error ShyLUBasker Factor");
289 local_ordinal_type blnnz = local_ordinal_type(0);
290 local_ordinal_type bunnz = local_ordinal_type(0);
291 ShyLUbasker->GetLnnz(blnnz);
292 ShyLUbasker->GetUnnz(bunnz);
296 this->setNnzLU( as<size_t>( blnnz + bunnz ) );
302 Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
306 TEUCHOS_TEST_FOR_EXCEPTION(info == -1,
308 "ShyLUBasker: Could not alloc space for L and U");
309 TEUCHOS_TEST_FOR_EXCEPTION(info == -2,
311 "ShyLUBasker: Could not alloc needed work space");
312 TEUCHOS_TEST_FOR_EXCEPTION(info == -3,
314 "ShyLUBasker: Could not alloc additional memory needed for L and U");
315 TEUCHOS_TEST_FOR_EXCEPTION(info > 0,
317 "ShyLUBasker: Zero pivot found at: " << info );
323 template <
class Matrix,
class Vector>
329 #ifdef HAVE_AMESOS2_TIMERS
330 Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
337 const global_size_type ld_rhs = this->root_ ? X->getGlobalLength() : 0;
338 const size_t nrhs = X->getGlobalNumVectors();
340 const bool ShyluBaskerTransposeRequest = this->control_.useTranspose_;
341 const bool initialize_data =
true;
342 const bool do_not_initialize_data =
false;
344 if ( single_proc_optimization() && nrhs == 1 ) {
347 Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
348 host_solve_array_t>::do_get(initialize_data, B, bValues_, as<size_t>(ld_rhs));
350 Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
351 host_solve_array_t>::do_get(do_not_initialize_data, X, xValues_, as<size_t>(ld_rhs));
356 Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
357 host_solve_array_t>::do_get(initialize_data, B, bValues_,
359 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
360 this->rowIndexBase_);
363 Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
364 host_solve_array_t>::do_get(do_not_initialize_data, X, xValues_,
366 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
367 this->rowIndexBase_);
371 shylubasker_dtype * pxValues = function_map::convert_scalar(xValues_.data());
372 shylubasker_dtype * pbValues = function_map::convert_scalar(bValues_.data());
373 if (!ShyluBaskerTransposeRequest)
374 ierr = ShyLUbasker->Solve(nrhs, pbValues, pxValues);
376 ierr = ShyLUbasker->Solve(nrhs, pbValues, pxValues,
true);
380 Teuchos::broadcast(*(this->getComm()), 0, &ierr);
382 TEUCHOS_TEST_FOR_EXCEPTION( ierr > 0,
384 "Encountered zero diag element at: " << ierr);
385 TEUCHOS_TEST_FOR_EXCEPTION( ierr == -1,
387 "Could not alloc needed working memory for solve" );
389 Util::put_1d_data_helper_kokkos_view<
392 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED);
398 template <
class Matrix,
class Vector>
403 return( this->globalNumRows_ == this->globalNumCols_ );
407 template <
class Matrix,
class Vector>
412 using Teuchos::getIntegralValue;
413 using Teuchos::ParameterEntryValidator;
415 RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
417 if(parameterList->isParameter(
"IsContiguous"))
419 is_contiguous_ = parameterList->get<
bool>(
"IsContiguous");
422 if(parameterList->isParameter(
"num_threads"))
424 num_threads = parameterList->get<
int>(
"num_threads");
426 if(parameterList->isParameter(
"pivot"))
428 ShyLUbasker->Options.no_pivot = (!parameterList->get<
bool>(
"pivot"));
430 if(parameterList->isParameter(
"delayed pivot"))
432 ShyLUbasker->Options.static_delayed_pivot = (parameterList->get<
int>(
"delayed pivot"));
434 if(parameterList->isParameter(
"pivot_tol"))
436 ShyLUbasker->Options.pivot_tol = parameterList->get<
double>(
"pivot_tol");
438 if(parameterList->isParameter(
"symmetric"))
440 ShyLUbasker->Options.symmetric = parameterList->get<
bool>(
"symmetric");
442 if(parameterList->isParameter(
"realloc"))
444 ShyLUbasker->Options.realloc = parameterList->get<
bool>(
"realloc");
446 if(parameterList->isParameter(
"verbose"))
448 ShyLUbasker->Options.verbose = parameterList->get<
bool>(
"verbose");
450 if(parameterList->isParameter(
"verbose_matrix"))
452 ShyLUbasker->Options.verbose_matrix_out = parameterList->get<
bool>(
"verbose_matrix");
454 if(parameterList->isParameter(
"btf"))
456 ShyLUbasker->Options.btf = parameterList->get<
bool>(
"btf");
458 if(parameterList->isParameter(
"use_metis"))
460 ShyLUbasker->Options.use_metis = parameterList->get<
bool>(
"use_metis");
462 if(parameterList->isParameter(
"use_nodeNDP"))
464 ShyLUbasker->Options.use_nodeNDP = parameterList->get<
bool>(
"use_nodeNDP");
466 if(parameterList->isParameter(
"run_nd_on_leaves"))
468 ShyLUbasker->Options.run_nd_on_leaves = parameterList->get<
bool>(
"run_nd_on_leaves");
470 if(parameterList->isParameter(
"run_amd_on_leaves"))
472 ShyLUbasker->Options.run_amd_on_leaves = parameterList->get<
bool>(
"run_amd_on_leaves");
474 if(parameterList->isParameter(
"amd_on_blocks"))
476 ShyLUbasker->Options.amd_dom = parameterList->get<
bool>(
"amd_on_blocks");
478 if(parameterList->isParameter(
"transpose"))
481 const auto transpose = parameterList->get<
bool>(
"transpose");
483 this->control_.useTranspose_ =
true;
485 if(parameterList->isParameter(
"use_sequential_diag_facto"))
487 ShyLUbasker->Options.use_sequential_diag_facto = parameterList->get<
bool>(
"use_sequential_diag_facto");
489 if(parameterList->isParameter(
"user_fill"))
491 ShyLUbasker->Options.user_fill = parameterList->get<
double>(
"user_fill");
493 if(parameterList->isParameter(
"prune"))
495 ShyLUbasker->Options.prune = parameterList->get<
bool>(
"prune");
497 if(parameterList->isParameter(
"replace_tiny_pivot"))
499 ShyLUbasker->Options.replace_tiny_pivot = parameterList->get<
bool>(
"replace_tiny_pivot");
501 if(parameterList->isParameter(
"btf_matching"))
503 ShyLUbasker->Options.btf_matching = parameterList->get<
int>(
"btf_matching");
504 if (ShyLUbasker->Options.btf_matching == 1 || ShyLUbasker->Options.btf_matching == 2) {
505 ShyLUbasker->Options.matching =
true;
507 ShyLUbasker->Options.matching =
false;
510 if(parameterList->isParameter(
"blk_matching"))
512 ShyLUbasker->Options.blk_matching = parameterList->get<
int>(
"blk_matching");
514 if(parameterList->isParameter(
"matrix_scaling"))
516 ShyLUbasker->Options.matrix_scaling = parameterList->get<
int>(
"matrix_scaling");
518 if(parameterList->isParameter(
"min_block_size"))
520 ShyLUbasker->Options.min_block_size = parameterList->get<
int>(
"min_block_size");
524 template <
class Matrix,
class Vector>
525 Teuchos::RCP<const Teuchos::ParameterList>
528 using Teuchos::ParameterList;
530 static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
532 if( is_null(valid_params) )
534 Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
535 pl->set(
"IsContiguous",
true,
536 "Are GIDs contiguous");
537 pl->set(
"num_threads", 1,
538 "Number of threads");
539 pl->set(
"pivot",
false,
541 pl->set(
"delayed pivot", 0,
542 "Apply static delayed pivot on a big block");
543 pl->set(
"pivot_tol", .0001,
544 "Tolerance before pivot, currently not used");
545 pl->set(
"symmetric",
false,
546 "Should Symbolic assume symmetric nonzero pattern");
547 pl->set(
"realloc" ,
false,
548 "Should realloc space if not enough");
549 pl->set(
"verbose",
false,
550 "Information about factoring");
551 pl->set(
"verbose_matrix",
false,
552 "Give Permuted Matrices");
555 pl->set(
"prune",
false,
556 "Use prune on BTF blocks (Not Supported)");
557 pl->set(
"btf_matching", 2,
558 "Matching option for BTF: 0 = none, 1 = Basker, 2 = Trilinos (default), (3 = MC64 if enabled)");
559 pl->set(
"blk_matching", 1,
560 "Matching optioon for block: 0 = none, 1 or anything else = Basker (default), (2 = MC64 if enabled)");
561 pl->set(
"matrix_scaling", 0,
562 "Use matrix scaling to biig A BTF block: 0 = no-scaling, 1 = symmetric diagonal scaling, 2 = row-max, and then col-max scaling");
563 pl->set(
"min_block_size", 0,
564 "Size of the minimum diagonal blocks");
565 pl->set(
"replace_tiny_pivot",
false,
566 "Replace tiny pivots during the numerical factorization");
567 pl->set(
"use_metis",
true,
569 pl->set(
"use_nodeNDP",
true,
570 "Use nodeND to compute ND partition");
571 pl->set(
"run_nd_on_leaves",
false,
572 "Run ND on the final leaf-nodes for ND factorization");
573 pl->set(
"run_amd_on_leaves",
false,
574 "Run AMD on the final leaf-nodes for ND factorization");
575 pl->set(
"amd_on_blocks",
true,
576 "Run AMD on each diagonal blocks");
577 pl->set(
"transpose",
false,
578 "Solve the transpose A");
579 pl->set(
"use_sequential_diag_facto",
false,
580 "Use sequential algorithm to factor each diagonal block");
581 pl->set(
"user_fill", (
double)BASKER_FILL_USER,
582 "User-provided padding for the fill ratio");
589 template <
class Matrix,
class Vector>
594 if(current_phase == SOLVE)
return (
false);
596 #ifdef HAVE_AMESOS2_TIMERS
597 Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
602 if ( single_proc_optimization() ) {
611 Kokkos::resize(nzvals_view_, this->globalNumNonZeros_);
612 Kokkos::resize(rowind_view_, this->globalNumNonZeros_);
613 Kokkos::resize(colptr_view_, this->globalNumCols_ + 1);
616 local_ordinal_type nnz_ret = 0;
618 #ifdef HAVE_AMESOS2_TIMERS
619 Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
624 ::do_get(this->matrixA_.ptr(), nzvals_view_, rowind_view_, colptr_view_, nnz_ret,
625 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
627 this->rowIndexBase_);
631 TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != as<local_ordinal_type>(this->globalNumNonZeros_),
633 "Amesos2_ShyLUBasker loadA_impl: Did not get the expected number of non-zero vals");
641 template<
class Matrix,
class Vector>
647 #endif // AMESOS2_SHYLUBASKER_DEF_HPP
A generic helper class for getting a CCS representation of a Matrix.
Definition: Amesos2_Util.hpp:648
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
ShyLUBasker specific solve.
Definition: Amesos2_ShyLUBasker_def.hpp:325
EPhase
Used to indicate a phase in the direct solution.
Definition: Amesos2_TypeDecl.hpp:65
Amesos2 interface to the Baker package.
Definition: Amesos2_ShyLUBasker_decl.hpp:76
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition: Amesos2_ShyLUBasker_def.hpp:526
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition: Amesos2_ShyLUBasker_def.hpp:141
void transpose(ArrayView< Scalar > vals, ArrayView< GlobalOrdinal > indices, ArrayView< GlobalSizeT > ptr, ArrayView< Scalar > trans_vals, ArrayView< GlobalOrdinal > trans_indices, ArrayView< GlobalSizeT > trans_ptr)
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition: Amesos2_ShyLUBasker_def.hpp:400
Amesos2 ShyLUBasker declarations.
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal structures.
Definition: Amesos2_ShyLUBasker_def.hpp:591
A Matrix adapter interface for Amesos2.
Definition: Amesos2_MatrixAdapter_decl.hpp:76
int numericFactorization_impl()
ShyLUBasker specific numeric factorization.
Definition: Amesos2_ShyLUBasker_def.hpp:225
A templated MultiVector class adapter for Amesos2.
Definition: Amesos2_MultiVecAdapter_decl.hpp:176
bool single_proc_optimization() const
can we optimize size_type and ordinal_type for straight pass through, also check that is_contiguous_ ...
Definition: Amesos2_ShyLUBasker_def.hpp:135