52 #ifndef AMESOS2_KLU2_DEF_HPP
53 #define AMESOS2_KLU2_DEF_HPP
55 #include <Teuchos_Tuple.hpp>
56 #include <Teuchos_ParameterList.hpp>
57 #include <Teuchos_StandardParameterEntryValidators.hpp>
65 template <
class Matrix,
class Vector>
67 Teuchos::RCP<const Matrix> A,
68 Teuchos::RCP<Vector> X,
69 Teuchos::RCP<const Vector> B )
72 , is_contiguous_(true)
74 ::KLU2::klu_defaults<klu2_dtype, local_ordinal_type> (&(data_.common_)) ;
75 data_.symbolic_ = NULL;
76 data_.numeric_ = NULL;
83 template <
class Matrix,
class Vector>
91 if (data_.symbolic_ != NULL)
92 ::KLU2::klu_free_symbolic<klu2_dtype, local_ordinal_type>
93 (&(data_.symbolic_), &(data_.common_)) ;
94 if (data_.numeric_ != NULL)
95 ::KLU2::klu_free_numeric<klu2_dtype, local_ordinal_type>
96 (&(data_.numeric_), &(data_.common_)) ;
109 template <
class Matrix,
class Vector>
112 return (this->root_ && (this->matrixA_->getComm()->getSize() == 1) && is_contiguous_);
115 template<
class Matrix,
class Vector>
121 #ifdef HAVE_AMESOS2_TIMERS
122 Teuchos::TimeMonitor preOrderTimer(this->timers_.preOrderTime_);
129 template <
class Matrix,
class Vector>
133 if (data_.symbolic_ != NULL) {
134 ::KLU2::klu_free_symbolic<klu2_dtype, local_ordinal_type>
135 (&(data_.symbolic_), &(data_.common_)) ;
138 if ( single_proc_optimization() ) {
139 host_ordinal_type_array host_row_ptr_view;
140 host_ordinal_type_array host_cols_view;
141 this->matrixA_->returnRowPtr_kokkos_view(host_row_ptr_view);
142 this->matrixA_->returnColInd_kokkos_view(host_cols_view);
143 data_.symbolic_ = ::KLU2::klu_analyze<klu2_dtype, local_ordinal_type>
144 ((local_ordinal_type)this->globalNumCols_, host_row_ptr_view.data(),
145 host_cols_view.data(), &(data_.common_)) ;
149 data_.symbolic_ = ::KLU2::klu_analyze<klu2_dtype, local_ordinal_type>
150 ((local_ordinal_type)this->globalNumCols_, host_col_ptr_view_.data(),
151 host_rows_view_.data(), &(data_.common_)) ;
159 template <
class Matrix,
class Vector>
173 #ifdef HAVE_AMESOS2_TIMERS
174 Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
177 if (data_.numeric_ != NULL) {
178 ::KLU2::klu_free_numeric<klu2_dtype, local_ordinal_type>
179 (&(data_.numeric_), &(data_.common_));
182 if ( single_proc_optimization() ) {
183 host_ordinal_type_array host_row_ptr_view;
184 host_ordinal_type_array host_cols_view;
185 this->matrixA_->returnRowPtr_kokkos_view(host_row_ptr_view);
186 this->matrixA_->returnColInd_kokkos_view(host_cols_view);
187 this->matrixA_->returnValues_kokkos_view(host_nzvals_view_);
188 klu2_dtype * pValues = function_map::convert_scalar(host_nzvals_view_.data());
189 data_.numeric_ = ::KLU2::klu_factor<klu2_dtype, local_ordinal_type>
190 (host_row_ptr_view.data(), host_cols_view.data(), pValues,
191 data_.symbolic_, &(data_.common_));
194 klu2_dtype * pValues = function_map::convert_scalar(host_nzvals_view_.data());
195 data_.numeric_ = ::KLU2::klu_factor<klu2_dtype, local_ordinal_type>
196 (host_col_ptr_view_.data(), host_rows_view_.data(), pValues,
197 data_.symbolic_, &(data_.common_));
205 if(data_.numeric_ ==
nullptr) {
212 this->setNnzLU( as<size_t>((data_.numeric_)->lnz) + as<size_t>((data_.numeric_)->unz) );
219 Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
221 TEUCHOS_TEST_FOR_EXCEPTION(info > 0, std::runtime_error,
222 "KLU2 numeric factorization failed");
227 template <
class Matrix,
class Vector>
236 const global_size_type ld_rhs = this->root_ ? X->getGlobalLength() : 0;
237 const size_t nrhs = X->getGlobalNumVectors();
240 #ifdef HAVE_AMESOS2_TIMERS
241 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
242 Teuchos::TimeMonitor redistTimer( this->timers_.vecRedistTime_ );
244 if ( single_proc_optimization() && nrhs == 1 ) {
246 Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
247 host_solve_array_t>::do_get(B, bValues_, as<size_t>(ld_rhs));
249 Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
250 host_solve_array_t>::do_get(X, xValues_, as<size_t>(ld_rhs));
253 if ( is_contiguous_ ==
true ) {
254 Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
255 host_solve_array_t>::do_get(B, bValues_,
257 ROOTED, this->rowIndexBase_);
260 Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
261 host_solve_array_t>::do_get(B, bValues_,
263 CONTIGUOUS_AND_ROOTED, this->rowIndexBase_);
267 if ( is_contiguous_ ==
true ) {
268 Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
269 host_solve_array_t>::do_get(X, xValues_,
271 ROOTED, this->rowIndexBase_);
274 Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
275 host_solve_array_t>::do_get(X, xValues_,
277 CONTIGUOUS_AND_ROOTED, this->rowIndexBase_);
289 Kokkos::deep_copy(xValues_, bValues_);
293 klu2_dtype * pxValues = function_map::convert_scalar(xValues_.data());
294 klu2_dtype * pbValues = function_map::convert_scalar(bValues_.data());
298 TEUCHOS_TEST_FOR_EXCEPTION(pbValues ==
nullptr,
299 std::runtime_error,
"Amesos2 Runtime Error: b_vector returned null ");
301 TEUCHOS_TEST_FOR_EXCEPTION(pxValues ==
nullptr,
302 std::runtime_error,
"Amesos2 Runtime Error: x_vector returned null ");
305 if ( single_proc_optimization() && nrhs == 1 ) {
306 #ifdef HAVE_AMESOS2_TIMERS
307 Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
315 ::KLU2::klu_tsolve2<klu2_dtype, local_ordinal_type>
316 (data_.symbolic_, data_.numeric_,
317 (local_ordinal_type)this->globalNumCols_,
318 (local_ordinal_type)nrhs,
319 pbValues, pxValues, &(data_.common_)) ;
322 ::KLU2::klu_solve2<klu2_dtype, local_ordinal_type>
323 (data_.symbolic_, data_.numeric_,
324 (local_ordinal_type)this->globalNumCols_,
325 (local_ordinal_type)nrhs,
326 pbValues, pxValues, &(data_.common_)) ;
337 #ifdef HAVE_AMESOS2_TIMERS
338 Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
345 if ( single_proc_optimization() ) {
346 ::KLU2::klu_tsolve<klu2_dtype, local_ordinal_type>
347 (data_.symbolic_, data_.numeric_,
348 (local_ordinal_type)this->globalNumCols_,
349 (local_ordinal_type)nrhs,
350 pxValues, &(data_.common_)) ;
353 ::KLU2::klu_solve<klu2_dtype, local_ordinal_type>
354 (data_.symbolic_, data_.numeric_,
355 (local_ordinal_type)this->globalNumCols_,
356 (local_ordinal_type)nrhs,
357 pxValues, &(data_.common_)) ;
365 if ( single_proc_optimization() ) {
366 ::KLU2::klu_solve<klu2_dtype, local_ordinal_type>
367 (data_.symbolic_, data_.numeric_,
368 (local_ordinal_type)this->globalNumCols_,
369 (local_ordinal_type)nrhs,
370 pxValues, &(data_.common_)) ;
373 ::KLU2::klu_tsolve<klu2_dtype, local_ordinal_type>
374 (data_.symbolic_, data_.numeric_,
375 (local_ordinal_type)this->globalNumCols_,
376 (local_ordinal_type)nrhs,
377 pxValues, &(data_.common_)) ;
384 #ifdef HAVE_AMESOS2_TIMERS
385 Teuchos::TimeMonitor redistTimer( this->timers_.vecRedistTime_ );
388 if ( is_contiguous_ ==
true ) {
389 Util::put_1d_data_helper_kokkos_view<
392 ROOTED, this->rowIndexBase_);
395 Util::put_1d_data_helper_kokkos_view<
398 CONTIGUOUS_AND_ROOTED, this->rowIndexBase_);
406 template <
class Matrix,
class Vector>
413 return( this->matrixA_->getGlobalNumRows() == this->matrixA_->getGlobalNumCols() );
417 template <
class Matrix,
class Vector>
422 using Teuchos::getIntegralValue;
423 using Teuchos::ParameterEntryValidator;
425 RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
427 transFlag_ = this->control_.useTranspose_ ? 1: 0;
429 if( parameterList->isParameter(
"Trans") ){
430 RCP<const ParameterEntryValidator> trans_validator = valid_params->getEntry(
"Trans").validator();
431 parameterList->getEntry(
"Trans").setValidator(trans_validator);
433 transFlag_ = getIntegralValue<int>(*parameterList,
"Trans");
436 if( parameterList->isParameter(
"IsContiguous") ){
437 is_contiguous_ = parameterList->get<
bool>(
"IsContiguous");
442 template <
class Matrix,
class Vector>
443 Teuchos::RCP<const Teuchos::ParameterList>
447 using Teuchos::tuple;
448 using Teuchos::ParameterList;
449 using Teuchos::setStringToIntegralParameter;
451 static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
453 if( is_null(valid_params) )
455 Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
457 pl->set(
"Equil",
true,
"Whether to equilibrate the system before solve, does nothing now");
458 pl->set(
"IsContiguous",
true,
"Whether GIDs contiguous");
460 setStringToIntegralParameter<int>(
"Trans",
"NOTRANS",
461 "Solve for the transpose system or not",
462 tuple<string>(
"NOTRANS",
"TRANS",
"CONJ"),
463 tuple<string>(
"Solve with transpose",
464 "Do not solve with transpose",
465 "Solve with the conjugate transpose"),
475 template <
class Matrix,
class Vector>
481 if(current_phase == SOLVE)
return(
false);
483 if ( single_proc_optimization() ) {
489 #ifdef HAVE_AMESOS2_TIMERS
490 Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
495 host_nzvals_view_ = host_value_type_array(
496 Kokkos::ViewAllocateWithoutInitializing(
"host_nzvals_view_"), this->globalNumNonZeros_);
497 host_rows_view_ = host_ordinal_type_array(
498 Kokkos::ViewAllocateWithoutInitializing(
"host_rows_view_"), this->globalNumNonZeros_);
499 host_col_ptr_view_ = host_ordinal_type_array(
500 Kokkos::ViewAllocateWithoutInitializing(
"host_col_ptr_view_"), this->globalNumRows_ + 1);
503 local_ordinal_type nnz_ret = 0;
505 #ifdef HAVE_AMESOS2_TIMERS
506 Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
509 if ( is_contiguous_ ==
true ) {
510 Util::get_ccs_helper_kokkos_view<
512 ::do_get(this->matrixA_.ptr(), host_nzvals_view_, host_rows_view_, host_col_ptr_view_,
513 nnz_ret, ROOTED, ARBITRARY, this->rowIndexBase_);
516 Util::get_ccs_helper_kokkos_view<
518 ::do_get(this->matrixA_.ptr(), host_nzvals_view_, host_rows_view_, host_col_ptr_view_,
519 nnz_ret, CONTIGUOUS_AND_ROOTED, ARBITRARY, this->rowIndexBase_);
525 TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != as<local_ordinal_type>(this->globalNumNonZeros_),
527 "Did not get the expected number of non-zero vals");
536 template<
class Matrix,
class Vector>
542 #endif // AMESOS2_KLU2_DEF_HPP
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers...
Definition: Amesos2_SolverCore_decl.hpp:105
KLU2(Teuchos::RCP< const Matrix > A, Teuchos::RCP< Vector > X, Teuchos::RCP< const Vector > B)
Initialize from Teuchos::RCP.
Definition: Amesos2_KLU2_def.hpp:66
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
KLU2 specific solve.
Definition: Amesos2_KLU2_def.hpp:229
EPhase
Used to indicate a phase in the direct solution.
Definition: Amesos2_TypeDecl.hpp:65
Amesos2 KLU2 declarations.
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal structures.
Definition: Amesos2_KLU2_def.hpp:477
~KLU2()
Destructor.
Definition: Amesos2_KLU2_def.hpp:84
void setParameters_impl(const Teuchos::RCP< Teuchos::ParameterList > ¶meterList)
Definition: Amesos2_KLU2_def.hpp:419
int symbolicFactorization_impl()
Perform symbolic factorization of the matrix using KLU2.
Definition: Amesos2_KLU2_def.hpp:131
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition: Amesos2_KLU2_def.hpp:117
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition: Amesos2_KLU2_def.hpp:408
A Matrix adapter interface for Amesos2.
Definition: Amesos2_MatrixAdapter_decl.hpp:76
int numericFactorization_impl()
KLU2 specific numeric factorization.
Definition: Amesos2_KLU2_def.hpp:161
Amesos2 interface to the KLU2 package.
Definition: Amesos2_KLU2_decl.hpp:72
bool single_proc_optimization() const
can we optimize size_type and ordinal_type for straight pass through, also check that is_contiguous_ ...
Definition: Amesos2_KLU2_def.hpp:111
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition: Amesos2_KLU2_def.hpp:444
A templated MultiVector class adapter for Amesos2.
Definition: Amesos2_MultiVecAdapter_decl.hpp:176