18 #ifndef AMESOS2_KLU2_DEF_HPP
19 #define AMESOS2_KLU2_DEF_HPP
21 #include <Teuchos_Tuple.hpp>
22 #include <Teuchos_ParameterList.hpp>
23 #include <Teuchos_StandardParameterEntryValidators.hpp>
// KLU2 constructor taking the matrix A, solution X and right-hand side B
// (listing fragment: gaps in the embedded line numbers are lines not shown here).
31 template <
class Matrix,
class Vector>
33 Teuchos::RCP<const Matrix> A,
34 Teuchos::RCP<Vector> X,
35 Teuchos::RCP<const Vector> B )
// Global IDs are treated as contiguous until the "IsContiguous" parameter says otherwise.
38 , is_contiguous_(true)
// Presumably fills data_.common_ with KLU2's default control settings -- confirm against KLU2.
41 ::KLU2::klu_defaults<klu2_dtype, local_ordinal_type> (&(data_.common_)) ;
// No factorization exists yet; later code tests these handles against NULL before freeing them.
42 data_.symbolic_ = NULL;
43 data_.numeric_ = NULL;
// KLU2 destructor (listing fragment: gaps in the embedded line numbers are lines not shown here).
// Releases whichever KLU2 factorization objects are still held.
50 template <
class Matrix,
class Vector>
// Free the symbolic analysis, if one was created.
58 if (data_.symbolic_ != NULL)
59 ::KLU2::klu_free_symbolic<klu2_dtype, local_ordinal_type>
60 (&(data_.symbolic_), &(data_.common_)) ;
// Free the numeric factorization, if one was created.
61 if (data_.numeric_ != NULL)
62 ::KLU2::klu_free_numeric<klu2_dtype, local_ordinal_type>
63 (&(data_.numeric_), &(data_.common_)) ;
// single_proc_optimization (listing fragment: the signature line is not shown here).
// True only when this->root_ is set, the matrix communicator has exactly one
// process, and is_contiguous_ is true.
76 template <
class Matrix,
class Vector>
79 return (this->root_ && (this->matrixA_->getComm()->getSize() == 1) && is_contiguous_);
// preOrdering_impl (listing fragment: signature, #endif and the rest of the body are not shown here).
82 template<
class Matrix,
class Vector>
// When timers are compiled in, attribute the time spent in this scope to preOrderTime_.
88 #ifdef HAVE_AMESOS2_TIMERS
89 Teuchos::TimeMonitor preOrderTimer(this->timers_.preOrderTime_);
// symbolicFactorization_impl (listing fragment: gaps in the embedded line numbers are lines not shown here).
// Runs klu_analyze and stores the result in data_.symbolic_.
96 template <
class Matrix,
class Vector>
// Discard any symbolic analysis left from an earlier call before creating a new one.
100 if (data_.symbolic_ != NULL) {
101 ::KLU2::klu_free_symbolic<klu2_dtype, local_ordinal_type>
102 (&(data_.symbolic_), &(data_.common_)) ;
// Single-process case: pass the matrix adapter's own row-pointer and
// column-index views straight to klu_analyze.
105 if ( single_proc_optimization() ) {
106 host_ordinal_type_array host_row_ptr_view;
107 host_ordinal_type_array host_cols_view;
108 this->matrixA_->returnRowPtr_kokkos_view(host_row_ptr_view);
109 this->matrixA_->returnColInd_kokkos_view(host_cols_view);
110 data_.symbolic_ = ::KLU2::klu_analyze<klu2_dtype, local_ordinal_type>
111 ((local_ordinal_type)this->globalNumCols_, host_row_ptr_view.data(),
112 host_cols_view.data(), &(data_.common_)) ;
// Other case (the branch line itself is not shown): analyze the member arrays
// host_col_ptr_view_ / host_rows_view_, which loadA_impl sizes and fills.
116 data_.symbolic_ = ::KLU2::klu_analyze<klu2_dtype, local_ordinal_type>
117 ((local_ordinal_type)this->globalNumCols_, host_col_ptr_view_.data(),
118 host_rows_view_.data(), &(data_.common_)) ;
// numericFactorization_impl (listing fragment: gaps in the embedded line numbers are lines not shown here).
// Runs klu_factor against the existing data_.symbolic_ and stores the result in data_.numeric_.
126 template <
class Matrix,
class Vector>
140 #ifdef HAVE_AMESOS2_TIMERS
141 Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
// Discard any numeric factorization left from an earlier call.
144 if (data_.numeric_ != NULL) {
145 ::KLU2::klu_free_numeric<klu2_dtype, local_ordinal_type>
146 (&(data_.numeric_), &(data_.common_));
// Single-process case: fetch row pointers, column indices and values from the
// matrix adapter and factor them directly.
149 if ( single_proc_optimization() ) {
150 host_ordinal_type_array host_row_ptr_view;
151 host_ordinal_type_array host_cols_view;
152 this->matrixA_->returnRowPtr_kokkos_view(host_row_ptr_view);
153 this->matrixA_->returnColInd_kokkos_view(host_cols_view);
154 this->matrixA_->returnValues_kokkos_view(host_nzvals_view_);
155 klu2_dtype * pValues = function_map::convert_scalar(host_nzvals_view_.data());
156 data_.numeric_ = ::KLU2::klu_factor<klu2_dtype, local_ordinal_type>
157 (host_row_ptr_view.data(), host_cols_view.data(), pValues,
158 data_.symbolic_, &(data_.common_));
// Other case (the branch line itself is not shown): factor the member arrays
// that loadA_impl sizes and fills.
161 klu2_dtype * pValues = function_map::convert_scalar(host_nzvals_view_.data());
162 data_.numeric_ = ::KLU2::klu_factor<klu2_dtype, local_ordinal_type>
163 (host_col_ptr_view_.data(), host_rows_view_.data(), pValues,
164 data_.symbolic_, &(data_.common_));
// A null data_.numeric_ after klu_factor is tested here; the body of this branch is not shown.
172 if(data_.numeric_ ==
nullptr) {
// Record the factor size as the sum of the numeric object's lnz and unz counts.
179 this->setNnzLU( as<size_t>((data_.numeric_)->lnz) + as<size_t>((data_.numeric_)->unz) );
// Broadcast info from rank 0 over the matrix communicator; the check below then
// uses the broadcast value. (How info is set is not shown here.)
186 Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
// Throws std::runtime_error when info is positive.
188 TEUCHOS_TEST_FOR_EXCEPTION(info > 0, std::runtime_error,
189 "KLU2 numeric factorization failed(info="+std::to_string(info)+
")");
// solve_impl(X, B) (listing fragment: gaps in the embedded line numbers are lines not shown here).
// Copies B (and X) into host arrays, calls the KLU2 solve routines using the
// stored symbolic/numeric factorization, then writes the solution back into X.
194 template <
class Matrix,
class Vector>
// ld_rhs is the global vector length where this->root_ is set and 0 elsewhere.
203 const global_size_type ld_rhs = this->root_ ? X->getGlobalLength() : 0;
204 const size_t nrhs = X->getGlobalNumVectors();
// The custom gather is used only if it is enabled, there is more than one
// process, and the vector scalar type is float or double.
208 bool use_gather = use_gather_;
209 use_gather = (use_gather && this->matrixA_->getComm()->getSize() > 1);
210 use_gather = (use_gather && (std::is_same<vector_scalar_type, float>::value ||
211 std::is_same<vector_scalar_type, double>::value));
213 #ifdef HAVE_AMESOS2_TIMERS
214 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
// Named flags for the first argument of do_get: B is fetched with initialize_data,
// X with do_not_initialize_data.
216 const bool initialize_data =
true;
217 const bool do_not_initialize_data =
false;
// Single process, single right-hand side: fetch B and X through the 1-D copy helper.
218 if ( single_proc_optimization() && nrhs == 1 ) {
220 bDidAssignB = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
221 host_solve_array_t>::do_get(initialize_data, B, bValues_, as<size_t>(ld_rhs));
223 bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
224 host_solve_array_t>::do_get(do_not_initialize_data, X, xValues_, as<size_t>(ld_rhs));
// Custom gather path (the enclosing branch lines are not shown). The distribution
// tag is ROOTED when is_contiguous_ is true and CONTIGUOUS_AND_ROOTED otherwise.
228 int rval = B->gather(bValues_, this->perm_g2l, this->recvCountRows, this->recvDisplRows,
229 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED);
231 X->gather(xValues_, this->perm_g2l, this->recvCountRows, this->recvDisplRows,
232 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED);
// Helper-based path (some argument lines are not shown): same distribution tag,
// plus this->rowIndexBase_.
240 bDidAssignB = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
241 host_solve_array_t>::do_get(initialize_data, B, bValues_,
243 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
244 this->rowIndexBase_);
246 bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
247 host_solve_array_t>::do_get(do_not_initialize_data, X, xValues_,
249 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
250 this->rowIndexBase_);
// Copy B's values into the X buffer. NOTE(review): presumably because the
// klu_solve/klu_tsolve calls below receive only pxValues and work in place -- confirm.
260 Kokkos::deep_copy(xValues_, bValues_);
// Raw pointers in KLU2's scalar type; a null pointer for either raises std::runtime_error.
268 klu2_dtype * pxValues = function_map::convert_scalar(xValues_.data());
269 klu2_dtype * pbValues = function_map::convert_scalar(bValues_.data());
273 TEUCHOS_TEST_FOR_EXCEPTION(pbValues ==
nullptr,
274 std::runtime_error,
"Amesos2 Runtime Error: b_vector returned null ");
276 TEUCHOS_TEST_FOR_EXCEPTION(pxValues ==
nullptr,
277 std::runtime_error,
"Amesos2 Runtime Error: x_vector returned null ");
// Single process, single right-hand side: the *2 variants take separate b and x
// pointers. The condition choosing between klu_tsolve2 and klu_solve2 is not shown.
280 if ( single_proc_optimization() && nrhs == 1 ) {
281 #ifdef HAVE_AMESOS2_TIMERS
282 Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
290 ::KLU2::klu_tsolve2<klu2_dtype, local_ordinal_type>
291 (data_.symbolic_, data_.numeric_,
292 (local_ordinal_type)this->globalNumCols_,
293 (local_ordinal_type)nrhs,
294 pbValues, pxValues, &(data_.common_)) ;
297 ::KLU2::klu_solve2<klu2_dtype, local_ordinal_type>
298 (data_.symbolic_, data_.numeric_,
299 (local_ordinal_type)this->globalNumCols_,
300 (local_ordinal_type)nrhs,
301 pbValues, pxValues, &(data_.common_)) ;
// General path: these calls are given only pxValues. Two groups follow with
// klu_tsolve/klu_solve in opposite order for the single-process and other cases.
// NOTE(review): the condition selecting between the two groups is not shown;
// presumably it is the transpose flag -- confirm.
312 #ifdef HAVE_AMESOS2_TIMERS
313 Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
320 if ( single_proc_optimization() ) {
321 ::KLU2::klu_tsolve<klu2_dtype, local_ordinal_type>
322 (data_.symbolic_, data_.numeric_,
323 (local_ordinal_type)this->globalNumCols_,
324 (local_ordinal_type)nrhs,
325 pxValues, &(data_.common_)) ;
328 ::KLU2::klu_solve<klu2_dtype, local_ordinal_type>
329 (data_.symbolic_, data_.numeric_,
330 (local_ordinal_type)this->globalNumCols_,
331 (local_ordinal_type)nrhs,
332 pxValues, &(data_.common_)) ;
340 if ( single_proc_optimization() ) {
341 ::KLU2::klu_solve<klu2_dtype, local_ordinal_type>
342 (data_.symbolic_, data_.numeric_,
343 (local_ordinal_type)this->globalNumCols_,
344 (local_ordinal_type)nrhs,
345 pxValues, &(data_.common_)) ;
348 ::KLU2::klu_tsolve<klu2_dtype, local_ordinal_type>
349 (data_.symbolic_, data_.numeric_,
350 (local_ordinal_type)this->globalNumCols_,
351 (local_ordinal_type)nrhs,
352 pxValues, &(data_.common_)) ;
// Write the solution back into X. A nonzero return from scatter clears use_gather;
// the put_1d_data helper call follows (its branch and argument lines are partly not shown).
361 #ifdef HAVE_AMESOS2_TIMERS
362 Teuchos::TimeMonitor redistTimer( this->timers_.vecRedistTime_ );
365 int rval = X->scatter(xValues_, this->perm_g2l, this->recvCountRows, this->recvDisplRows,
366 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED);
367 if (rval != 0) use_gather =
false;
370 Util::put_1d_data_helper_kokkos_view<
373 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
374 this->rowIndexBase_);
// matrixShapeOK_impl (listing fragment: the signature line is not shown here).
// The shape is accepted only when the matrix is square: global row count equals global column count.
381 template <
class Matrix,
class Vector>
388 return( this->matrixA_->getGlobalNumRows() == this->matrixA_->getGlobalNumCols() );
// setParameters_impl(parameterList) (listing fragment: gaps in the embedded line numbers are lines not shown here).
// Reads the optional "Trans", "IsContiguous" and "UseCustomGather" entries.
392 template <
class Matrix,
class Vector>
397 using Teuchos::getIntegralValue;
398 using Teuchos::ParameterEntryValidator;
400 RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
// Start from the solver-wide transpose setting (1 when useTranspose_ is set, else 0).
402 transFlag_ = this->control_.useTranspose_ ? 1: 0;
// An explicit "Trans" entry overrides it: attach the validator from the valid
// parameter list to the entry, then read the entry as an integral value.
404 if( parameterList->isParameter(
"Trans") ){
405 RCP<const ParameterEntryValidator> trans_validator = valid_params->getEntry(
"Trans").validator();
406 parameterList->getEntry(
"Trans").setValidator(trans_validator);
408 transFlag_ = getIntegralValue<int>(*parameterList,
"Trans");
// Optional boolean switches; each member keeps its current value when the entry is absent.
411 if( parameterList->isParameter(
"IsContiguous") ){
412 is_contiguous_ = parameterList->get<
bool>(
"IsContiguous");
414 if( parameterList->isParameter(
"UseCustomGather") ){
415 use_gather_ = parameterList->get<
bool>(
"UseCustomGather");
// getValidParameters_impl (listing fragment: gaps in the embedded line numbers are lines not shown here).
// Returns the list of parameters this solver accepts, with defaults and documentation strings.
420 template <
class Matrix,
class Vector>
421 Teuchos::RCP<const Teuchos::ParameterList>
425 using Teuchos::tuple;
426 using Teuchos::ParameterList;
427 using Teuchos::setStringToIntegralParameter;
// Function-local static: the list is only built while this handle is still null.
429 static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
431 if( is_null(valid_params) )
433 Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
// Boolean options, all defaulting to true.
435 pl->set(
"Equil",
true,
"Whether to equilibrate the system before solve, does nothing now");
436 pl->set(
"IsContiguous",
true,
"Whether GIDs contiguous");
437 pl->set(
"UseCustomGather",
true,
"Whether to use new matrix-gather routine");
// "Trans" is a string option (default "NOTRANS") mapped to an integer.
// The second tuple holds one description per option name, in the same order:
// NOTRANS, TRANS, CONJ. The first two descriptions were previously swapped.
439 setStringToIntegralParameter<int>(
"Trans",
"NOTRANS",
440 "Solve for the transpose system or not",
441 tuple<string>(
"NOTRANS",
"TRANS",
"CONJ"),
442 tuple<string>(
"Do not solve with transpose",
443 "Solve with transpose",
444 "Solve with the conjugate transpose"),
// loadA_impl(current_phase) (listing fragment: gaps in the embedded line numbers are lines not shown here).
// Brings the matrix into the host arrays host_nzvals_view_, host_rows_view_ and
// host_col_ptr_view_ that the factorization routines read.
454 template <
class Matrix,
class Vector>
459 #ifdef HAVE_AMESOS2_TIMERS
460 Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
// Nothing is loaded in the SOLVE phase.
463 if(current_phase == SOLVE)
return(
false);
// Single-process case; the body of this branch is not shown.
465 if ( single_proc_optimization() ) {
// Resize the host arrays only when their current extent differs: values and
// row indices hold one entry per nonzero, the column-pointer array globalNumRows_ + 1 entries.
472 if (host_nzvals_view_.extent(0) != this->globalNumNonZeros_)
473 Kokkos::resize(host_nzvals_view_, this->globalNumNonZeros_);
474 if (host_rows_view_.extent(0) != this->globalNumNonZeros_)
475 Kokkos::resize(host_rows_view_, this->globalNumNonZeros_);
476 if (host_col_ptr_view_.extent(0) != (this->globalNumRows_ + 1))
477 Kokkos::resize(host_col_ptr_view_, this->globalNumRows_ + 1);
// nnz_ret starts at -1 and is overwritten by whichever load path runs.
// The custom gather is used only if it is enabled, there is more than one
// process, and the scalar type is float or double.
479 local_ordinal_type nnz_ret = -1;
480 bool use_gather = use_gather_;
481 use_gather = (use_gather && this->matrixA_->getComm()->getSize() > 1);
482 use_gather = (use_gather && (std::is_same<scalar_type, float>::value || std::is_same<scalar_type, double>::value));
484 #ifdef HAVE_AMESOS2_TIMERS
485 Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
// Gather with column_major set. With non-contiguous GIDs the matrix is first
// reindexed onto contig_rowmap_/contig_colmap_ and the reindexed matrix is gathered.
488 bool column_major =
true;
489 if (!is_contiguous_) {
490 auto contig_mat = this->matrixA_->reindex(this->contig_rowmap_, this->contig_colmap_, current_phase);
491 nnz_ret = contig_mat->gather(host_nzvals_view_, host_rows_view_, host_col_ptr_view_, this->perm_g2l, this->recvCountRows, this->recvDisplRows, this->recvCounts, this->recvDispls,
492 this->transpose_map, this->nzvals_t, column_major, current_phase);
494 nnz_ret = this->matrixA_->gather(host_nzvals_view_, host_rows_view_, host_col_ptr_view_, this->perm_g2l, this->recvCountRows, this->recvDisplRows, this->recvCounts, this->recvDispls,
495 this->transpose_map, this->nzvals_t, column_major, current_phase);
// A negative count from gather clears use_gather; the helper-based do_get call
// follows (its leading lines are not shown).
499 if (nnz_ret < 0) use_gather =
false;
504 ::do_get(this->matrixA_.ptr(), host_nzvals_view_, host_rows_view_, host_col_ptr_view_, nnz_ret,
505 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
507 this->rowIndexBase_);
// When the gather path was used, or where this->root_ is set, the received
// nonzero count must equal globalNumNonZeros_; otherwise an exception is raised.
512 if (use_gather || this->root_) {
513 TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != as<local_ordinal_type>(this->globalNumNonZeros_),
515 "Amesos2_KLU2 loadA_impl: Did not get the expected number of non-zero vals("
516 +std::to_string(nnz_ret)+
" vs "+std::to_string(this->globalNumNonZeros_)+
")");
524 template<
class Matrix,
class Vector>
530 #endif // AMESOS2_KLU2_DEF_HPP
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers...
Definition: Amesos2_SolverCore_decl.hpp:71
KLU2(Teuchos::RCP< const Matrix > A, Teuchos::RCP< Vector > X, Teuchos::RCP< const Vector > B)
Initialize from Teuchos::RCP.
Definition: Amesos2_KLU2_def.hpp:32
A generic helper class for getting a CCS representation of a Matrix.
Definition: Amesos2_Util.hpp:618
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
KLU2 specific solve.
Definition: Amesos2_KLU2_def.hpp:196
EPhase
Used to indicate a phase in the direct solution.
Definition: Amesos2_TypeDecl.hpp:31
Amesos2 KLU2 declarations.
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal structures.
Definition: Amesos2_KLU2_def.hpp:456
~KLU2()
Destructor.
Definition: Amesos2_KLU2_def.hpp:51
void setParameters_impl(const Teuchos::RCP< Teuchos::ParameterList > &parameterList)
Definition: Amesos2_KLU2_def.hpp:394
int symbolicFactorization_impl()
Perform symbolic factorization of the matrix using KLU2.
Definition: Amesos2_KLU2_def.hpp:98
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition: Amesos2_KLU2_def.hpp:84
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition: Amesos2_KLU2_def.hpp:383
A Matrix adapter interface for Amesos2.
Definition: Amesos2_MatrixAdapter_decl.hpp:42
int numericFactorization_impl()
KLU2 specific numeric factorization.
Definition: Amesos2_KLU2_def.hpp:128
Amesos2 interface to the KLU2 package.
Definition: Amesos2_KLU2_decl.hpp:38
bool single_proc_optimization() const
can we optimize size_type and ordinal_type for straight pass through, also check that is_contiguous_ ...
Definition: Amesos2_KLU2_def.hpp:78
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition: Amesos2_KLU2_def.hpp:422
A templated MultiVector class adapter for Amesos2.
Definition: Amesos2_MultiVecAdapter_decl.hpp:142