53 #ifndef AMESOS2_CHOLMOD_DEF_HPP
54 #define AMESOS2_CHOLMOD_DEF_HPP
56 #include <Teuchos_Tuple.hpp>
57 #include <Teuchos_ParameterList.hpp>
58 #include <Teuchos_StandardParameterEntryValidators.hpp>
63 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
64 #include "KokkosSparse_sptrsv_cholmod.hpp"
70 template <
class Matrix,
class Vector>
72 Teuchos::RCP<const Matrix> A,
73 Teuchos::RCP<Vector> X,
74 Teuchos::RCP<const Vector> B )
79 , is_contiguous_(true)
80 , use_triangular_solves_(false)
86 cholmod_l_start(&data_.c);
90 data_.c.itype = CHOLMOD_LONG;
92 data_.c.supernodal = CHOLMOD_AUTO;
93 data_.c.quick_return_if_not_posdef = 1;
97 template <
class Matrix,
class Vector>
103 cholmod_l_free_factor(&(data_.L), &(data_.c));
104 cholmod_l_free_dense(&(data_.Y), &data_.c);
105 cholmod_l_free_dense(&(data_.E), &data_.c);
107 cholmod_l_finish(&(data_.c));
110 template<
class Matrix,
class Vector>
114 #ifdef HAVE_AMESOS2_TIMERS
115 Teuchos::TimeMonitor preOrderTimer(this->timers_.preOrderTime_);
120 data_.L = cholmod_l_analyze(&data_.A, &(data_.c));
121 info = data_.c.status;
125 Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
127 TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
129 "Amesos2 cholmod_l_analyze failure in Cholmod preOrdering_impl");
135 template <
class Matrix,
class Vector>
142 #ifdef HAVE_AMESOS2_TIMERS
143 Teuchos::TimeMonitor symFactTimer(this->timers_.symFactTime_);
146 cholmod_l_resymbol (&data_.A, NULL, 0,
true, data_.L, &(data_.c));
148 info = data_.c.status;
155 skip_symfact =
false;
159 Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
161 TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
163 "Amesos2 cholmod_l_resymbol failure in Cholmod symbolicFactorization_impl");
165 if(use_triangular_solves_) {
166 triangular_solve_symbolic();
173 template <
class Matrix,
class Vector>
179 #ifdef HAVE_AMESOS2_DEBUG
180 TEUCHOS_TEST_FOR_EXCEPTION(data_.A.ncol != Teuchos::as<size_t>(this->globalNumCols_),
182 "Error in converting to cholmod_sparse: wrong number of global columns." );
183 TEUCHOS_TEST_FOR_EXCEPTION(data_.A.nrow != Teuchos::as<size_t>(this->globalNumRows_),
185 "Error in converting to cholmod_sparse: wrong number of global rows." );
188 #ifdef HAVE_AMESOS2_TIMERS
189 Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
192 #ifdef HAVE_AMESOS2_VERBOSE_DEBUG
196 cholmod_l_factorize(&data_.A, data_.L, &(data_.c));
197 info = data_.c.status;
200 Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
202 TEUCHOS_TEST_FOR_EXCEPTION(info == CHOLMOD_OUT_OF_MEMORY,
204 "Amesos2 cholmod_l_factorize error code: CHOLMOD_OUT_OF_MEMORY");
206 TEUCHOS_TEST_FOR_EXCEPTION(info == CHOLMOD_NOT_POSDEF,
208 "Amesos2 cholmod_l_factorize error code: CHOLMOD_NOT_POSDEF.");
210 TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
212 "Amesos2 cholmod_l_factorize error code:" << info);
214 if(use_triangular_solves_) {
215 triangular_solve_numeric();
222 template <
class Matrix,
class Vector>
227 const global_size_type ld_rhs = X->getGlobalLength();
228 const size_t nrhs = X->getGlobalNumVectors();
231 #ifdef HAVE_AMESOS2_TIMERS
232 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
233 Teuchos::TimeMonitor redistTimer( this->timers_.vecRedistTime_ );
238 if(use_triangular_solves_) {
239 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
240 Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
241 device_solve_array_t>::do_get(B, device_bValues_,
242 Teuchos::as<size_t>(ld_rhs),
243 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
244 this->rowIndexBase_);
245 Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
246 device_solve_array_t>::do_get(X, device_xValues_,
247 Teuchos::as<size_t>(ld_rhs),
248 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
249 this->rowIndexBase_);
253 Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
254 host_solve_array_t>::do_get(B, host_bValues_,
255 Teuchos::as<size_t>(ld_rhs),
256 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
257 this->rowIndexBase_);
258 Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
259 host_solve_array_t>::do_get(X, host_xValues_,
260 Teuchos::as<size_t>(ld_rhs),
261 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
262 this->rowIndexBase_);
269 #ifdef HAVE_AMESOS2_TIMERS
270 Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
273 if(use_triangular_solves_) {
277 function_map::cholmod_init_dense(Teuchos::as<long>(this->globalNumRows_),
278 Teuchos::as<int>(nrhs), Teuchos::as<int>(ld_rhs), host_bValues_.data(), &data_.b);
279 function_map::cholmod_init_dense(Teuchos::as<long>(this->globalNumRows_),
280 Teuchos::as<int>(nrhs), Teuchos::as<int>(ld_rhs), host_xValues_.data(), &data_.x);
282 cholmod_dense *xtemp = &(data_.x);
283 cholmod_l_solve2(CHOLMOD_A, data_.L, &data_.b, NULL,
284 &(xtemp), NULL, &data_.Y, &data_.E, &data_.c);
287 ierr = data_.c.status;
290 Teuchos::broadcast(*(this->getComm()), 0, &ierr);
292 TEUCHOS_TEST_FOR_EXCEPTION(ierr == -2, std::runtime_error,
"Ran out of memory" );
296 #ifdef HAVE_AMESOS2_TIMERS
297 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
300 if(use_triangular_solves_) {
301 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
302 Util::put_1d_data_helper_kokkos_view<
304 Teuchos::as<size_t>(ld_rhs),
305 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
306 this->rowIndexBase_);
310 Util::put_1d_data_helper_kokkos_view<
312 Teuchos::as<size_t>(ld_rhs),
313 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
314 this->rowIndexBase_);
322 template <
class Matrix,
class Vector>
326 return( this->matrixA_->getGlobalNumRows() == this->matrixA_->getGlobalNumCols() );
330 template <
class Matrix,
class Vector>
335 using Teuchos::getIntegralValue;
336 using Teuchos::ParameterEntryValidator;
338 RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
340 is_contiguous_ = parameterList->get<
bool>(
"IsContiguous",
true);
341 use_triangular_solves_ = parameterList->get<
bool>(
"Enable_KokkosKernels_TriangularSolves",
false);
343 if(use_triangular_solves_) {
344 #if not defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) || not defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
345 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
346 "Calling for triangular solves but KokkosKernels_ENABLE_SUPERNODAL_SPTRSV and KokkosKernels_ENABLE_TPL_CHOLMOD not configured." );
350 data_.c.dbound = parameterList->get<
double>(
"dbound", 0.0);
351 data_.c.prefer_upper = (parameterList->get<
bool>(
"PreferUpper",
true)) ? 1 : 0;
352 data_.c.print = parameterList->get<
int>(
"print", 3);
353 data_.c.nmethods = parameterList->get<
int>(
"nmethods", 0);
358 #ifdef KOKKOS_ENABLE_CUDA
359 const int default_gpu_setting = 1;
363 const int default_gpu_setting = 0;
366 data_.c.useGPU = parameterList->get<
int>(
"useGPU", default_gpu_setting);
368 bool bSuperNodal = parameterList->get<
bool>(
"SuperNodal",
false);
369 data_.c.supernodal = bSuperNodal ? CHOLMOD_SUPERNODAL : CHOLMOD_AUTO;
373 template <
class Matrix,
class Vector>
374 Teuchos::RCP<const Teuchos::ParameterList>
378 using Teuchos::tuple;
379 using Teuchos::ParameterList;
380 using Teuchos::EnhancedNumberValidator;
381 using Teuchos::setStringToIntegralParameter;
382 using Teuchos::stringToIntegralParameterEntryValidator;
384 static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
386 if( is_null(valid_params) ){
387 Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
390 Teuchos::RCP<EnhancedNumberValidator<int> > print_validator
391 = Teuchos::rcp(
new EnhancedNumberValidator<int>(0,5));
393 Teuchos::RCP<EnhancedNumberValidator<int> > nmethods_validator
394 = Teuchos::rcp(
new EnhancedNumberValidator<int>(0,9));
396 pl->set(
"nmethods", 0,
"Specifies the number of different ordering methods to try", nmethods_validator);
398 pl->set(
"print", 3,
"Specifies the verbosity of the print statements", print_validator);
400 pl->set(
"dbound", 0.0,
401 "Specifies the smallest absolute value on the diagonal D for the LDL' factorization");
404 pl->set(
"Equil",
true,
"Whether to equilibrate the system before solve");
406 pl->set(
"PreferUpper",
true,
407 "Specifies whether the matrix will be "
408 "stored in upper triangular form.");
410 pl->set(
"useGPU", -1,
"1: Use GPU is 1, 0: Do not use GPU, -1: ENV CHOLMOD_USE_GPU set GPU usage.");
412 pl->set(
"Enable_KokkosKernels_TriangularSolves",
false,
"Whether to use triangular solves.");
414 pl->set(
"IsContiguous",
true,
"Whether GIDs contiguous");
416 pl->set(
"SuperNodal",
false,
"Whether to use super nodal");
425 template <
class Matrix,
class Vector>
429 #ifdef HAVE_AMESOS2_TIMERS
430 Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
435 Kokkos::resize(host_nzvals_view_, this->globalNumNonZeros_);
436 Kokkos::resize(host_rows_view_, this->globalNumNonZeros_);
437 Kokkos::resize(host_col_ptr_view_, this->globalNumRows_ + 1);
441 #ifdef HAVE_AMESOS2_TIMERS
442 Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
445 TEUCHOS_TEST_FOR_EXCEPTION(this->rowIndexBase_ != this->columnIndexBase_,
447 "Row and column maps have different indexbase ");
449 if ( is_contiguous_ ==
true ) {
450 Util::get_ccs_helper_kokkos_view<
452 host_size_type_array>::do_get(this->matrixA_.ptr(),
453 host_nzvals_view_, host_rows_view_,
454 host_col_ptr_view_, nnz_ret, ROOTED,
456 this->rowIndexBase_);
459 Util::get_ccs_helper_kokkos_view<
461 host_size_type_array>::do_get(this->matrixA_.ptr(),
462 host_nzvals_view_, host_rows_view_,
463 host_col_ptr_view_, nnz_ret, CONTIGUOUS_AND_ROOTED,
465 this->rowIndexBase_);
469 TEUCHOS_TEST_FOR_EXCEPTION(nnz_ret != Teuchos::as<long>(this->globalNumNonZeros_),
471 "Did not get the expected number of non-zero vals");
473 function_map::cholmod_init_sparse(Teuchos::as<size_t>(this->globalNumRows_),
474 Teuchos::as<size_t>(this->globalNumCols_),
475 Teuchos::as<size_t>(this->globalNumNonZeros_),
477 host_col_ptr_view_.data(),
478 host_nzvals_view_.data(),
479 host_rows_view_.data(),
482 TEUCHOS_TEST_FOR_EXCEPTION(data_.A.stype == 0, std::runtime_error,
483 "CHOLMOD loadA_impl loaded matrix but it is not symmetric.");
488 template <
class Matrix,
class Vector>
492 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
494 device_khL_.create_sptrsv_handle(
495 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_ETREE, data_.L->n,
true);
496 device_khU_.create_sptrsv_handle(
497 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_ETREE, data_.L->n,
false);
500 long *long_etree =
static_cast<long*
>(data_.c.Iwork) + 2 * data_.L->n;
501 Kokkos::resize(host_trsv_etree_, data_.L->nsuper);
502 for (
size_t i = 0 ; i < data_.L->nsuper; ++i) {
503 host_trsv_etree_(i) = long_etree[i];
507 device_khL_.set_sptrsv_etree(host_trsv_etree_.data());
508 device_khU_.set_sptrsv_etree(host_trsv_etree_.data());
510 size_t ld_rhs = this->matrixA_->getGlobalNumRows();
511 Kokkos::resize(host_trsv_perm_, ld_rhs);
512 long *iperm =
static_cast<long*
>(data_.L->Perm);
513 for (
size_t i = 0; i < ld_rhs; i++) {
514 host_trsv_perm_(iperm[i]) = i;
516 deep_copy_or_assign_view(device_trsv_perm_, host_trsv_perm_);
519 device_khL_.set_sptrsv_perm(host_trsv_perm_.data());
520 device_khU_.set_sptrsv_perm(host_trsv_perm_.data());
523 KokkosSparse::Experimental::sptrsv_symbolic<long, kernel_handle_type>
524 (&device_khL_, &device_khU_, data_.L, &data_.c);
528 template <
class Matrix,
class Vector>
530 Cholmod<Matrix,Vector>::triangular_solve_numeric()
532 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
534 KokkosSparse::Experimental::sptrsv_compute<long, kernel_handle_type>
535 (&device_khL_, &device_khU_, data_.L, &data_.c);
536 #endif // HAVE_AMESOS2_TRIANGULAR_SOLVE
539 template <
class Matrix,
class Vector>
541 Cholmod<Matrix,Vector>::triangular_solve()
const
543 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
544 size_t ld_rhs = device_xValues_.extent(0);
545 size_t nrhs = device_xValues_.extent(1);
547 Kokkos::resize(device_trsv_rhs_, ld_rhs, nrhs);
548 Kokkos::resize(device_trsv_sol_, ld_rhs, nrhs);
551 auto local_device_bValues = device_bValues_;
552 auto local_device_trsv_perm = device_trsv_perm_;
553 auto local_device_trsv_rhs = device_trsv_rhs_;
554 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
555 KOKKOS_LAMBDA(
size_t j) {
556 for(
size_t k = 0; k < nrhs; ++k) {
557 local_device_trsv_rhs(local_device_trsv_perm(j),k) = local_device_bValues(j,k);
561 for(
size_t k = 0; k < nrhs; ++k) {
562 auto sub_sol = Kokkos::subview(device_trsv_sol_, Kokkos::ALL, k);
563 auto sub_rhs = Kokkos::subview(device_trsv_rhs_, Kokkos::ALL, k);
566 KokkosSparse::Experimental::sptrsv_solve(&device_khL_, sub_sol, sub_rhs);
569 KokkosSparse::Experimental::sptrsv_solve(&device_khU_, sub_rhs, sub_sol);
573 auto local_device_xValues = device_xValues_;
574 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
575 KOKKOS_LAMBDA(
size_t j) {
576 for(
size_t k = 0; k < nrhs; ++k) {
577 local_device_xValues(j,k) = local_device_trsv_rhs(local_device_trsv_perm(j),k);
583 template<
class Matrix,
class Vector>
584 const char* Cholmod<Matrix,Vector>::name =
"Cholmod";
589 #endif // AMESOS2_CHOLMOD_DEF_HPP
~Cholmod()
Destructor.
Definition: Amesos2_Cholmod_def.hpp:98
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition: Amesos2_Cholmod_def.hpp:112
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers...
Definition: Amesos2_SolverCore_decl.hpp:105
EPhase
Used to indicate a phase in the direct solution.
Definition: Amesos2_TypeDecl.hpp:65
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition: Amesos2_Cholmod_def.hpp:375
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition: Amesos2_Cholmod_def.hpp:324
Cholmod(Teuchos::RCP< const Matrix > A, Teuchos::RCP< Vector > X, Teuchos::RCP< const Vector > B)
Initialize from Teuchos::RCP.
Definition: Amesos2_Cholmod_def.hpp:71
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
CHOLMOD specific solve.
Definition: Amesos2_Cholmod_def.hpp:224
void setParameters_impl(const Teuchos::RCP< Teuchos::ParameterList > ¶meterList)
Definition: Amesos2_Cholmod_def.hpp:332
Amesos2 CHOLMOD declarations.
A Matrix adapter interface for Amesos2.
Definition: Amesos2_MatrixAdapter_decl.hpp:76
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal structures.
Definition: Amesos2_Cholmod_def.hpp:427
int symbolicFactorization_impl()
Perform symbolic factorization of the matrix using CHOLMOD.
Definition: Amesos2_Cholmod_def.hpp:137
Amesos2 interface to the CHOLMOD package.
Definition: Amesos2_Cholmod_decl.hpp:76
A templated MultiVector class adapter for Amesos2.
Definition: Amesos2_MultiVecAdapter_decl.hpp:176
int numericFactorization_impl()
CHOLMOD specific numeric factorization.
Definition: Amesos2_Cholmod_def.hpp:175