19 #ifndef AMESOS2_SUPERLU_DEF_HPP
20 #define AMESOS2_SUPERLU_DEF_HPP
22 #include <Teuchos_Tuple.hpp>
23 #include <Teuchos_ParameterList.hpp>
24 #include <Teuchos_StandardParameterEntryValidators.hpp>
29 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
36 #include "KokkosSparse_sptrsv_superlu.hpp"
42 template <
class Matrix,
class Vector>
44 Teuchos::RCP<const Matrix> A,
45 Teuchos::RCP<Vector> X,
46 Teuchos::RCP<const Vector> B )
48 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
49 , sptrsv_invert_diag_(true)
50 , sptrsv_invert_offdiag_ (false)
51 , sptrsv_u_in_csr_ (true)
52 , sptrsv_merge_supernodes_ (false)
53 , sptrsv_use_spmv_ (false)
55 , is_contiguous_(true)
56 , use_triangular_solves_(false)
58 , symmetrize_metis_(true)
65 SLU::set_default_options(&(data_.options));
67 data_.options.PrintStat = SLU::NO;
69 SLU::StatInit(&(data_.stat));
71 Kokkos::resize(data_.perm_r, this->globalNumRows_);
72 Kokkos::resize(data_.perm_c, this->globalNumCols_);
73 Kokkos::resize(data_.etree, this->globalNumCols_);
74 Kokkos::resize(data_.R, this->globalNumRows_);
75 Kokkos::resize(data_.C, this->globalNumCols_);
77 data_.relax = SLU::sp_ienv(2);
78 data_.panel_size = SLU::sp_ienv(1);
93 template <
class Matrix,
class Vector>
101 SLU::StatFree( &(data_.stat) ) ;
104 if ( data_.A.Store != NULL ){
105 SLU::Destroy_SuperMatrix_Store( &(data_.A) );
109 if ( data_.L.Store != NULL ){
110 SLU::Destroy_SuperNode_Matrix( &(data_.L) );
111 SLU::Destroy_CompCol_Matrix( &(data_.U) );
115 template <
class Matrix,
class Vector>
119 std::ostringstream oss;
120 oss <<
"SuperLU solver interface";
122 oss <<
", \"ILUTP\" : {";
123 oss <<
"drop tol = " << data_.options.ILU_DropTol;
124 oss <<
", fill factor = " << data_.options.ILU_FillFactor;
125 oss <<
", fill tol = " << data_.options.ILU_FillTol;
126 switch(data_.options.ILU_MILU) {
138 oss <<
", regular ILU";
140 switch(data_.options.ILU_Norm) {
149 oss <<
", infinity-norm";
153 oss <<
", direct solve";
169 template<
class Matrix,
class Vector>
181 int permc_spec = data_.options.ColPerm;
182 if (!use_metis_ && permc_spec != SLU::MY_PERMC && this->root_ ){
183 #ifdef HAVE_AMESOS2_TIMERS
184 Teuchos::TimeMonitor preOrderTimer(this->timers_.preOrderTime_);
187 SLU::get_perm_c(permc_spec, &(data_.A), data_.perm_c.data());
194 template <
class Matrix,
class Vector>
209 #ifdef HAVE_AMESOS2_TIMERS
210 Teuchos::TimeMonitor symFactTime( this->timers_.symFactTime_ );
212 same_symbolic_ =
false;
213 data_.options.Fact = SLU::DOFACT;
216 #ifdef HAVE_AMESOS2_TIMERS
217 Teuchos::RCP< Teuchos::Time > MetisTimer_ = Teuchos::TimeMonitor::getNewCounter (
"Time for METIS");
218 Teuchos::TimeMonitor numFactTimer(*MetisTimer_);
220 size_t n = this->globalNumRows_;
221 size_t nz = this->globalNumNonZeros_;
222 host_size_type_array host_rowind_view_ = host_rows_view_;
223 host_ordinal_type_array host_colptr_view_ = host_col_ptr_view_;
226 if (symmetrize_metis_) {
233 SLU::at_plus_a(n, nz, host_col_ptr_view_.data(), host_rows_view_.data(),
234 &new_nz, &new_col_ptr, &new_row_ind);
239 Kokkos::resize (host_rowind_view_, nz);
240 Kokkos::realloc(host_colptr_view_, n+1);
241 for (
size_t i = 0; i <= n; i++) {
242 host_colptr_view_(i) = new_col_ptr[i];
244 for (
size_t i = 0; i < nz; i++) {
245 host_rowind_view_(i) = new_row_ind[i];
249 SLU::SUPERLU_FREE(new_col_ptr);
251 SLU::SUPERLU_FREE(new_row_ind);
256 using metis_int_array_type = host_ordinal_type_array;
257 metis_int_array_type metis_perm = metis_int_array_type(Kokkos::ViewAllocateWithoutInitializing(
"metis_perm"), n);
258 metis_int_array_type metis_iperm = metis_int_array_type(Kokkos::ViewAllocateWithoutInitializing(
"metis_iperm"), n);
262 Amesos2::Util::reorder(
263 host_colptr_view_, host_rowind_view_,
264 metis_perm, metis_iperm, new_nnz,
false);
266 for (
size_t i = 0; i < n; i++) {
267 data_.perm_r(i) = metis_iperm(i);
268 data_.perm_c(i) = metis_iperm(i);
272 data_.options.ColPerm = SLU::MY_PERMC;
273 data_.options.RowPerm = SLU::MY_PERMR;
281 template <
class Matrix,
class Vector>
290 if ( !same_symbolic_ && data_.L.Store != NULL ){
291 SLU::Destroy_SuperNode_Matrix( &(data_.L) );
292 SLU::Destroy_CompCol_Matrix( &(data_.U) );
293 data_.L.Store = NULL;
294 data_.U.Store = NULL;
297 if( same_symbolic_ ) data_.options.Fact = SLU::SamePattern_SameRowPerm;
303 #ifdef HAVE_AMESOS2_DEBUG
304 TEUCHOS_TEST_FOR_EXCEPTION( data_.A.ncol != as<int>(this->globalNumCols_),
306 "Error in converting to SuperLU SuperMatrix: wrong number of global columns." );
307 TEUCHOS_TEST_FOR_EXCEPTION( data_.A.nrow != as<int>(this->globalNumRows_),
309 "Error in converting to SuperLU SuperMatrix: wrong number of global rows." );
312 if( data_.options.Equil == SLU::YES ){
313 magnitude_type rowcnd, colcnd, amax;
316 function_map::gsequ(&(data_.A), data_.R.data(),
317 data_.C.data(), &rowcnd, &colcnd,
322 function_map::laqgs(&(data_.A), data_.R.data(),
323 data_.C.data(), rowcnd, colcnd,
324 amax, &(data_.equed));
327 data_.rowequ = (data_.equed ==
'R') || (data_.equed ==
'B');
328 data_.colequ = (data_.equed ==
'C') || (data_.equed ==
'B');
335 SLU::sp_preorder(&(data_.options), &(data_.A), data_.perm_c.data(),
336 data_.etree.data(), &(data_.AC));
339 #ifdef HAVE_AMESOS2_TIMERS
340 Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
343 #ifdef HAVE_AMESOS2_VERBOSE_DEBUG
344 std::cout <<
"Superlu:: Before numeric factorization" << std::endl;
345 std::cout <<
"nzvals_ : " << nzvals_.toString() << std::endl;
346 std::cout <<
"rowind_ : " << rowind_.toString() << std::endl;
347 std::cout <<
"colptr_ : " << colptr_.toString() << std::endl;
350 if(ILU_Flag_==
false) {
351 function_map::gstrf(&(data_.options), &(data_.AC),
352 data_.relax, data_.panel_size, data_.etree.data(),
353 NULL, 0, data_.perm_c.data(), data_.perm_r.data(),
354 &(data_.L), &(data_.U),
355 #ifdef HAVE_AMESOS2_SUPERLU5_API
358 &(data_.stat), &info);
361 function_map::gsitrf(&(data_.options), &(data_.AC),
362 data_.relax, data_.panel_size, data_.etree.data(),
363 NULL, 0, data_.perm_c.data(), data_.perm_r.data(),
364 &(data_.L), &(data_.U),
365 #ifdef HAVE_AMESOS2_SUPERLU5_API
368 &(data_.stat), &info);
371 if (data_.options.ConditionNumber == SLU::YES) {
373 if (data_.options.Trans == SLU::NOTRANS) {
374 *(
unsigned char *)norm =
'1';
376 *(
unsigned char *)norm =
'I';
379 data_.anorm = function_map::langs(norm, &(data_.A));
380 function_map::gscon(norm, &(data_.L), &(data_.U),
381 data_.anorm, &(data_.rcond),
382 &(data_.stat), &info);
386 SLU::Destroy_CompCol_Permuted( &(data_.AC) );
389 this->setNnzLU( as<size_t>(((SLU::SCformat*)data_.L.Store)->nnz +
390 ((SLU::NCformat*)data_.U.Store)->nnz) );
394 if( data_.options.Equil == SLU::YES ) {
396 Teuchos::broadcast(*(this->getComm()), 0, &info2);
398 TEUCHOS_TEST_FOR_EXCEPTION
399 (info2 < 0, std::runtime_error,
400 "SuperLU's gsequ function returned with status " << info2 <<
" < 0."
401 " This means that argument " << (-info2) <<
" given to the function"
402 " had an illegal value.");
404 if (info2 <= data_.A.nrow) {
405 TEUCHOS_TEST_FOR_EXCEPTION
406 (
true, std::runtime_error,
"SuperLU's gsequ function returned with "
407 "info = " << info2 <<
" > 0, and info <= A.nrow = " << data_.A.nrow
408 <<
". This means that row " << info2 <<
" of A is exactly zero.");
410 else if (info2 > data_.A.ncol) {
411 TEUCHOS_TEST_FOR_EXCEPTION
412 (
true, std::runtime_error,
"SuperLU's gsequ function returned with "
413 "info = " << info2 <<
" > 0, and info > A.ncol = " << data_.A.ncol
414 <<
". This means that column " << (info2 - data_.A.nrow) <<
" of "
415 "A is exactly zero.");
418 TEUCHOS_TEST_FOR_EXCEPTION
419 (
true, std::runtime_error,
"SuperLU's gsequ function returned "
420 "with info = " << info2 <<
" > 0, but its value is not in the "
421 "range permitted by the documentation. This should never happen "
422 "(it appears to be SuperLU's fault).");
428 Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
430 global_size_type info_st = as<global_size_type>(info);
431 TEUCHOS_TEST_FOR_EXCEPTION( (info_st > 0) && (info_st <= this->globalNumCols_),
433 "Factorization complete, but matrix is singular. Division by zero eminent");
434 TEUCHOS_TEST_FOR_EXCEPTION( (info_st > 0) && (info_st > this->globalNumCols_),
436 "Memory allocation failure in Superlu factorization");
438 data_.options.Fact = SLU::FACTORED;
439 same_symbolic_ =
true;
441 if(use_triangular_solves_) {
442 #ifdef HAVE_AMESOS2_VERBOSE_DEBUG
443 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
444 if (this->getComm()->getRank()) {
445 std::cout <<
" > Metis : " << (use_metis_ ?
"YES" :
"NO") << std::endl;
446 std::cout <<
" > Equil : " << (data_.options.Equil == SLU::YES ?
"YES" :
"NO") << std::endl;
447 std::cout <<
" > Cond Number : " << (data_.options.ConditionNumber == SLU::YES ?
"YES" :
"NO") << std::endl;
448 std::cout <<
" > Invert diag : " << sptrsv_invert_diag_ << std::endl;
449 std::cout <<
" > Invert off-diag : " << sptrsv_invert_offdiag_ << std::endl;
450 std::cout <<
" > U in CSR : " << sptrsv_u_in_csr_ << std::endl;
451 std::cout <<
" > Merge : " << sptrsv_merge_supernodes_ << std::endl;
452 std::cout <<
" > Use SpMV : " << sptrsv_use_spmv_ << std::endl;
460 #ifdef HAVE_AMESOS2_TIMERS
461 Teuchos::RCP< Teuchos::Time > SpTrsvTimer_ = Teuchos::TimeMonitor::getNewCounter (
"Time for SpTrsv setup");
462 Teuchos::TimeMonitor numFactTimer(*SpTrsvTimer_);
464 triangular_solve_factor();
471 template <
class Matrix,
class Vector>
477 #ifdef HAVE_AMESOS2_TIMERS
478 Teuchos::RCP< Teuchos::Time > Amesos2SolveTimer_ = Teuchos::TimeMonitor::getNewCounter (
"Time for Amesos2");
479 Teuchos::TimeMonitor solveTimer(*Amesos2SolveTimer_);
482 const global_size_type ld_rhs = this->root_ ? X->getGlobalLength() : 0;
483 const size_t nrhs = X->getGlobalNumVectors();
485 bool bDidAssignX =
false;
486 bool bDidAssignB =
false;
488 #ifdef HAVE_AMESOS2_TIMERS
489 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
490 Teuchos::TimeMonitor redistTimer( this->timers_.vecRedistTime_ );
495 const bool initialize_data =
true;
496 const bool do_not_initialize_data =
false;
497 if(use_triangular_solves_) {
498 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
499 bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
500 device_solve_array_t>::do_get(do_not_initialize_data, X, device_xValues_,
502 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
503 this->rowIndexBase_);
504 bDidAssignB = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
505 device_solve_array_t>::do_get(initialize_data, B, device_bValues_,
507 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
508 this->rowIndexBase_);
512 bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
513 host_solve_array_t>::do_get(do_not_initialize_data, X, host_xValues_,
515 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
516 this->rowIndexBase_);
517 bDidAssignB = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
518 host_solve_array_t>::do_get(initialize_data, B, host_bValues_,
520 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
521 this->rowIndexBase_);
529 if(bDidAssignB && data_.equed !=
'N') {
530 if(use_triangular_solves_) {
531 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
532 device_solve_array_t copyB(Kokkos::ViewAllocateWithoutInitializing(
"copyB"),
533 device_bValues_.extent(0), device_bValues_.extent(1));
534 Kokkos::deep_copy(copyB, device_bValues_);
535 device_bValues_ = copyB;
539 host_solve_array_t copyB(Kokkos::ViewAllocateWithoutInitializing(
"copyB"),
540 host_bValues_.extent(0), host_bValues_.extent(1));
541 Kokkos::deep_copy(copyB, host_bValues_);
542 host_bValues_ = copyB;
548 magnitude_type rpg, rcond;
553 #ifdef HAVE_AMESOS2_TIMERS
554 Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
557 if(use_triangular_solves_) {
561 Kokkos::resize(data_.ferr, nrhs);
562 Kokkos::resize(data_.berr, nrhs);
565 #ifdef HAVE_AMESOS2_TIMERS
566 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
568 SLU::Dtype_t dtype = type_map::dtype;
569 int i_ld_rhs = as<int>(ld_rhs);
570 function_map::create_Dense_Matrix(&(data_.B), i_ld_rhs, as<int>(nrhs),
571 convert_bValues_, host_bValues_, i_ld_rhs,
572 SLU::SLU_DN, dtype, SLU::SLU_GE);
573 function_map::create_Dense_Matrix(&(data_.X), i_ld_rhs, as<int>(nrhs),
574 convert_xValues_, host_xValues_, i_ld_rhs,
575 SLU::SLU_DN, dtype, SLU::SLU_GE);
578 if(ILU_Flag_==
false) {
579 function_map::gssvx(&(data_.options), &(data_.A),
580 data_.perm_c.data(), data_.perm_r.data(),
581 data_.etree.data(), &(data_.equed), data_.R.data(),
582 data_.C.data(), &(data_.L), &(data_.U), NULL, 0, &(data_.B),
583 &(data_.X), &rpg, &rcond, data_.ferr.data(),
585 #ifdef HAVE_AMESOS2_SUPERLU5_API
588 &(data_.mem_usage), &(data_.stat), &ierr);
591 function_map::gsisx(&(data_.options), &(data_.A),
592 data_.perm_c.data(), data_.perm_r.data(),
593 data_.etree.data(), &(data_.equed), data_.R.data(),
594 data_.C.data(), &(data_.L), &(data_.U), NULL, 0, &(data_.B),
595 &(data_.X), &rpg, &rcond,
596 #ifdef HAVE_AMESOS2_SUPERLU5_API
599 &(data_.mem_usage), &(data_.stat), &ierr);
603 SLU::Destroy_SuperMatrix_Store( &(data_.X) );
604 SLU::Destroy_SuperMatrix_Store( &(data_.B) );
605 data_.X.Store = NULL;
606 data_.B.Store = NULL;
613 #ifdef HAVE_AMESOS2_TIMERS
614 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
616 function_map::convert_back_Dense_Matrix(convert_bValues_, host_bValues_);
617 function_map::convert_back_Dense_Matrix(convert_xValues_, host_xValues_);
621 SLU::Destroy_SuperMatrix_Store( &(data_.X) );
622 SLU::Destroy_SuperMatrix_Store( &(data_.B) );
623 data_.X.Store = NULL;
624 data_.B.Store = NULL;
628 Teuchos::broadcast(*(this->getComm()), 0, &ierr);
630 global_size_type ierr_st = as<global_size_type>(ierr);
631 TEUCHOS_TEST_FOR_EXCEPTION( ierr < 0,
632 std::invalid_argument,
633 "Argument " << -ierr <<
" to SuperLU xgssvx had illegal value" );
634 TEUCHOS_TEST_FOR_EXCEPTION( ierr > 0 && ierr_st <= this->globalNumCols_,
636 "Factorization complete, but U is exactly singular" );
637 TEUCHOS_TEST_FOR_EXCEPTION( ierr > 0 && ierr_st > this->globalNumCols_ + 1,
639 "SuperLU allocated " << ierr - this->globalNumCols_ <<
" bytes of "
640 "memory before allocation failure occured." );
647 #ifdef HAVE_AMESOS2_TIMERS
648 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
651 if(use_triangular_solves_) {
652 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
653 Util::put_1d_data_helper_kokkos_view<
656 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
657 this->rowIndexBase_);
661 Util::put_1d_data_helper_kokkos_view<
664 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
665 this->rowIndexBase_);
673 template <
class Matrix,
class Vector>
680 return( this->matrixA_->getGlobalNumRows() == this->matrixA_->getGlobalNumCols() );
684 template <
class Matrix,
class Vector>
689 using Teuchos::getIntegralValue;
690 using Teuchos::ParameterEntryValidator;
692 RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
694 ILU_Flag_ = parameterList->get<
bool>(
"ILU_Flag",
false);
696 SLU::ilu_set_default_options(&(data_.options));
698 data_.options.PrintStat = SLU::NO;
701 data_.options.Trans = this->control_.useTranspose_ ? SLU::TRANS : SLU::NOTRANS;
703 if( parameterList->isParameter(
"Trans") ){
704 RCP<const ParameterEntryValidator> trans_validator = valid_params->getEntry(
"Trans").validator();
705 parameterList->getEntry(
"Trans").setValidator(trans_validator);
707 data_.options.Trans = getIntegralValue<SLU::trans_t>(*parameterList,
"Trans");
710 if( parameterList->isParameter(
"IterRefine") ){
711 RCP<const ParameterEntryValidator> refine_validator = valid_params->getEntry(
"IterRefine").validator();
712 parameterList->getEntry(
"IterRefine").setValidator(refine_validator);
714 data_.options.IterRefine = getIntegralValue<SLU::IterRefine_t>(*parameterList,
"IterRefine");
717 if( parameterList->isParameter(
"ColPerm") ){
718 RCP<const ParameterEntryValidator> colperm_validator = valid_params->getEntry(
"ColPerm").validator();
719 parameterList->getEntry(
"ColPerm").setValidator(colperm_validator);
721 data_.options.ColPerm = getIntegralValue<SLU::colperm_t>(*parameterList,
"ColPerm");
724 data_.options.DiagPivotThresh = parameterList->get<
double>(
"DiagPivotThresh", 1.0);
726 bool equil = parameterList->get<
bool>(
"Equil",
true);
727 data_.options.Equil = equil ? SLU::YES : SLU::NO;
729 bool condNum = parameterList->get<
bool>(
"ConditionNumber",
false);
730 data_.options.ConditionNumber = condNum ? SLU::YES : SLU::NO;
732 bool symmetric_mode = parameterList->get<
bool>(
"SymmetricMode",
false);
733 data_.options.SymmetricMode = symmetric_mode ? SLU::YES : SLU::NO;
736 if( parameterList->isParameter(
"RowPerm") ){
737 RCP<const ParameterEntryValidator> rowperm_validator = valid_params->getEntry(
"RowPerm").validator();
738 parameterList->getEntry(
"RowPerm").setValidator(rowperm_validator);
739 data_.options.RowPerm = getIntegralValue<SLU::rowperm_t>(*parameterList,
"RowPerm");
748 data_.options.ILU_DropTol = parameterList->get<
double>(
"ILU_DropTol", 0.0001);
750 data_.options.ILU_FillFactor = parameterList->get<
double>(
"ILU_FillFactor", 10.0);
752 if( parameterList->isParameter(
"ILU_Norm") ) {
753 RCP<const ParameterEntryValidator> norm_validator = valid_params->getEntry(
"ILU_Norm").validator();
754 parameterList->getEntry(
"ILU_Norm").setValidator(norm_validator);
755 data_.options.ILU_Norm = getIntegralValue<SLU::norm_t>(*parameterList,
"ILU_Norm");
758 if( parameterList->isParameter(
"ILU_MILU") ) {
759 RCP<const ParameterEntryValidator> milu_validator = valid_params->getEntry(
"ILU_MILU").validator();
760 parameterList->getEntry(
"ILU_MILU").setValidator(milu_validator);
761 data_.options.ILU_MILU = getIntegralValue<SLU::milu_t>(*parameterList,
"ILU_MILU");
764 data_.options.ILU_FillTol = parameterList->get<
double>(
"ILU_FillTol", 0.01);
766 is_contiguous_ = parameterList->get<
bool>(
"IsContiguous",
true);
768 use_metis_ = parameterList->get<
bool>(
"UseMetis",
false);
769 symmetrize_metis_ = parameterList->get<
bool>(
"SymmetrizeMetis",
true);
771 use_triangular_solves_ = parameterList->get<
bool>(
"Enable_KokkosKernels_TriangularSolves",
false);
772 if(use_triangular_solves_) {
773 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
775 sptrsv_invert_diag_ = parameterList->get<
bool>(
"SpTRSV_Invert_Diag",
true);
777 sptrsv_invert_offdiag_ = parameterList->get<
bool>(
"SpTRSV_Invert_OffDiag",
false);
779 sptrsv_u_in_csr_ = parameterList->get<
bool>(
"SpTRSV_U_CSR",
true);
781 sptrsv_merge_supernodes_ = parameterList->get<
bool>(
"SpTRSV_Merge_Supernodes",
false);
783 sptrsv_use_spmv_ = parameterList->get<
bool>(
"SpTRSV_Use_SpMV",
false);
785 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
786 "Calling for triangular solves but KokkosKernels_ENABLE_SUPERNODAL_SPTRSV and KokkosKernels_ENABLE_TPL_SUPERLU were not configured." );
792 template <
class Matrix,
class Vector>
793 Teuchos::RCP<const Teuchos::ParameterList>
797 using Teuchos::tuple;
798 using Teuchos::ParameterList;
799 using Teuchos::EnhancedNumberValidator;
800 using Teuchos::setStringToIntegralParameter;
801 using Teuchos::stringToIntegralParameterEntryValidator;
803 static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
805 if( is_null(valid_params) ){
806 Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
808 setStringToIntegralParameter<SLU::trans_t>(
"Trans",
"NOTRANS",
809 "Solve for the transpose system or not",
810 tuple<string>(
"TRANS",
"NOTRANS",
"CONJ"),
811 tuple<string>(
"Solve with transpose",
812 "Do not solve with transpose",
813 "Solve with the conjugate transpose"),
814 tuple<SLU::trans_t>(SLU::TRANS,
819 setStringToIntegralParameter<SLU::IterRefine_t>(
"IterRefine",
"NOREFINE",
820 "Type of iterative refinement to use",
821 tuple<string>(
"NOREFINE",
"SLU_SINGLE",
"SLU_DOUBLE"),
822 tuple<string>(
"Do not use iterative refinement",
823 "Do single iterative refinement",
824 "Do double iterative refinement"),
825 tuple<SLU::IterRefine_t>(SLU::NOREFINE,
831 setStringToIntegralParameter<SLU::colperm_t>(
"ColPerm",
"COLAMD",
832 "Specifies how to permute the columns of the "
833 "matrix for sparsity preservation",
834 tuple<string>(
"NATURAL",
"MMD_AT_PLUS_A",
835 "MMD_ATA",
"COLAMD"),
836 tuple<string>(
"Natural ordering",
837 "Minimum degree ordering on A^T + A",
838 "Minimum degree ordering on A^T A",
839 "Approximate minimum degree column ordering"),
840 tuple<SLU::colperm_t>(SLU::NATURAL,
846 Teuchos::RCP<EnhancedNumberValidator<double> > diag_pivot_thresh_validator
847 = Teuchos::rcp(
new EnhancedNumberValidator<double>(0.0, 1.0) );
848 pl->set(
"DiagPivotThresh", 1.0,
849 "Specifies the threshold used for a diagonal entry to be an acceptable pivot",
850 diag_pivot_thresh_validator);
852 pl->set(
"Equil",
true,
"Whether to equilibrate the system before solve");
853 pl->set(
"ConditionNumber",
false,
"Whether to approximate condition number");
855 pl->set(
"SymmetricMode",
false,
856 "Specifies whether to use the symmetric mode. "
857 "Gives preference to diagonal pivots and uses "
858 "an (A^T + A)-based column permutation.");
862 setStringToIntegralParameter<SLU::rowperm_t>(
"RowPerm",
"LargeDiag",
863 "Type of row permutation strategy to use",
864 tuple<string>(
"NOROWPERM",
"LargeDiag",
"MY_PERMR"),
865 tuple<string>(
"Use natural ordering",
866 "Use weighted bipartite matching algorithm",
867 "Use the ordering given in perm_r input"),
868 tuple<SLU::rowperm_t>(SLU::NOROWPERM,
869 #if SUPERLU_MAJOR_VERSION > 5 || (SUPERLU_MAJOR_VERSION == 5 && SUPERLU_MINOR_VERSION > 2) || (SUPERLU_MAJOR_VERSION == 5 && SUPERLU_MINOR_VERSION == 2 && SUPERLU_PATCH_VERSION >= 2)
891 pl->set(
"ILU_DropTol", 0.0001,
"ILUT drop tolerance");
893 pl->set(
"ILU_FillFactor", 10.0,
"ILUT fill factor");
895 setStringToIntegralParameter<SLU::norm_t>(
"ILU_Norm",
"INF_NORM",
896 "Type of norm to use",
897 tuple<string>(
"ONE_NORM",
"TWO_NORM",
"INF_NORM"),
898 tuple<string>(
"1-norm",
"2-norm",
"inf-norm"),
899 tuple<SLU::norm_t>(SLU::ONE_NORM,SLU::TWO_NORM,SLU::INF_NORM),
902 setStringToIntegralParameter<SLU::milu_t>(
"ILU_MILU",
"SILU",
903 "Type of modified ILU to use",
904 tuple<string>(
"SILU",
"SMILU_1",
"SMILU_2",
"SMILU_3"),
905 tuple<string>(
"Regular ILU",
"MILU 1",
"MILU 2",
"MILU 3"),
906 tuple<SLU::milu_t>(SLU::SILU,SLU::SMILU_1,SLU::SMILU_2,
910 pl->set(
"ILU_FillTol", 0.01,
"ILUT fill tolerance");
912 pl->set(
"ILU_Flag",
false,
"ILU flag: if true, run ILU routines");
914 pl->set(
"IsContiguous",
true,
"Whether GIDs contiguous");
917 pl->set(
"UseMetis",
false,
"Whether to call METIS before SuperLU");
918 pl->set(
"SymmetrizeMetis",
true,
"Whether to symmetrize matrix before METIS");
920 pl->set(
"Enable_KokkosKernels_TriangularSolves",
false,
"Whether to use triangular solves.");
921 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
922 pl->set(
"SpTRSV_Invert_Diag",
true,
"specify whether to invert diagonal blocks for supernodal sparse-trianguular solve");
923 pl->set(
"SpTRSV_Invert_OffDiag",
false,
"specify whether to apply diagonal-inversion to off-diagonal blocks for supernodal sparse-trianguular solve");
924 pl->set(
"SpTRSV_U_CSR",
true,
"specify whether to store U in CSR forma for supernodal sparse-trianguular solve");
925 pl->set(
"SpTRSV_Merge_Supernodes",
false,
"specify whether to merge supernodes with similar sparsity structures for supernodal sparse-trianguular solve");
926 pl->set(
"SpTRSV_Use_SpMV",
false,
"specify whether to use spmv for supernodal sparse-trianguular solve");
936 template <
class Matrix,
class Vector>
942 #ifdef HAVE_AMESOS2_TIMERS
943 Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
947 if( current_phase == SYMBFACT )
return false;
950 if( data_.A.Store != NULL ){
951 SLU::Destroy_SuperMatrix_Store( &(data_.A) );
952 data_.A.Store = NULL;
957 Kokkos::resize(host_nzvals_view_, this->globalNumNonZeros_);
958 Kokkos::resize(host_rows_view_, this->globalNumNonZeros_);
959 Kokkos::resize(host_col_ptr_view_, this->globalNumRows_ + 1);
964 #ifdef HAVE_AMESOS2_TIMERS
965 Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
968 TEUCHOS_TEST_FOR_EXCEPTION( this->rowIndexBase_ != this->columnIndexBase_,
970 "Row and column maps have different indexbase ");
974 host_size_type_array>::do_get(this->matrixA_.ptr(),
975 host_nzvals_view_, host_rows_view_,
976 host_col_ptr_view_, nnz_ret,
977 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
979 this->rowIndexBase_);
983 SLU::Dtype_t dtype = type_map::dtype;
986 TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != as<int>(this->globalNumNonZeros_),
988 "Did not get the expected number of non-zero vals");
990 function_map::template create_CompCol_Matrix<host_value_type_array>( &(data_.A),
991 this->globalNumRows_, this->globalNumCols_,
993 convert_nzvals_, host_nzvals_view_,
994 host_rows_view_.data(),
995 host_col_ptr_view_.data(),
996 SLU::SLU_NC, dtype, SLU::SLU_GE);
1002 template <
class Matrix,
class Vector>
1006 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
1007 size_t ld_rhs = this->matrixA_->getGlobalNumRows();
1010 SLU::SCformat * Lstore = (SLU::SCformat*)(data_.L.Store);
1011 int nsuper = 1 + Lstore->nsuper;
1012 Kokkos::resize(data_.parents, nsuper);
1013 for (
int s = 0; s < nsuper; s++) {
1014 int j = Lstore->sup_to_col[s+1]-1;
1015 if (data_.etree[j] == static_cast<int>(ld_rhs)) {
1016 data_.parents(s) = -1;
1018 data_.parents(s) = Lstore->col_to_sup[data_.etree[j]];
1022 deep_copy_or_assign_view(device_trsv_perm_r_, data_.perm_r);
1023 deep_copy_or_assign_view(device_trsv_perm_c_, data_.perm_c);
1024 if (data_.options.Equil == SLU::YES) {
1025 deep_copy_or_assign_view(device_trsv_R_, data_.R);
1026 deep_copy_or_assign_view(device_trsv_C_, data_.C);
1029 bool condition_flag =
false;
1030 if (data_.options.ConditionNumber == SLU::YES) {
1031 using STM = Teuchos::ScalarTraits<magnitude_type>;
1032 const magnitude_type eps = STM::eps ();
1034 SCformat *Lstore = (SCformat*)(data_.L.Store);
1035 int nsuper = 1 + Lstore->nsuper;
1036 int *nb = Lstore->sup_to_col;
1038 for (
int i = 0; i < nsuper; i++) {
1039 if (nb[i+1] - nb[i] > max_cols) {
1040 max_cols = nb[i+1] - nb[i];
1045 const magnitude_type multiply_fact (10.0);
1046 condition_flag = (((double)max_cols * nsuper) * eps * multiply_fact >= data_.rcond);
1048 #ifdef HAVE_AMESOS2_VERBOSE_DEBUG
1049 int n = data_.perm_r.extent(0);
1050 std::cout << this->getComm()->getRank()
1051 <<
" : anorm = " << data_.anorm <<
", rcond = " << data_.rcond <<
", n = " << n
1052 <<
", num super cols = " << nsuper <<
", max super cols = " << max_cols
1053 <<
" -> " << ((double)max_cols * nsuper) * eps / data_.rcond
1054 << (((double)max_cols * nsuper) * eps * multiply_fact < data_.rcond ?
" (okay)" :
" (warn)") << std::endl;
1059 if (sptrsv_use_spmv_ && !condition_flag) {
1060 device_khL_.create_sptrsv_handle(
1061 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_SPMV_DAG, ld_rhs,
true);
1062 device_khU_.create_sptrsv_handle(
1063 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_SPMV_DAG, ld_rhs,
false);
1065 device_khL_.create_sptrsv_handle(
1066 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_DAG, ld_rhs,
true);
1067 device_khU_.create_sptrsv_handle(
1068 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_DAG, ld_rhs,
false);
1072 device_khU_.set_sptrsv_column_major (!sptrsv_u_in_csr_);
1075 device_khL_.set_sptrsv_merge_supernodes (sptrsv_merge_supernodes_);
1076 device_khU_.set_sptrsv_merge_supernodes (sptrsv_merge_supernodes_);
1079 bool sptrsv_invert_diag = (!condition_flag && sptrsv_invert_diag_);
1080 bool sptrsv_invert_offdiag = (!condition_flag && sptrsv_invert_offdiag_);
1083 device_khL_.set_sptrsv_invert_diagonal (sptrsv_invert_diag);
1084 device_khU_.set_sptrsv_invert_diagonal (sptrsv_invert_diag);
1087 device_khL_.set_sptrsv_invert_offdiagonal (sptrsv_invert_offdiag);
1088 device_khU_.set_sptrsv_invert_offdiagonal (sptrsv_invert_offdiag);
1091 device_khL_.set_sptrsv_etree(data_.parents.data());
1092 device_khU_.set_sptrsv_etree(data_.parents.data());
1095 device_khL_.set_sptrsv_perm(data_.perm_r.data());
1096 device_khU_.set_sptrsv_perm(data_.perm_c.data());
1098 int block_size = -1;
1099 if (block_size >= 0) {
1100 std::cout <<
" Block Size : " << block_size << std::endl;
1101 device_khL_.set_sptrsv_diag_supernode_sizes (block_size, block_size);
1102 device_khU_.set_sptrsv_diag_supernode_sizes (block_size, block_size);
1107 #ifdef HAVE_AMESOS2_TIMERS
1108 Teuchos::RCP< Teuchos::Time > SymboTimer_ = Teuchos::TimeMonitor::getNewCounter (
"Time for SpTrsv symbolic");
1109 Teuchos::TimeMonitor numFactTimer(*SymboTimer_);
1111 KokkosSparse::Experimental::sptrsv_symbolic
1112 (&device_khL_, &device_khU_, data_.L, data_.U);
1117 #ifdef HAVE_AMESOS2_TIMERS
1118 Teuchos::RCP< Teuchos::Time > NumerTimer_ = Teuchos::TimeMonitor::getNewCounter (
"Time for SpTrsv numeric");
1119 Teuchos::TimeMonitor numFactTimer(*NumerTimer_);
1121 KokkosSparse::Experimental::sptrsv_compute
1122 (&device_khL_, &device_khU_, data_.L, data_.U);
1124 #endif // HAVE_AMESOS2_TRIANGULAR_SOLVE
1127 template <
class Matrix,
class Vector>
1129 Superlu<Matrix,Vector>::triangular_solve()
const
1131 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
1132 size_t ld_rhs = device_xValues_.extent(0);
1133 size_t nrhs = device_xValues_.extent(1);
1135 Kokkos::resize(device_trsv_rhs_, ld_rhs, nrhs);
1136 Kokkos::resize(device_trsv_sol_, ld_rhs, nrhs);
1139 auto local_device_bValues = device_bValues_;
1140 auto local_device_trsv_perm_r = device_trsv_perm_r_;
1141 auto local_device_trsv_rhs = device_trsv_rhs_;
1144 auto local_device_trsv_R = device_trsv_R_;
1145 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1146 KOKKOS_LAMBDA(
size_t j) {
1147 for(
size_t k = 0; k < nrhs; ++k) {
1148 local_device_trsv_rhs(local_device_trsv_perm_r(j),k) = local_device_trsv_R(j) * local_device_bValues(j,k);
1152 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1153 KOKKOS_LAMBDA(
size_t j) {
1154 for(
size_t k = 0; k < nrhs; ++k) {
1155 local_device_trsv_rhs(local_device_trsv_perm_r(j),k) = local_device_bValues(j,k);
1160 for(
size_t k = 0; k < nrhs; ++k) {
1161 auto sub_sol = Kokkos::subview(device_trsv_sol_, Kokkos::ALL, k);
1162 auto sub_rhs = Kokkos::subview(device_trsv_rhs_, Kokkos::ALL, k);
1166 #ifdef HAVE_AMESOS2_TIMERS
1167 Teuchos::RCP< Teuchos::Time > SpLtrsvTimer_ = Teuchos::TimeMonitor::getNewCounter (
"Time for L solve");
1168 Teuchos::TimeMonitor solveTimer(*SpLtrsvTimer_);
1170 KokkosSparse::Experimental::sptrsv_solve(&device_khL_, sub_sol, sub_rhs);
1174 #ifdef HAVE_AMESOS2_TIMERS
1175 Teuchos::RCP< Teuchos::Time > SpUtrsvTimer_ = Teuchos::TimeMonitor::getNewCounter (
"Time for U solve");
1176 Teuchos::TimeMonitor solveTimer(*SpUtrsvTimer_);
1178 KokkosSparse::Experimental::sptrsv_solve(&device_khU_, sub_rhs, sub_sol);
1183 auto local_device_xValues = device_xValues_;
1184 auto local_device_trsv_perm_c = device_trsv_perm_c_;
1186 auto local_device_trsv_C = device_trsv_C_;
1187 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1188 KOKKOS_LAMBDA(
size_t j) {
1189 for(
size_t k = 0; k < nrhs; ++k) {
1190 local_device_xValues(j,k) = local_device_trsv_C(j) * local_device_trsv_rhs(local_device_trsv_perm_c(j),k);
1194 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1195 KOKKOS_LAMBDA(
size_t j) {
1196 for(
size_t k = 0; k < nrhs; ++k) {
1197 local_device_xValues(j,k) = local_device_trsv_rhs(local_device_trsv_perm_c(j),k);
1201 #endif // HAVE_AMESOS2_TRIANGULAR_SOLVE
1204 template<
class Matrix,
class Vector>
1205 const char* Superlu<Matrix,Vector>::name =
"SuperLU";
1210 #endif // AMESOS2_SUPERLU_DEF_HPP
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers...
Definition: Amesos2_SolverCore_decl.hpp:71
A generic helper class for getting a CCS representation of a Matrix.
Definition: Amesos2_Util.hpp:614
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal structures.
Definition: Amesos2_Superlu_def.hpp:938
EPhase
Used to indicate a phase in the direct solution.
Definition: Amesos2_TypeDecl.hpp:31
int numericFactorization_impl()
Superlu specific numeric factorization.
Definition: Amesos2_Superlu_def.hpp:283
~Superlu()
Destructor.
Definition: Amesos2_Superlu_def.hpp:94
Amesos2 Superlu declarations.
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition: Amesos2_Superlu_def.hpp:675
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition: Amesos2_Superlu_def.hpp:794
std::string description() const override
Returns a short description of this Solver.
Definition: Amesos2_Superlu_def.hpp:117
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
Superlu specific solve.
Definition: Amesos2_Superlu_def.hpp:473
A Matrix adapter interface for Amesos2.
Definition: Amesos2_MatrixAdapter_decl.hpp:42
void setParameters_impl(const Teuchos::RCP< Teuchos::ParameterList > ¶meterList)
Definition: Amesos2_Superlu_def.hpp:686
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition: Amesos2_Superlu_def.hpp:171
A templated MultiVector class adapter for Amesos2.
Definition: Amesos2_MultiVecAdapter_decl.hpp:142
int symbolicFactorization_impl()
Perform symbolic factorization of the matrix using Superlu.
Definition: Amesos2_Superlu_def.hpp:196
Amesos2 interface to the SuperLU package.
Definition: Amesos2_Superlu_decl.hpp:42