53 #ifndef AMESOS2_SUPERLU_DEF_HPP
54 #define AMESOS2_SUPERLU_DEF_HPP
56 #include <Teuchos_Tuple.hpp>
57 #include <Teuchos_ParameterList.hpp>
58 #include <Teuchos_StandardParameterEntryValidators.hpp>
63 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
70 #include "KokkosSparse_sptrsv_superlu.hpp"
76 template <
class Matrix,
class Vector>
78 Teuchos::RCP<const Matrix> A,
79 Teuchos::RCP<Vector> X,
80 Teuchos::RCP<const Vector> B )
82 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
83 , sptrsv_invert_diag_(true)
84 , sptrsv_invert_offdiag_ (false)
85 , sptrsv_u_in_csr_ (true)
86 , sptrsv_merge_supernodes_ (false)
87 , sptrsv_use_spmv_ (false)
89 , is_contiguous_(true)
90 , use_triangular_solves_(false)
92 , symmetrize_metis_(true)
99 SLU::set_default_options(&(data_.options));
101 data_.options.PrintStat = SLU::NO;
103 SLU::StatInit(&(data_.stat));
105 Kokkos::resize(data_.perm_r, this->globalNumRows_);
106 Kokkos::resize(data_.perm_c, this->globalNumCols_);
107 Kokkos::resize(data_.etree, this->globalNumCols_);
108 Kokkos::resize(data_.R, this->globalNumRows_);
109 Kokkos::resize(data_.C, this->globalNumCols_);
111 data_.relax = SLU::sp_ienv(2);
112 data_.panel_size = SLU::sp_ienv(1);
115 data_.rowequ =
false;
116 data_.colequ =
false;
117 data_.A.Store = NULL;
118 data_.L.Store = NULL;
119 data_.U.Store = NULL;
120 data_.X.Store = NULL;
121 data_.B.Store = NULL;
127 template <
class Matrix,
class Vector>
135 SLU::StatFree( &(data_.stat) ) ;
138 if ( data_.A.Store != NULL ){
139 SLU::Destroy_SuperMatrix_Store( &(data_.A) );
143 if ( data_.L.Store != NULL ){
144 SLU::Destroy_SuperNode_Matrix( &(data_.L) );
145 SLU::Destroy_CompCol_Matrix( &(data_.U) );
149 template <
class Matrix,
class Vector>
153 std::ostringstream oss;
154 oss <<
"SuperLU solver interface";
156 oss <<
", \"ILUTP\" : {";
157 oss <<
"drop tol = " << data_.options.ILU_DropTol;
158 oss <<
", fill factor = " << data_.options.ILU_FillFactor;
159 oss <<
", fill tol = " << data_.options.ILU_FillTol;
160 switch(data_.options.ILU_MILU) {
172 oss <<
", regular ILU";
174 switch(data_.options.ILU_Norm) {
183 oss <<
", infinity-norm";
187 oss <<
", direct solve";
203 template<
class Matrix,
class Vector>
215 int permc_spec = data_.options.ColPerm;
216 if (!use_metis_ && permc_spec != SLU::MY_PERMC && this->root_ ){
217 #ifdef HAVE_AMESOS2_TIMERS
218 Teuchos::TimeMonitor preOrderTimer(this->timers_.preOrderTime_);
221 SLU::get_perm_c(permc_spec, &(data_.A), data_.perm_c.data());
228 template <
class Matrix,
class Vector>
243 #ifdef HAVE_AMESOS2_TIMERS
244 Teuchos::TimeMonitor symFactTime( this->timers_.symFactTime_ );
246 same_symbolic_ =
false;
247 data_.options.Fact = SLU::DOFACT;
250 #ifdef HAVE_AMESOS2_TIMERS
251 Teuchos::RCP< Teuchos::Time > MetisTimer_ = Teuchos::TimeMonitor::getNewCounter (
"Time for METIS");
252 Teuchos::TimeMonitor numFactTimer(*MetisTimer_);
254 size_t n = this->globalNumRows_;
255 size_t nz = this->globalNumNonZeros_;
256 host_size_type_array host_rowind_view_ = host_rows_view_;
257 host_ordinal_type_array host_colptr_view_ = host_col_ptr_view_;
260 if (symmetrize_metis_) {
267 SLU::at_plus_a(n, nz, host_col_ptr_view_.data(), host_rows_view_.data(),
268 &new_nz, &new_col_ptr, &new_row_ind);
273 Kokkos::resize (host_rowind_view_, nz);
274 Kokkos::realloc(host_colptr_view_, n+1);
275 for (
size_t i = 0; i <= n; i++) {
276 host_colptr_view_(i) = new_col_ptr[i];
278 for (
size_t i = 0; i < nz; i++) {
279 host_rowind_view_(i) = new_row_ind[i];
283 SLU::SUPERLU_FREE(new_col_ptr);
285 SLU::SUPERLU_FREE(new_row_ind);
290 using metis_int_array_type = host_ordinal_type_array;
291 metis_int_array_type metis_perm = metis_int_array_type(Kokkos::ViewAllocateWithoutInitializing(
"metis_perm"), n);
292 metis_int_array_type metis_iperm = metis_int_array_type(Kokkos::ViewAllocateWithoutInitializing(
"metis_iperm"), n);
296 Amesos2::Util::reorder(
297 host_colptr_view_, host_rowind_view_,
298 metis_perm, metis_iperm, new_nnz,
false);
300 for (
size_t i = 0; i < n; i++) {
301 data_.perm_r(i) = metis_iperm(i);
302 data_.perm_c(i) = metis_iperm(i);
306 data_.options.ColPerm = SLU::MY_PERMC;
307 data_.options.RowPerm = SLU::MY_PERMR;
315 template <
class Matrix,
class Vector>
324 if ( !same_symbolic_ && data_.L.Store != NULL ){
325 SLU::Destroy_SuperNode_Matrix( &(data_.L) );
326 SLU::Destroy_CompCol_Matrix( &(data_.U) );
327 data_.L.Store = NULL;
328 data_.U.Store = NULL;
331 if( same_symbolic_ ) data_.options.Fact = SLU::SamePattern_SameRowPerm;
336 #ifdef HAVE_AMESOS2_DEBUG
337 TEUCHOS_TEST_FOR_EXCEPTION( data_.A.ncol != as<int>(this->globalNumCols_),
339 "Error in converting to SuperLU SuperMatrix: wrong number of global columns." );
340 TEUCHOS_TEST_FOR_EXCEPTION( data_.A.nrow != as<int>(this->globalNumRows_),
342 "Error in converting to SuperLU SuperMatrix: wrong number of global rows." );
345 if( data_.options.Equil == SLU::YES ){
346 magnitude_type rowcnd, colcnd, amax;
350 function_map::gsequ(&(data_.A), data_.R.data(),
351 data_.C.data(), &rowcnd, &colcnd,
353 TEUCHOS_TEST_FOR_EXCEPTION
354 (info2 < 0, std::runtime_error,
355 "SuperLU's gsequ function returned with status " << info2 <<
" < 0."
356 " This means that argument " << (-info2) <<
" given to the function"
357 " had an illegal value.");
359 if (info2 <= data_.A.nrow) {
360 TEUCHOS_TEST_FOR_EXCEPTION
361 (
true, std::runtime_error,
"SuperLU's gsequ function returned with "
362 "info = " << info2 <<
" > 0, and info <= A.nrow = " << data_.A.nrow
363 <<
". This means that row " << info2 <<
" of A is exactly zero.");
365 else if (info2 > data_.A.ncol) {
366 TEUCHOS_TEST_FOR_EXCEPTION
367 (
true, std::runtime_error,
"SuperLU's gsequ function returned with "
368 "info = " << info2 <<
" > 0, and info > A.ncol = " << data_.A.ncol
369 <<
". This means that column " << (info2 - data_.A.nrow) <<
" of "
370 "A is exactly zero.");
373 TEUCHOS_TEST_FOR_EXCEPTION
374 (
true, std::runtime_error,
"SuperLU's gsequ function returned "
375 "with info = " << info2 <<
" > 0, but its value is not in the "
376 "range permitted by the documentation. This should never happen "
377 "(it appears to be SuperLU's fault).");
382 function_map::laqgs(&(data_.A), data_.R.data(),
383 data_.C.data(), rowcnd, colcnd,
384 amax, &(data_.equed));
387 data_.rowequ = (data_.equed ==
'R') || (data_.equed ==
'B');
388 data_.colequ = (data_.equed ==
'C') || (data_.equed ==
'B');
393 SLU::sp_preorder(&(data_.options), &(data_.A), data_.perm_c.data(),
394 data_.etree.data(), &(data_.AC));
397 #ifdef HAVE_AMESOS2_TIMERS
398 Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
401 #ifdef HAVE_AMESOS2_VERBOSE_DEBUG
402 std::cout <<
"Superlu:: Before numeric factorization" << std::endl;
403 std::cout <<
"nzvals_ : " << nzvals_.toString() << std::endl;
404 std::cout <<
"rowind_ : " << rowind_.toString() << std::endl;
405 std::cout <<
"colptr_ : " << colptr_.toString() << std::endl;
408 if(ILU_Flag_==
false) {
409 function_map::gstrf(&(data_.options), &(data_.AC),
410 data_.relax, data_.panel_size, data_.etree.data(),
411 NULL, 0, data_.perm_c.data(), data_.perm_r.data(),
412 &(data_.L), &(data_.U),
413 #ifdef HAVE_AMESOS2_SUPERLU5_API
416 &(data_.stat), &info);
419 function_map::gsitrf(&(data_.options), &(data_.AC),
420 data_.relax, data_.panel_size, data_.etree.data(),
421 NULL, 0, data_.perm_c.data(), data_.perm_r.data(),
422 &(data_.L), &(data_.U),
423 #ifdef HAVE_AMESOS2_SUPERLU5_API
426 &(data_.stat), &info);
429 if (data_.options.ConditionNumber == SLU::YES) {
431 if (data_.options.Trans == SLU::NOTRANS) {
432 *(
unsigned char *)norm =
'1';
434 *(
unsigned char *)norm =
'I';
437 data_.anorm = function_map::langs(norm, &(data_.A));
438 function_map::gscon(norm, &(data_.L), &(data_.U),
439 data_.anorm, &(data_.rcond),
440 &(data_.stat), &info);
444 SLU::Destroy_CompCol_Permuted( &(data_.AC) );
447 this->setNnzLU( as<size_t>(((SLU::SCformat*)data_.L.Store)->nnz +
448 ((SLU::NCformat*)data_.U.Store)->nnz) );
452 Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
454 global_size_type info_st = as<global_size_type>(info);
455 TEUCHOS_TEST_FOR_EXCEPTION( (info_st > 0) && (info_st <= this->globalNumCols_),
457 "Factorization complete, but matrix is singular. Division by zero eminent");
458 TEUCHOS_TEST_FOR_EXCEPTION( (info_st > 0) && (info_st > this->globalNumCols_),
460 "Memory allocation failure in Superlu factorization");
462 data_.options.Fact = SLU::FACTORED;
463 same_symbolic_ =
true;
465 if(use_triangular_solves_) {
466 #ifdef HAVE_AMESOS2_VERBOSE_DEBUG
467 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
468 if (this->getComm()->getRank()) {
469 std::cout <<
" > Metis : " << (use_metis_ ?
"YES" :
"NO") << std::endl;
470 std::cout <<
" > Equil : " << (data_.options.Equil == SLU::YES ?
"YES" :
"NO") << std::endl;
471 std::cout <<
" > Cond Number : " << (data_.options.ConditionNumber == SLU::YES ?
"YES" :
"NO") << std::endl;
472 std::cout <<
" > Invert diag : " << sptrsv_invert_diag_ << std::endl;
473 std::cout <<
" > Invert off-diag : " << sptrsv_invert_offdiag_ << std::endl;
474 std::cout <<
" > U in CSR : " << sptrsv_u_in_csr_ << std::endl;
475 std::cout <<
" > Merge : " << sptrsv_merge_supernodes_ << std::endl;
476 std::cout <<
" > Use SpMV : " << sptrsv_use_spmv_ << std::endl;
484 #ifdef HAVE_AMESOS2_TIMERS
485 Teuchos::RCP< Teuchos::Time > SpTrsvTimer_ = Teuchos::TimeMonitor::getNewCounter (
"Time for SpTrsv setup");
486 Teuchos::TimeMonitor numFactTimer(*SpTrsvTimer_);
488 triangular_solve_factor();
495 template <
class Matrix,
class Vector>
501 #ifdef HAVE_AMESOS2_TIMERS
502 Teuchos::RCP< Teuchos::Time > Amesos2SolveTimer_ = Teuchos::TimeMonitor::getNewCounter (
"Time for Amesos2");
503 Teuchos::TimeMonitor solveTimer(*Amesos2SolveTimer_);
506 const global_size_type ld_rhs = this->root_ ? X->getGlobalLength() : 0;
507 const size_t nrhs = X->getGlobalNumVectors();
509 bool bDidAssignX =
false;
510 bool bDidAssignB =
false;
512 #ifdef HAVE_AMESOS2_TIMERS
513 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
514 Teuchos::TimeMonitor redistTimer( this->timers_.vecRedistTime_ );
519 const bool initialize_data =
true;
520 const bool do_not_initialize_data =
false;
521 if(use_triangular_solves_) {
522 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
523 bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
524 device_solve_array_t>::do_get(do_not_initialize_data, X, device_xValues_,
526 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
527 this->rowIndexBase_);
528 bDidAssignB = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
529 device_solve_array_t>::do_get(initialize_data, B, device_bValues_,
531 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
532 this->rowIndexBase_);
536 bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
537 host_solve_array_t>::do_get(do_not_initialize_data, X, host_xValues_,
539 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
540 this->rowIndexBase_);
541 bDidAssignB = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
542 host_solve_array_t>::do_get(initialize_data, B, host_bValues_,
544 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
545 this->rowIndexBase_);
553 if(bDidAssignB && data_.equed !=
'N') {
554 if(use_triangular_solves_) {
555 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
556 device_solve_array_t copyB(Kokkos::ViewAllocateWithoutInitializing(
"copyB"),
557 device_bValues_.extent(0), device_bValues_.extent(1));
558 Kokkos::deep_copy(copyB, device_bValues_);
559 device_bValues_ = copyB;
563 host_solve_array_t copyB(Kokkos::ViewAllocateWithoutInitializing(
"copyB"),
564 host_bValues_.extent(0), host_bValues_.extent(1));
565 Kokkos::deep_copy(copyB, host_bValues_);
566 host_bValues_ = copyB;
572 magnitude_type rpg, rcond;
577 #ifdef HAVE_AMESOS2_TIMERS
578 Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
581 if(use_triangular_solves_) {
585 Kokkos::resize(data_.ferr, nrhs);
586 Kokkos::resize(data_.berr, nrhs);
589 #ifdef HAVE_AMESOS2_TIMERS
590 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
592 SLU::Dtype_t dtype = type_map::dtype;
593 int i_ld_rhs = as<int>(ld_rhs);
594 function_map::create_Dense_Matrix(&(data_.B), i_ld_rhs, as<int>(nrhs),
595 convert_bValues_, host_bValues_, i_ld_rhs,
596 SLU::SLU_DN, dtype, SLU::SLU_GE);
597 function_map::create_Dense_Matrix(&(data_.X), i_ld_rhs, as<int>(nrhs),
598 convert_xValues_, host_xValues_, i_ld_rhs,
599 SLU::SLU_DN, dtype, SLU::SLU_GE);
602 if(ILU_Flag_==
false) {
603 function_map::gssvx(&(data_.options), &(data_.A),
604 data_.perm_c.data(), data_.perm_r.data(),
605 data_.etree.data(), &(data_.equed), data_.R.data(),
606 data_.C.data(), &(data_.L), &(data_.U), NULL, 0, &(data_.B),
607 &(data_.X), &rpg, &rcond, data_.ferr.data(),
609 #ifdef HAVE_AMESOS2_SUPERLU5_API
612 &(data_.mem_usage), &(data_.stat), &ierr);
615 function_map::gsisx(&(data_.options), &(data_.A),
616 data_.perm_c.data(), data_.perm_r.data(),
617 data_.etree.data(), &(data_.equed), data_.R.data(),
618 data_.C.data(), &(data_.L), &(data_.U), NULL, 0, &(data_.B),
619 &(data_.X), &rpg, &rcond,
620 #ifdef HAVE_AMESOS2_SUPERLU5_API
623 &(data_.mem_usage), &(data_.stat), &ierr);
627 SLU::Destroy_SuperMatrix_Store( &(data_.X) );
628 SLU::Destroy_SuperMatrix_Store( &(data_.B) );
629 data_.X.Store = NULL;
630 data_.B.Store = NULL;
637 #ifdef HAVE_AMESOS2_TIMERS
638 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
640 function_map::convert_back_Dense_Matrix(convert_bValues_, host_bValues_);
641 function_map::convert_back_Dense_Matrix(convert_xValues_, host_xValues_);
645 SLU::Destroy_SuperMatrix_Store( &(data_.X) );
646 SLU::Destroy_SuperMatrix_Store( &(data_.B) );
647 data_.X.Store = NULL;
648 data_.B.Store = NULL;
652 Teuchos::broadcast(*(this->getComm()), 0, &ierr);
654 global_size_type ierr_st = as<global_size_type>(ierr);
655 TEUCHOS_TEST_FOR_EXCEPTION( ierr < 0,
656 std::invalid_argument,
657 "Argument " << -ierr <<
" to SuperLU xgssvx had illegal value" );
658 TEUCHOS_TEST_FOR_EXCEPTION( ierr > 0 && ierr_st <= this->globalNumCols_,
660 "Factorization complete, but U is exactly singular" );
661 TEUCHOS_TEST_FOR_EXCEPTION( ierr > 0 && ierr_st > this->globalNumCols_ + 1,
663 "SuperLU allocated " << ierr - this->globalNumCols_ <<
" bytes of "
664 "memory before allocation failure occured." );
671 #ifdef HAVE_AMESOS2_TIMERS
672 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
675 if(use_triangular_solves_) {
676 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
677 Util::put_1d_data_helper_kokkos_view<
680 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
681 this->rowIndexBase_);
685 Util::put_1d_data_helper_kokkos_view<
688 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
689 this->rowIndexBase_);
697 template <
class Matrix,
class Vector>
704 return( this->matrixA_->getGlobalNumRows() == this->matrixA_->getGlobalNumCols() );
708 template <
class Matrix,
class Vector>
713 using Teuchos::getIntegralValue;
714 using Teuchos::ParameterEntryValidator;
716 RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
718 ILU_Flag_ = parameterList->get<
bool>(
"ILU_Flag",
false);
720 SLU::ilu_set_default_options(&(data_.options));
722 data_.options.PrintStat = SLU::NO;
725 data_.options.Trans = this->control_.useTranspose_ ? SLU::TRANS : SLU::NOTRANS;
727 if( parameterList->isParameter(
"Trans") ){
728 RCP<const ParameterEntryValidator> trans_validator = valid_params->getEntry(
"Trans").validator();
729 parameterList->getEntry(
"Trans").setValidator(trans_validator);
731 data_.options.Trans = getIntegralValue<SLU::trans_t>(*parameterList,
"Trans");
734 if( parameterList->isParameter(
"IterRefine") ){
735 RCP<const ParameterEntryValidator> refine_validator = valid_params->getEntry(
"IterRefine").validator();
736 parameterList->getEntry(
"IterRefine").setValidator(refine_validator);
738 data_.options.IterRefine = getIntegralValue<SLU::IterRefine_t>(*parameterList,
"IterRefine");
741 if( parameterList->isParameter(
"ColPerm") ){
742 RCP<const ParameterEntryValidator> colperm_validator = valid_params->getEntry(
"ColPerm").validator();
743 parameterList->getEntry(
"ColPerm").setValidator(colperm_validator);
745 data_.options.ColPerm = getIntegralValue<SLU::colperm_t>(*parameterList,
"ColPerm");
748 data_.options.DiagPivotThresh = parameterList->get<
double>(
"DiagPivotThresh", 1.0);
750 bool equil = parameterList->get<
bool>(
"Equil",
true);
751 data_.options.Equil = equil ? SLU::YES : SLU::NO;
753 bool condNum = parameterList->get<
bool>(
"ConditionNumber",
false);
754 data_.options.ConditionNumber = condNum ? SLU::YES : SLU::NO;
756 bool symmetric_mode = parameterList->get<
bool>(
"SymmetricMode",
false);
757 data_.options.SymmetricMode = symmetric_mode ? SLU::YES : SLU::NO;
760 if( parameterList->isParameter(
"RowPerm") ){
761 RCP<const ParameterEntryValidator> rowperm_validator = valid_params->getEntry(
"RowPerm").validator();
762 parameterList->getEntry(
"RowPerm").setValidator(rowperm_validator);
763 data_.options.RowPerm = getIntegralValue<SLU::rowperm_t>(*parameterList,
"RowPerm");
772 data_.options.ILU_DropTol = parameterList->get<
double>(
"ILU_DropTol", 0.0001);
774 data_.options.ILU_FillFactor = parameterList->get<
double>(
"ILU_FillFactor", 10.0);
776 if( parameterList->isParameter(
"ILU_Norm") ) {
777 RCP<const ParameterEntryValidator> norm_validator = valid_params->getEntry(
"ILU_Norm").validator();
778 parameterList->getEntry(
"ILU_Norm").setValidator(norm_validator);
779 data_.options.ILU_Norm = getIntegralValue<SLU::norm_t>(*parameterList,
"ILU_Norm");
782 if( parameterList->isParameter(
"ILU_MILU") ) {
783 RCP<const ParameterEntryValidator> milu_validator = valid_params->getEntry(
"ILU_MILU").validator();
784 parameterList->getEntry(
"ILU_MILU").setValidator(milu_validator);
785 data_.options.ILU_MILU = getIntegralValue<SLU::milu_t>(*parameterList,
"ILU_MILU");
788 data_.options.ILU_FillTol = parameterList->get<
double>(
"ILU_FillTol", 0.01);
790 is_contiguous_ = parameterList->get<
bool>(
"IsContiguous",
true);
792 use_metis_ = parameterList->get<
bool>(
"UseMetis",
false);
793 symmetrize_metis_ = parameterList->get<
bool>(
"SymmetrizeMetis",
true);
795 use_triangular_solves_ = parameterList->get<
bool>(
"Enable_KokkosKernels_TriangularSolves",
false);
796 if(use_triangular_solves_) {
797 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
799 sptrsv_invert_diag_ = parameterList->get<
bool>(
"SpTRSV_Invert_Diag",
true);
801 sptrsv_invert_offdiag_ = parameterList->get<
bool>(
"SpTRSV_Invert_OffDiag",
false);
803 sptrsv_u_in_csr_ = parameterList->get<
bool>(
"SpTRSV_U_CSR",
true);
805 sptrsv_merge_supernodes_ = parameterList->get<
bool>(
"SpTRSV_Merge_Supernodes",
false);
807 sptrsv_use_spmv_ = parameterList->get<
bool>(
"SpTRSV_Use_SpMV",
false);
809 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
810 "Calling for triangular solves but KokkosKernels_ENABLE_SUPERNODAL_SPTRSV and KokkosKernels_ENABLE_TPL_SUPERLU were not configured." );
816 template <
class Matrix,
class Vector>
817 Teuchos::RCP<const Teuchos::ParameterList>
821 using Teuchos::tuple;
822 using Teuchos::ParameterList;
823 using Teuchos::EnhancedNumberValidator;
824 using Teuchos::setStringToIntegralParameter;
825 using Teuchos::stringToIntegralParameterEntryValidator;
827 static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
829 if( is_null(valid_params) ){
830 Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
832 setStringToIntegralParameter<SLU::trans_t>(
"Trans",
"NOTRANS",
833 "Solve for the transpose system or not",
834 tuple<string>(
"TRANS",
"NOTRANS",
"CONJ"),
835 tuple<string>(
"Solve with transpose",
836 "Do not solve with transpose",
837 "Solve with the conjugate transpose"),
838 tuple<SLU::trans_t>(SLU::TRANS,
843 setStringToIntegralParameter<SLU::IterRefine_t>(
"IterRefine",
"NOREFINE",
844 "Type of iterative refinement to use",
845 tuple<string>(
"NOREFINE",
"SLU_SINGLE",
"SLU_DOUBLE"),
846 tuple<string>(
"Do not use iterative refinement",
847 "Do single iterative refinement",
848 "Do double iterative refinement"),
849 tuple<SLU::IterRefine_t>(SLU::NOREFINE,
855 setStringToIntegralParameter<SLU::colperm_t>(
"ColPerm",
"COLAMD",
856 "Specifies how to permute the columns of the "
857 "matrix for sparsity preservation",
858 tuple<string>(
"NATURAL",
"MMD_AT_PLUS_A",
859 "MMD_ATA",
"COLAMD"),
860 tuple<string>(
"Natural ordering",
861 "Minimum degree ordering on A^T + A",
862 "Minimum degree ordering on A^T A",
863 "Approximate minimum degree column ordering"),
864 tuple<SLU::colperm_t>(SLU::NATURAL,
870 Teuchos::RCP<EnhancedNumberValidator<double> > diag_pivot_thresh_validator
871 = Teuchos::rcp(
new EnhancedNumberValidator<double>(0.0, 1.0) );
872 pl->set(
"DiagPivotThresh", 1.0,
873 "Specifies the threshold used for a diagonal entry to be an acceptable pivot",
874 diag_pivot_thresh_validator);
876 pl->set(
"Equil",
true,
"Whether to equilibrate the system before solve");
877 pl->set(
"ConditionNumber",
false,
"Whether to approximate condition number");
879 pl->set(
"SymmetricMode",
false,
880 "Specifies whether to use the symmetric mode. "
881 "Gives preference to diagonal pivots and uses "
882 "an (A^T + A)-based column permutation.");
886 setStringToIntegralParameter<SLU::rowperm_t>(
"RowPerm",
"LargeDiag",
887 "Type of row permutation strategy to use",
888 tuple<string>(
"NOROWPERM",
"LargeDiag",
"MY_PERMR"),
889 tuple<string>(
"Use natural ordering",
890 "Use weighted bipartite matching algorithm",
891 "Use the ordering given in perm_r input"),
892 tuple<SLU::rowperm_t>(SLU::NOROWPERM,
893 #if SUPERLU_MAJOR_VERSION > 5 || (SUPERLU_MAJOR_VERSION == 5 && SUPERLU_MINOR_VERSION > 2) || (SUPERLU_MAJOR_VERSION == 5 && SUPERLU_MINOR_VERSION == 2 && SUPERLU_PATCH_VERSION >= 2)
915 pl->set(
"ILU_DropTol", 0.0001,
"ILUT drop tolerance");
917 pl->set(
"ILU_FillFactor", 10.0,
"ILUT fill factor");
919 setStringToIntegralParameter<SLU::norm_t>(
"ILU_Norm",
"INF_NORM",
920 "Type of norm to use",
921 tuple<string>(
"ONE_NORM",
"TWO_NORM",
"INF_NORM"),
922 tuple<string>(
"1-norm",
"2-norm",
"inf-norm"),
923 tuple<SLU::norm_t>(SLU::ONE_NORM,SLU::TWO_NORM,SLU::INF_NORM),
926 setStringToIntegralParameter<SLU::milu_t>(
"ILU_MILU",
"SILU",
927 "Type of modified ILU to use",
928 tuple<string>(
"SILU",
"SMILU_1",
"SMILU_2",
"SMILU_3"),
929 tuple<string>(
"Regular ILU",
"MILU 1",
"MILU 2",
"MILU 3"),
930 tuple<SLU::milu_t>(SLU::SILU,SLU::SMILU_1,SLU::SMILU_2,
934 pl->set(
"ILU_FillTol", 0.01,
"ILUT fill tolerance");
936 pl->set(
"ILU_Flag",
false,
"ILU flag: if true, run ILU routines");
938 pl->set(
"IsContiguous",
true,
"Whether GIDs contiguous");
941 pl->set(
"UseMetis",
false,
"Whether to call METIS before SuperLU");
942 pl->set(
"SymmetrizeMetis",
true,
"Whether to symmetrize matrix before METIS");
944 pl->set(
"Enable_KokkosKernels_TriangularSolves",
false,
"Whether to use triangular solves.");
945 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
946 pl->set(
"SpTRSV_Invert_Diag",
true,
"specify whether to invert diagonal blocks for supernodal sparse-trianguular solve");
947 pl->set(
"SpTRSV_Invert_OffDiag",
false,
"specify whether to apply diagonal-inversion to off-diagonal blocks for supernodal sparse-trianguular solve");
948 pl->set(
"SpTRSV_U_CSR",
true,
"specify whether to store U in CSR forma for supernodal sparse-trianguular solve");
949 pl->set(
"SpTRSV_Merge_Supernodes",
false,
"specify whether to merge supernodes with similar sparsity structures for supernodal sparse-trianguular solve");
950 pl->set(
"SpTRSV_Use_SpMV",
false,
"specify whether to use spmv for supernodal sparse-trianguular solve");
960 template <
class Matrix,
class Vector>
966 #ifdef HAVE_AMESOS2_TIMERS
967 Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
971 if( current_phase == SYMBFACT )
return false;
974 if( data_.A.Store != NULL ){
975 SLU::Destroy_SuperMatrix_Store( &(data_.A) );
976 data_.A.Store = NULL;
981 Kokkos::resize(host_nzvals_view_, this->globalNumNonZeros_);
982 Kokkos::resize(host_rows_view_, this->globalNumNonZeros_);
983 Kokkos::resize(host_col_ptr_view_, this->globalNumRows_ + 1);
988 #ifdef HAVE_AMESOS2_TIMERS
989 Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
992 TEUCHOS_TEST_FOR_EXCEPTION( this->rowIndexBase_ != this->columnIndexBase_,
994 "Row and column maps have different indexbase ");
998 host_size_type_array>::do_get(this->matrixA_.ptr(),
999 host_nzvals_view_, host_rows_view_,
1000 host_col_ptr_view_, nnz_ret,
1001 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
1003 this->rowIndexBase_);
1007 SLU::Dtype_t dtype = type_map::dtype;
1010 TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != as<int>(this->globalNumNonZeros_),
1012 "Did not get the expected number of non-zero vals");
1014 function_map::template create_CompCol_Matrix<host_value_type_array>( &(data_.A),
1015 this->globalNumRows_, this->globalNumCols_,
1017 convert_nzvals_, host_nzvals_view_,
1018 host_rows_view_.data(),
1019 host_col_ptr_view_.data(),
1020 SLU::SLU_NC, dtype, SLU::SLU_GE);
1026 template <
class Matrix,
class Vector>
1030 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
1031 size_t ld_rhs = this->matrixA_->getGlobalNumRows();
1034 SLU::SCformat * Lstore = (SLU::SCformat*)(data_.L.Store);
1035 int nsuper = 1 + Lstore->nsuper;
1036 Kokkos::resize(data_.parents, nsuper);
1037 for (
int s = 0; s < nsuper; s++) {
1038 int j = Lstore->sup_to_col[s+1]-1;
1039 if (data_.etree[j] == static_cast<int>(ld_rhs)) {
1040 data_.parents(s) = -1;
1042 data_.parents(s) = Lstore->col_to_sup[data_.etree[j]];
1046 deep_copy_or_assign_view(device_trsv_perm_r_, data_.perm_r);
1047 deep_copy_or_assign_view(device_trsv_perm_c_, data_.perm_c);
1048 if (data_.options.Equil == SLU::YES) {
1049 deep_copy_or_assign_view(device_trsv_R_, data_.R);
1050 deep_copy_or_assign_view(device_trsv_C_, data_.C);
1053 bool condition_flag =
false;
1054 if (data_.options.ConditionNumber == SLU::YES) {
1055 using STM = Teuchos::ScalarTraits<magnitude_type>;
1056 const magnitude_type eps = STM::eps ();
1058 SCformat *Lstore = (SCformat*)(data_.L.Store);
1059 int nsuper = 1 + Lstore->nsuper;
1060 int *nb = Lstore->sup_to_col;
1062 for (
int i = 0; i < nsuper; i++) {
1063 if (nb[i+1] - nb[i] > max_cols) {
1064 max_cols = nb[i+1] - nb[i];
1069 const magnitude_type multiply_fact (10.0);
1070 condition_flag = (((double)max_cols * nsuper) * eps * multiply_fact >= data_.rcond);
1072 #ifdef HAVE_AMESOS2_VERBOSE_DEBUG
1073 int n = data_.perm_r.extent(0);
1074 std::cout << this->getComm()->getRank()
1075 <<
" : anorm = " << data_.anorm <<
", rcond = " << data_.rcond <<
", n = " << n
1076 <<
", num super cols = " << nsuper <<
", max super cols = " << max_cols
1077 <<
" -> " << ((double)max_cols * nsuper) * eps / data_.rcond
1078 << (((double)max_cols * nsuper) * eps * multiply_fact < data_.rcond ?
" (okay)" :
" (warn)") << std::endl;
1083 if (sptrsv_use_spmv_ && !condition_flag) {
1084 device_khL_.create_sptrsv_handle(
1085 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_SPMV_DAG, ld_rhs,
true);
1086 device_khU_.create_sptrsv_handle(
1087 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_SPMV_DAG, ld_rhs,
false);
1089 device_khL_.create_sptrsv_handle(
1090 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_DAG, ld_rhs,
true);
1091 device_khU_.create_sptrsv_handle(
1092 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_DAG, ld_rhs,
false);
1096 device_khU_.set_sptrsv_column_major (!sptrsv_u_in_csr_);
1099 device_khL_.set_sptrsv_merge_supernodes (sptrsv_merge_supernodes_);
1100 device_khU_.set_sptrsv_merge_supernodes (sptrsv_merge_supernodes_);
1103 bool sptrsv_invert_diag = (!condition_flag && sptrsv_invert_diag_);
1104 bool sptrsv_invert_offdiag = (!condition_flag && sptrsv_invert_offdiag_);
1107 device_khL_.set_sptrsv_invert_diagonal (sptrsv_invert_diag);
1108 device_khU_.set_sptrsv_invert_diagonal (sptrsv_invert_diag);
1111 device_khL_.set_sptrsv_invert_offdiagonal (sptrsv_invert_offdiag);
1112 device_khU_.set_sptrsv_invert_offdiagonal (sptrsv_invert_offdiag);
1115 device_khL_.set_sptrsv_etree(data_.parents.data());
1116 device_khU_.set_sptrsv_etree(data_.parents.data());
1119 device_khL_.set_sptrsv_perm(data_.perm_r.data());
1120 device_khU_.set_sptrsv_perm(data_.perm_c.data());
1122 int block_size = -1;
1123 if (block_size >= 0) {
1124 std::cout <<
" Block Size : " << block_size << std::endl;
1125 device_khL_.set_sptrsv_diag_supernode_sizes (block_size, block_size);
1126 device_khU_.set_sptrsv_diag_supernode_sizes (block_size, block_size);
1131 #ifdef HAVE_AMESOS2_TIMERS
1132 Teuchos::RCP< Teuchos::Time > SymboTimer_ = Teuchos::TimeMonitor::getNewCounter (
"Time for SpTrsv symbolic");
1133 Teuchos::TimeMonitor numFactTimer(*SymboTimer_);
1135 KokkosSparse::Experimental::sptrsv_symbolic
1136 (&device_khL_, &device_khU_, data_.L, data_.U);
1141 #ifdef HAVE_AMESOS2_TIMERS
1142 Teuchos::RCP< Teuchos::Time > NumerTimer_ = Teuchos::TimeMonitor::getNewCounter (
"Time for SpTrsv numeric");
1143 Teuchos::TimeMonitor numFactTimer(*NumerTimer_);
1145 KokkosSparse::Experimental::sptrsv_compute
1146 (&device_khL_, &device_khU_, data_.L, data_.U);
1148 #endif // HAVE_AMESOS2_TRIANGULAR_SOLVE
1151 template <
class Matrix,
class Vector>
1153 Superlu<Matrix,Vector>::triangular_solve()
const
1155 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
1156 size_t ld_rhs = device_xValues_.extent(0);
1157 size_t nrhs = device_xValues_.extent(1);
1159 Kokkos::resize(device_trsv_rhs_, ld_rhs, nrhs);
1160 Kokkos::resize(device_trsv_sol_, ld_rhs, nrhs);
1163 auto local_device_bValues = device_bValues_;
1164 auto local_device_trsv_perm_r = device_trsv_perm_r_;
1165 auto local_device_trsv_rhs = device_trsv_rhs_;
1168 auto local_device_trsv_R = device_trsv_R_;
1169 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1170 KOKKOS_LAMBDA(
size_t j) {
1171 for(
size_t k = 0; k < nrhs; ++k) {
1172 local_device_trsv_rhs(local_device_trsv_perm_r(j),k) = local_device_trsv_R(j) * local_device_bValues(j,k);
1176 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1177 KOKKOS_LAMBDA(
size_t j) {
1178 for(
size_t k = 0; k < nrhs; ++k) {
1179 local_device_trsv_rhs(local_device_trsv_perm_r(j),k) = local_device_bValues(j,k);
1184 for(
size_t k = 0; k < nrhs; ++k) {
1185 auto sub_sol = Kokkos::subview(device_trsv_sol_, Kokkos::ALL, k);
1186 auto sub_rhs = Kokkos::subview(device_trsv_rhs_, Kokkos::ALL, k);
1190 #ifdef HAVE_AMESOS2_TIMERS
1191 Teuchos::RCP< Teuchos::Time > SpLtrsvTimer_ = Teuchos::TimeMonitor::getNewCounter (
"Time for L solve");
1192 Teuchos::TimeMonitor solveTimer(*SpLtrsvTimer_);
1194 KokkosSparse::Experimental::sptrsv_solve(&device_khL_, sub_sol, sub_rhs);
1198 #ifdef HAVE_AMESOS2_TIMERS
1199 Teuchos::RCP< Teuchos::Time > SpUtrsvTimer_ = Teuchos::TimeMonitor::getNewCounter (
"Time for U solve");
1200 Teuchos::TimeMonitor solveTimer(*SpUtrsvTimer_);
1202 KokkosSparse::Experimental::sptrsv_solve(&device_khU_, sub_rhs, sub_sol);
1207 auto local_device_xValues = device_xValues_;
1208 auto local_device_trsv_perm_c = device_trsv_perm_c_;
1210 auto local_device_trsv_C = device_trsv_C_;
1211 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1212 KOKKOS_LAMBDA(
size_t j) {
1213 for(
size_t k = 0; k < nrhs; ++k) {
1214 local_device_xValues(j,k) = local_device_trsv_C(j) * local_device_trsv_rhs(local_device_trsv_perm_c(j),k);
1218 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1219 KOKKOS_LAMBDA(
size_t j) {
1220 for(
size_t k = 0; k < nrhs; ++k) {
1221 local_device_xValues(j,k) = local_device_trsv_rhs(local_device_trsv_perm_c(j),k);
1225 #endif // HAVE_AMESOS2_TRIANGULAR_SOLVE
1228 template<
class Matrix,
class Vector>
1229 const char* Superlu<Matrix,Vector>::name =
"SuperLU";
1234 #endif // AMESOS2_SUPERLU_DEF_HPP
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers...
Definition: Amesos2_SolverCore_decl.hpp:105
A generic helper class for getting a CCS representation of a Matrix.
Definition: Amesos2_Util.hpp:648
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal structures.
Definition: Amesos2_Superlu_def.hpp:962
EPhase
Used to indicate a phase in the direct solution.
Definition: Amesos2_TypeDecl.hpp:65
int numericFactorization_impl()
Superlu specific numeric factorization.
Definition: Amesos2_Superlu_def.hpp:317
~Superlu()
Destructor.
Definition: Amesos2_Superlu_def.hpp:128
Amesos2 Superlu declarations.
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition: Amesos2_Superlu_def.hpp:699
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition: Amesos2_Superlu_def.hpp:818
std::string description() const override
Returns a short description of this Solver.
Definition: Amesos2_Superlu_def.hpp:151
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
Superlu specific solve.
Definition: Amesos2_Superlu_def.hpp:497
A Matrix adapter interface for Amesos2.
Definition: Amesos2_MatrixAdapter_decl.hpp:76
void setParameters_impl(const Teuchos::RCP< Teuchos::ParameterList > ¶meterList)
Definition: Amesos2_Superlu_def.hpp:710
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition: Amesos2_Superlu_def.hpp:205
A templated MultiVector class adapter for Amesos2.
Definition: Amesos2_MultiVecAdapter_decl.hpp:176
int symbolicFactorization_impl()
Perform symbolic factorization of the matrix using Superlu.
Definition: Amesos2_Superlu_def.hpp:230
Amesos2 interface to the SuperLU package.
Definition: Amesos2_Superlu_decl.hpp:76