19 #ifndef AMESOS2_SUPERLU_DEF_HPP
20 #define AMESOS2_SUPERLU_DEF_HPP
22 #include <Teuchos_Tuple.hpp>
23 #include <Teuchos_ParameterList.hpp>
24 #include <Teuchos_StandardParameterEntryValidators.hpp>
29 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
36 #include "KokkosSparse_sptrsv_superlu.hpp"
42 template <
class Matrix,
class Vector>
44 Teuchos::RCP<const Matrix> A,
45 Teuchos::RCP<Vector> X,
46 Teuchos::RCP<const Vector> B )
48 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
49 , sptrsv_invert_diag_(true)
50 , sptrsv_invert_offdiag_ (false)
51 , sptrsv_u_in_csr_ (true)
52 , sptrsv_merge_supernodes_ (false)
53 , sptrsv_use_spmv_ (false)
55 , is_contiguous_(true)
56 , use_triangular_solves_(false)
58 , symmetrize_metis_(true)
65 SLU::set_default_options(&(data_.options));
67 data_.options.PrintStat = SLU::NO;
69 SLU::StatInit(&(data_.stat));
71 Kokkos::resize(data_.perm_r, this->globalNumRows_);
72 Kokkos::resize(data_.perm_c, this->globalNumCols_);
73 Kokkos::resize(data_.etree, this->globalNumCols_);
74 Kokkos::resize(data_.R, this->globalNumRows_);
75 Kokkos::resize(data_.C, this->globalNumCols_);
77 data_.relax = SLU::sp_ienv(2);
78 data_.panel_size = SLU::sp_ienv(1);
93 template <
class Matrix,
class Vector>
101 SLU::StatFree( &(data_.stat) ) ;
104 if ( data_.A.Store != NULL ){
105 SLU::Destroy_SuperMatrix_Store( &(data_.A) );
109 if ( data_.L.Store != NULL ){
110 SLU::Destroy_SuperNode_Matrix( &(data_.L) );
111 SLU::Destroy_CompCol_Matrix( &(data_.U) );
115 template <
class Matrix,
class Vector>
119 std::ostringstream oss;
120 oss <<
"SuperLU solver interface";
122 oss <<
", \"ILUTP\" : {";
123 oss <<
"drop tol = " << data_.options.ILU_DropTol;
124 oss <<
", fill factor = " << data_.options.ILU_FillFactor;
125 oss <<
", fill tol = " << data_.options.ILU_FillTol;
126 switch(data_.options.ILU_MILU) {
138 oss <<
", regular ILU";
140 switch(data_.options.ILU_Norm) {
149 oss <<
", infinity-norm";
153 oss <<
", direct solve";
169 template<
class Matrix,
class Vector>
181 int permc_spec = data_.options.ColPerm;
182 if (!use_metis_ && permc_spec != SLU::MY_PERMC && this->root_ ){
183 #ifdef HAVE_AMESOS2_TIMERS
184 Teuchos::TimeMonitor preOrderTimer(this->timers_.preOrderTime_);
187 SLU::get_perm_c(permc_spec, &(data_.A), data_.perm_c.data());
194 template <
class Matrix,
class Vector>
209 #ifdef HAVE_AMESOS2_TIMERS
210 Teuchos::TimeMonitor symFactTime( this->timers_.symFactTime_ );
212 same_symbolic_ =
false;
213 data_.options.Fact = SLU::DOFACT;
216 #ifdef HAVE_AMESOS2_TIMERS
217 Teuchos::RCP< Teuchos::Time > MetisTimer_ = Teuchos::TimeMonitor::getNewCounter (
"Time for METIS");
218 Teuchos::TimeMonitor numFactTimer(*MetisTimer_);
220 size_t n = this->globalNumRows_;
221 size_t nz = this->globalNumNonZeros_;
222 host_size_type_array host_rowind_view_ = host_rows_view_;
223 host_ordinal_type_array host_colptr_view_ = host_col_ptr_view_;
226 if (symmetrize_metis_) {
233 SLU::at_plus_a(n, nz, host_col_ptr_view_.data(), host_rows_view_.data(),
234 &new_nz, &new_col_ptr, &new_row_ind);
239 Kokkos::resize (host_rowind_view_, nz);
240 Kokkos::realloc(host_colptr_view_, n+1);
241 for (
size_t i = 0; i <= n; i++) {
242 host_colptr_view_(i) = new_col_ptr[i];
244 for (
size_t i = 0; i < nz; i++) {
245 host_rowind_view_(i) = new_row_ind[i];
249 SLU::SUPERLU_FREE(new_col_ptr);
251 SLU::SUPERLU_FREE(new_row_ind);
256 using metis_int_array_type = host_ordinal_type_array;
257 metis_int_array_type metis_perm = metis_int_array_type(Kokkos::ViewAllocateWithoutInitializing(
"metis_perm"), n);
258 metis_int_array_type metis_iperm = metis_int_array_type(Kokkos::ViewAllocateWithoutInitializing(
"metis_iperm"), n);
262 Amesos2::Util::reorder(
263 host_colptr_view_, host_rowind_view_,
264 metis_perm, metis_iperm, new_nnz,
false);
266 for (
size_t i = 0; i < n; i++) {
267 data_.perm_r(i) = metis_iperm(i);
268 data_.perm_c(i) = metis_iperm(i);
272 data_.options.ColPerm = SLU::MY_PERMC;
273 data_.options.RowPerm = SLU::MY_PERMR;
281 template <
class Matrix,
class Vector>
290 if ( !same_symbolic_ && data_.L.Store != NULL ){
291 SLU::Destroy_SuperNode_Matrix( &(data_.L) );
292 SLU::Destroy_CompCol_Matrix( &(data_.U) );
293 data_.L.Store = NULL;
294 data_.U.Store = NULL;
297 if( same_symbolic_ ) data_.options.Fact = SLU::SamePattern_SameRowPerm;
303 #ifdef HAVE_AMESOS2_DEBUG
304 TEUCHOS_TEST_FOR_EXCEPTION( data_.A.ncol != as<int>(this->globalNumCols_),
306 "Error in converting to SuperLU SuperMatrix: wrong number of global columns." );
307 TEUCHOS_TEST_FOR_EXCEPTION( data_.A.nrow != as<int>(this->globalNumRows_),
309 "Error in converting to SuperLU SuperMatrix: wrong number of global rows." );
312 if( data_.options.Equil == SLU::YES ){
313 magnitude_type rowcnd, colcnd, amax;
316 function_map::gsequ(&(data_.A), data_.R.data(),
317 data_.C.data(), &rowcnd, &colcnd,
322 function_map::laqgs(&(data_.A), data_.R.data(),
323 data_.C.data(), rowcnd, colcnd,
324 amax, &(data_.equed));
327 data_.rowequ = (data_.equed ==
'R') || (data_.equed ==
'B');
328 data_.colequ = (data_.equed ==
'C') || (data_.equed ==
'B');
335 SLU::sp_preorder(&(data_.options), &(data_.A), data_.perm_c.data(),
336 data_.etree.data(), &(data_.AC));
339 #ifdef HAVE_AMESOS2_TIMERS
340 Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
343 #ifdef HAVE_AMESOS2_VERBOSE_DEBUG
344 std::cout <<
"Superlu:: Before numeric factorization" << std::endl;
345 std::cout <<
"nzvals_ : " << nzvals_.toString() << std::endl;
346 std::cout <<
"rowind_ : " << rowind_.toString() << std::endl;
347 std::cout <<
"colptr_ : " << colptr_.toString() << std::endl;
350 if(ILU_Flag_==
false) {
351 function_map::gstrf(&(data_.options), &(data_.AC),
352 data_.relax, data_.panel_size, data_.etree.data(),
353 NULL, 0, data_.perm_c.data(), data_.perm_r.data(),
354 &(data_.L), &(data_.U),
355 #ifdef HAVE_AMESOS2_SUPERLU5_API
358 &(data_.stat), &info);
361 function_map::gsitrf(&(data_.options), &(data_.AC),
362 data_.relax, data_.panel_size, data_.etree.data(),
363 NULL, 0, data_.perm_c.data(), data_.perm_r.data(),
364 &(data_.L), &(data_.U),
365 #ifdef HAVE_AMESOS2_SUPERLU5_API
368 &(data_.stat), &info);
371 if (data_.options.ConditionNumber == SLU::YES) {
373 if (data_.options.Trans == SLU::NOTRANS) {
374 *(
unsigned char *)norm =
'1';
376 *(
unsigned char *)norm =
'I';
379 data_.anorm = function_map::langs(norm, &(data_.A));
380 function_map::gscon(norm, &(data_.L), &(data_.U),
381 data_.anorm, &(data_.rcond),
382 &(data_.stat), &info);
386 SLU::Destroy_CompCol_Permuted( &(data_.AC) );
389 this->setNnzLU( as<size_t>(((SLU::SCformat*)data_.L.Store)->nnz +
390 ((SLU::NCformat*)data_.U.Store)->nnz) );
394 if( data_.options.Equil == SLU::YES ) {
396 Teuchos::broadcast(*(this->getComm()), 0, &info2);
398 TEUCHOS_TEST_FOR_EXCEPTION
399 (info2 < 0, std::runtime_error,
400 "SuperLU's gsequ function returned with status " << info2 <<
" < 0."
401 " This means that argument " << (-info2) <<
" given to the function"
402 " had an illegal value.");
404 if (info2 <= data_.A.nrow) {
405 TEUCHOS_TEST_FOR_EXCEPTION
406 (
true, std::runtime_error,
"SuperLU's gsequ function returned with "
407 "info = " << info2 <<
" > 0, and info <= A.nrow = " << data_.A.nrow
408 <<
". This means that row " << info2 <<
" of A is exactly zero.");
410 else if (info2 > data_.A.ncol) {
411 TEUCHOS_TEST_FOR_EXCEPTION
412 (
true, std::runtime_error,
"SuperLU's gsequ function returned with "
413 "info = " << info2 <<
" > 0, and info > A.ncol = " << data_.A.ncol
414 <<
". This means that column " << (info2 - data_.A.nrow) <<
" of "
415 "A is exactly zero.");
418 TEUCHOS_TEST_FOR_EXCEPTION
419 (
true, std::runtime_error,
"SuperLU's gsequ function returned "
420 "with info = " << info2 <<
" > 0, but its value is not in the "
421 "range permitted by the documentation. This should never happen "
422 "(it appears to be SuperLU's fault).");
428 Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
430 global_size_type info_st = as<global_size_type>(info);
431 TEUCHOS_TEST_FOR_EXCEPTION( (info_st > 0) && (info_st <= this->globalNumCols_),
433 "Factorization complete, but matrix is singular. Division by zero eminent");
434 TEUCHOS_TEST_FOR_EXCEPTION( (info_st > 0) && (info_st > this->globalNumCols_),
436 "Memory allocation failure in Superlu factorization");
438 data_.options.Fact = SLU::FACTORED;
439 same_symbolic_ =
true;
441 if(use_triangular_solves_) {
442 #ifdef HAVE_AMESOS2_VERBOSE_DEBUG
443 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
444 if (this->getComm()->getRank()) {
445 std::cout <<
" > Metis : " << (use_metis_ ?
"YES" :
"NO") << std::endl;
446 std::cout <<
" > Equil : " << (data_.options.Equil == SLU::YES ?
"YES" :
"NO") << std::endl;
447 std::cout <<
" > Cond Number : " << (data_.options.ConditionNumber == SLU::YES ?
"YES" :
"NO") << std::endl;
448 std::cout <<
" > Invert diag : " << sptrsv_invert_diag_ << std::endl;
449 std::cout <<
" > Invert off-diag : " << sptrsv_invert_offdiag_ << std::endl;
450 std::cout <<
" > U in CSR : " << sptrsv_u_in_csr_ << std::endl;
451 std::cout <<
" > Merge : " << sptrsv_merge_supernodes_ << std::endl;
452 std::cout <<
" > Use SpMV : " << sptrsv_use_spmv_ << std::endl;
460 #ifdef HAVE_AMESOS2_TIMERS
461 Teuchos::RCP< Teuchos::Time > SpTrsvTimer_ = Teuchos::TimeMonitor::getNewCounter (
"Time for SpTrsv setup");
462 Teuchos::TimeMonitor numFactTimer(*SpTrsvTimer_);
464 triangular_solve_factor();
471 template <
class Matrix,
class Vector>
477 #ifdef HAVE_AMESOS2_TIMERS
478 Teuchos::RCP< Teuchos::Time > Amesos2SolveTimer_ = Teuchos::TimeMonitor::getNewCounter (
"Time for Amesos2");
479 Teuchos::TimeMonitor solveTimer(*Amesos2SolveTimer_);
482 const global_size_type ld_rhs = this->root_ ? X->getGlobalLength() : 0;
483 const size_t nrhs = X->getGlobalNumVectors();
485 bool bDidAssignX =
false;
486 bool bDidAssignB =
false;
488 #ifdef HAVE_AMESOS2_TIMERS
489 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
494 const bool initialize_data =
true;
495 const bool do_not_initialize_data =
false;
496 if(use_triangular_solves_) {
497 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
498 bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
499 device_solve_array_t>::do_get(do_not_initialize_data, X, device_xValues_,
501 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
502 this->rowIndexBase_);
503 bDidAssignB = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
504 device_solve_array_t>::do_get(initialize_data, B, device_bValues_,
506 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
507 this->rowIndexBase_);
511 bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
512 host_solve_array_t>::do_get(do_not_initialize_data, X, host_xValues_,
514 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
515 this->rowIndexBase_);
516 bDidAssignB = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
517 host_solve_array_t>::do_get(initialize_data, B, host_bValues_,
519 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
520 this->rowIndexBase_);
528 if(bDidAssignB && data_.equed !=
'N') {
529 if(use_triangular_solves_) {
530 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
531 device_solve_array_t copyB(Kokkos::ViewAllocateWithoutInitializing(
"copyB"),
532 device_bValues_.extent(0), device_bValues_.extent(1));
533 Kokkos::deep_copy(copyB, device_bValues_);
534 device_bValues_ = copyB;
538 host_solve_array_t copyB(Kokkos::ViewAllocateWithoutInitializing(
"copyB"),
539 host_bValues_.extent(0), host_bValues_.extent(1));
540 Kokkos::deep_copy(copyB, host_bValues_);
541 host_bValues_ = copyB;
547 magnitude_type rpg, rcond;
552 #ifdef HAVE_AMESOS2_TIMERS
553 Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
556 if(use_triangular_solves_) {
560 Kokkos::resize(data_.ferr, nrhs);
561 Kokkos::resize(data_.berr, nrhs);
564 #ifdef HAVE_AMESOS2_TIMERS
565 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
567 SLU::Dtype_t dtype = type_map::dtype;
568 int i_ld_rhs = as<int>(ld_rhs);
569 function_map::create_Dense_Matrix(&(data_.B), i_ld_rhs, as<int>(nrhs),
570 convert_bValues_, host_bValues_, i_ld_rhs,
571 SLU::SLU_DN, dtype, SLU::SLU_GE);
572 function_map::create_Dense_Matrix(&(data_.X), i_ld_rhs, as<int>(nrhs),
573 convert_xValues_, host_xValues_, i_ld_rhs,
574 SLU::SLU_DN, dtype, SLU::SLU_GE);
577 if(ILU_Flag_==
false) {
578 function_map::gssvx(&(data_.options), &(data_.A),
579 data_.perm_c.data(), data_.perm_r.data(),
580 data_.etree.data(), &(data_.equed), data_.R.data(),
581 data_.C.data(), &(data_.L), &(data_.U), NULL, 0, &(data_.B),
582 &(data_.X), &rpg, &rcond, data_.ferr.data(),
584 #ifdef HAVE_AMESOS2_SUPERLU5_API
587 &(data_.mem_usage), &(data_.stat), &ierr);
590 function_map::gsisx(&(data_.options), &(data_.A),
591 data_.perm_c.data(), data_.perm_r.data(),
592 data_.etree.data(), &(data_.equed), data_.R.data(),
593 data_.C.data(), &(data_.L), &(data_.U), NULL, 0, &(data_.B),
594 &(data_.X), &rpg, &rcond,
595 #ifdef HAVE_AMESOS2_SUPERLU5_API
598 &(data_.mem_usage), &(data_.stat), &ierr);
602 SLU::Destroy_SuperMatrix_Store( &(data_.X) );
603 SLU::Destroy_SuperMatrix_Store( &(data_.B) );
604 data_.X.Store = NULL;
605 data_.B.Store = NULL;
612 #ifdef HAVE_AMESOS2_TIMERS
613 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
615 function_map::convert_back_Dense_Matrix(convert_bValues_, host_bValues_);
616 function_map::convert_back_Dense_Matrix(convert_xValues_, host_xValues_);
620 SLU::Destroy_SuperMatrix_Store( &(data_.X) );
621 SLU::Destroy_SuperMatrix_Store( &(data_.B) );
622 data_.X.Store = NULL;
623 data_.B.Store = NULL;
627 Teuchos::broadcast(*(this->getComm()), 0, &ierr);
629 global_size_type ierr_st = as<global_size_type>(ierr);
630 TEUCHOS_TEST_FOR_EXCEPTION( ierr < 0,
631 std::invalid_argument,
632 "Argument " << -ierr <<
" to SuperLU xgssvx had illegal value" );
633 TEUCHOS_TEST_FOR_EXCEPTION( ierr > 0 && ierr_st <= this->globalNumCols_,
635 "Factorization complete, but U is exactly singular" );
636 TEUCHOS_TEST_FOR_EXCEPTION( ierr > 0 && ierr_st > this->globalNumCols_ + 1,
638 "SuperLU allocated " << ierr - this->globalNumCols_ <<
" bytes of "
639 "memory before allocation failure occured." );
646 #ifdef HAVE_AMESOS2_TIMERS
647 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
650 if(use_triangular_solves_) {
651 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
652 Util::put_1d_data_helper_kokkos_view<
655 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
656 this->rowIndexBase_);
660 Util::put_1d_data_helper_kokkos_view<
663 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
664 this->rowIndexBase_);
672 template <
class Matrix,
class Vector>
679 return( this->matrixA_->getGlobalNumRows() == this->matrixA_->getGlobalNumCols() );
683 template <
class Matrix,
class Vector>
688 using Teuchos::getIntegralValue;
689 using Teuchos::ParameterEntryValidator;
691 RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
693 ILU_Flag_ = parameterList->get<
bool>(
"ILU_Flag",
false);
695 SLU::ilu_set_default_options(&(data_.options));
697 data_.options.PrintStat = SLU::NO;
700 data_.options.Trans = this->control_.useTranspose_ ? SLU::TRANS : SLU::NOTRANS;
702 if( parameterList->isParameter(
"Trans") ){
703 RCP<const ParameterEntryValidator> trans_validator = valid_params->getEntry(
"Trans").validator();
704 parameterList->getEntry(
"Trans").setValidator(trans_validator);
706 data_.options.Trans = getIntegralValue<SLU::trans_t>(*parameterList,
"Trans");
709 if( parameterList->isParameter(
"IterRefine") ){
710 RCP<const ParameterEntryValidator> refine_validator = valid_params->getEntry(
"IterRefine").validator();
711 parameterList->getEntry(
"IterRefine").setValidator(refine_validator);
713 data_.options.IterRefine = getIntegralValue<SLU::IterRefine_t>(*parameterList,
"IterRefine");
716 if( parameterList->isParameter(
"ColPerm") ){
717 RCP<const ParameterEntryValidator> colperm_validator = valid_params->getEntry(
"ColPerm").validator();
718 parameterList->getEntry(
"ColPerm").setValidator(colperm_validator);
720 data_.options.ColPerm = getIntegralValue<SLU::colperm_t>(*parameterList,
"ColPerm");
723 data_.options.DiagPivotThresh = parameterList->get<
double>(
"DiagPivotThresh", 1.0);
725 bool equil = parameterList->get<
bool>(
"Equil",
true);
726 data_.options.Equil = equil ? SLU::YES : SLU::NO;
728 bool condNum = parameterList->get<
bool>(
"ConditionNumber",
false);
729 data_.options.ConditionNumber = condNum ? SLU::YES : SLU::NO;
731 bool symmetric_mode = parameterList->get<
bool>(
"SymmetricMode",
false);
732 data_.options.SymmetricMode = symmetric_mode ? SLU::YES : SLU::NO;
735 if( parameterList->isParameter(
"RowPerm") ){
736 RCP<const ParameterEntryValidator> rowperm_validator = valid_params->getEntry(
"RowPerm").validator();
737 parameterList->getEntry(
"RowPerm").setValidator(rowperm_validator);
738 data_.options.RowPerm = getIntegralValue<SLU::rowperm_t>(*parameterList,
"RowPerm");
747 data_.options.ILU_DropTol = parameterList->get<
double>(
"ILU_DropTol", 0.0001);
749 data_.options.ILU_FillFactor = parameterList->get<
double>(
"ILU_FillFactor", 10.0);
751 if( parameterList->isParameter(
"ILU_Norm") ) {
752 RCP<const ParameterEntryValidator> norm_validator = valid_params->getEntry(
"ILU_Norm").validator();
753 parameterList->getEntry(
"ILU_Norm").setValidator(norm_validator);
754 data_.options.ILU_Norm = getIntegralValue<SLU::norm_t>(*parameterList,
"ILU_Norm");
757 if( parameterList->isParameter(
"ILU_MILU") ) {
758 RCP<const ParameterEntryValidator> milu_validator = valid_params->getEntry(
"ILU_MILU").validator();
759 parameterList->getEntry(
"ILU_MILU").setValidator(milu_validator);
760 data_.options.ILU_MILU = getIntegralValue<SLU::milu_t>(*parameterList,
"ILU_MILU");
763 data_.options.ILU_FillTol = parameterList->get<
double>(
"ILU_FillTol", 0.01);
765 is_contiguous_ = parameterList->get<
bool>(
"IsContiguous",
true);
767 use_metis_ = parameterList->get<
bool>(
"UseMetis",
false);
768 symmetrize_metis_ = parameterList->get<
bool>(
"SymmetrizeMetis",
true);
770 use_triangular_solves_ = parameterList->get<
bool>(
"Enable_KokkosKernels_TriangularSolves",
false);
771 if(use_triangular_solves_) {
772 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
774 sptrsv_invert_diag_ = parameterList->get<
bool>(
"SpTRSV_Invert_Diag",
true);
776 sptrsv_invert_offdiag_ = parameterList->get<
bool>(
"SpTRSV_Invert_OffDiag",
false);
778 sptrsv_u_in_csr_ = parameterList->get<
bool>(
"SpTRSV_U_CSR",
true);
780 sptrsv_merge_supernodes_ = parameterList->get<
bool>(
"SpTRSV_Merge_Supernodes",
false);
782 sptrsv_use_spmv_ = parameterList->get<
bool>(
"SpTRSV_Use_SpMV",
false);
784 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
785 "Calling for triangular solves but KokkosKernels_ENABLE_SUPERNODAL_SPTRSV and KokkosKernels_ENABLE_TPL_SUPERLU were not configured." );
791 template <
class Matrix,
class Vector>
792 Teuchos::RCP<const Teuchos::ParameterList>
796 using Teuchos::tuple;
797 using Teuchos::ParameterList;
798 using Teuchos::EnhancedNumberValidator;
799 using Teuchos::setStringToIntegralParameter;
800 using Teuchos::stringToIntegralParameterEntryValidator;
802 static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
804 if( is_null(valid_params) ){
805 Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
807 setStringToIntegralParameter<SLU::trans_t>(
"Trans",
"NOTRANS",
808 "Solve for the transpose system or not",
809 tuple<string>(
"TRANS",
"NOTRANS",
"CONJ"),
810 tuple<string>(
"Solve with transpose",
811 "Do not solve with transpose",
812 "Solve with the conjugate transpose"),
813 tuple<SLU::trans_t>(SLU::TRANS,
818 setStringToIntegralParameter<SLU::IterRefine_t>(
"IterRefine",
"NOREFINE",
819 "Type of iterative refinement to use",
820 tuple<string>(
"NOREFINE",
"SLU_SINGLE",
"SLU_DOUBLE"),
821 tuple<string>(
"Do not use iterative refinement",
822 "Do single iterative refinement",
823 "Do double iterative refinement"),
824 tuple<SLU::IterRefine_t>(SLU::NOREFINE,
830 setStringToIntegralParameter<SLU::colperm_t>(
"ColPerm",
"COLAMD",
831 "Specifies how to permute the columns of the "
832 "matrix for sparsity preservation",
833 tuple<string>(
"NATURAL",
"MMD_AT_PLUS_A",
834 "MMD_ATA",
"COLAMD"),
835 tuple<string>(
"Natural ordering",
836 "Minimum degree ordering on A^T + A",
837 "Minimum degree ordering on A^T A",
838 "Approximate minimum degree column ordering"),
839 tuple<SLU::colperm_t>(SLU::NATURAL,
845 Teuchos::RCP<EnhancedNumberValidator<double> > diag_pivot_thresh_validator
846 = Teuchos::rcp(
new EnhancedNumberValidator<double>(0.0, 1.0) );
847 pl->set(
"DiagPivotThresh", 1.0,
848 "Specifies the threshold used for a diagonal entry to be an acceptable pivot",
849 diag_pivot_thresh_validator);
851 pl->set(
"Equil",
true,
"Whether to equilibrate the system before solve");
852 pl->set(
"ConditionNumber",
false,
"Whether to approximate condition number");
854 pl->set(
"SymmetricMode",
false,
855 "Specifies whether to use the symmetric mode. "
856 "Gives preference to diagonal pivots and uses "
857 "an (A^T + A)-based column permutation.");
861 setStringToIntegralParameter<SLU::rowperm_t>(
"RowPerm",
"LargeDiag",
862 "Type of row permutation strategy to use",
863 tuple<string>(
"NOROWPERM",
"LargeDiag",
"MY_PERMR"),
864 tuple<string>(
"Use natural ordering",
865 "Use weighted bipartite matching algorithm",
866 "Use the ordering given in perm_r input"),
867 tuple<SLU::rowperm_t>(SLU::NOROWPERM,
868 #if SUPERLU_MAJOR_VERSION > 5 || (SUPERLU_MAJOR_VERSION == 5 && SUPERLU_MINOR_VERSION > 2) || (SUPERLU_MAJOR_VERSION == 5 && SUPERLU_MINOR_VERSION == 2 && SUPERLU_PATCH_VERSION >= 2)
890 pl->set(
"ILU_DropTol", 0.0001,
"ILUT drop tolerance");
892 pl->set(
"ILU_FillFactor", 10.0,
"ILUT fill factor");
894 setStringToIntegralParameter<SLU::norm_t>(
"ILU_Norm",
"INF_NORM",
895 "Type of norm to use",
896 tuple<string>(
"ONE_NORM",
"TWO_NORM",
"INF_NORM"),
897 tuple<string>(
"1-norm",
"2-norm",
"inf-norm"),
898 tuple<SLU::norm_t>(SLU::ONE_NORM,SLU::TWO_NORM,SLU::INF_NORM),
901 setStringToIntegralParameter<SLU::milu_t>(
"ILU_MILU",
"SILU",
902 "Type of modified ILU to use",
903 tuple<string>(
"SILU",
"SMILU_1",
"SMILU_2",
"SMILU_3"),
904 tuple<string>(
"Regular ILU",
"MILU 1",
"MILU 2",
"MILU 3"),
905 tuple<SLU::milu_t>(SLU::SILU,SLU::SMILU_1,SLU::SMILU_2,
909 pl->set(
"ILU_FillTol", 0.01,
"ILUT fill tolerance");
911 pl->set(
"ILU_Flag",
false,
"ILU flag: if true, run ILU routines");
913 pl->set(
"IsContiguous",
true,
"Whether GIDs contiguous");
916 pl->set(
"UseMetis",
false,
"Whether to call METIS before SuperLU");
917 pl->set(
"SymmetrizeMetis",
true,
"Whether to symmetrize matrix before METIS");
919 pl->set(
"Enable_KokkosKernels_TriangularSolves",
false,
"Whether to use triangular solves.");
920 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
921 pl->set(
"SpTRSV_Invert_Diag",
true,
"specify whether to invert diagonal blocks for supernodal sparse-trianguular solve");
922 pl->set(
"SpTRSV_Invert_OffDiag",
false,
"specify whether to apply diagonal-inversion to off-diagonal blocks for supernodal sparse-trianguular solve");
923 pl->set(
"SpTRSV_U_CSR",
true,
"specify whether to store U in CSR forma for supernodal sparse-trianguular solve");
924 pl->set(
"SpTRSV_Merge_Supernodes",
false,
"specify whether to merge supernodes with similar sparsity structures for supernodal sparse-trianguular solve");
925 pl->set(
"SpTRSV_Use_SpMV",
false,
"specify whether to use spmv for supernodal sparse-trianguular solve");
935 template <
class Matrix,
class Vector>
941 #ifdef HAVE_AMESOS2_TIMERS
942 Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
946 if( current_phase == SYMBFACT )
return false;
949 if( data_.A.Store != NULL ){
950 SLU::Destroy_SuperMatrix_Store( &(data_.A) );
951 data_.A.Store = NULL;
956 Kokkos::resize(host_nzvals_view_, this->globalNumNonZeros_);
957 Kokkos::resize(host_rows_view_, this->globalNumNonZeros_);
958 Kokkos::resize(host_col_ptr_view_, this->globalNumRows_ + 1);
963 #ifdef HAVE_AMESOS2_TIMERS
964 Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
967 TEUCHOS_TEST_FOR_EXCEPTION( this->rowIndexBase_ != this->columnIndexBase_,
969 "Row and column maps have different indexbase ");
973 host_size_type_array>::do_get(this->matrixA_.ptr(),
974 host_nzvals_view_, host_rows_view_,
975 host_col_ptr_view_, nnz_ret,
976 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
978 this->rowIndexBase_);
982 SLU::Dtype_t dtype = type_map::dtype;
985 TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != as<int>(this->globalNumNonZeros_),
987 "Did not get the expected number of non-zero vals");
989 function_map::template create_CompCol_Matrix<host_value_type_array>( &(data_.A),
990 this->globalNumRows_, this->globalNumCols_,
992 convert_nzvals_, host_nzvals_view_,
993 host_rows_view_.data(),
994 host_col_ptr_view_.data(),
995 SLU::SLU_NC, dtype, SLU::SLU_GE);
1001 template <
class Matrix,
class Vector>
1005 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
1006 size_t ld_rhs = this->matrixA_->getGlobalNumRows();
1009 SLU::SCformat * Lstore = (SLU::SCformat*)(data_.L.Store);
1010 int nsuper = 1 + Lstore->nsuper;
1011 Kokkos::resize(data_.parents, nsuper);
1012 for (
int s = 0; s < nsuper; s++) {
1013 int j = Lstore->sup_to_col[s+1]-1;
1014 if (data_.etree[j] == static_cast<int>(ld_rhs)) {
1015 data_.parents(s) = -1;
1017 data_.parents(s) = Lstore->col_to_sup[data_.etree[j]];
1021 deep_copy_or_assign_view(device_trsv_perm_r_, data_.perm_r);
1022 deep_copy_or_assign_view(device_trsv_perm_c_, data_.perm_c);
1023 if (data_.options.Equil == SLU::YES) {
1024 deep_copy_or_assign_view(device_trsv_R_, data_.R);
1025 deep_copy_or_assign_view(device_trsv_C_, data_.C);
1028 bool condition_flag =
false;
1029 if (data_.options.ConditionNumber == SLU::YES) {
1030 using STM = Teuchos::ScalarTraits<magnitude_type>;
1031 const magnitude_type eps = STM::eps ();
1033 SCformat *Lstore = (SCformat*)(data_.L.Store);
1034 int nsuper = 1 + Lstore->nsuper;
1035 int *nb = Lstore->sup_to_col;
1037 for (
int i = 0; i < nsuper; i++) {
1038 if (nb[i+1] - nb[i] > max_cols) {
1039 max_cols = nb[i+1] - nb[i];
1044 const magnitude_type multiply_fact (10.0);
1045 condition_flag = (((double)max_cols * nsuper) * eps * multiply_fact >= data_.rcond);
1047 #ifdef HAVE_AMESOS2_VERBOSE_DEBUG
1048 int n = data_.perm_r.extent(0);
1049 std::cout << this->getComm()->getRank()
1050 <<
" : anorm = " << data_.anorm <<
", rcond = " << data_.rcond <<
", n = " << n
1051 <<
", num super cols = " << nsuper <<
", max super cols = " << max_cols
1052 <<
" -> " << ((double)max_cols * nsuper) * eps / data_.rcond
1053 << (((double)max_cols * nsuper) * eps * multiply_fact < data_.rcond ?
" (okay)" :
" (warn)") << std::endl;
1058 if (sptrsv_use_spmv_ && !condition_flag) {
1059 device_khL_.create_sptrsv_handle(
1060 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_SPMV_DAG, ld_rhs,
true);
1061 device_khU_.create_sptrsv_handle(
1062 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_SPMV_DAG, ld_rhs,
false);
1064 device_khL_.create_sptrsv_handle(
1065 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_DAG, ld_rhs,
true);
1066 device_khU_.create_sptrsv_handle(
1067 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_DAG, ld_rhs,
false);
1071 device_khU_.set_sptrsv_column_major (!sptrsv_u_in_csr_);
1074 device_khL_.set_sptrsv_merge_supernodes (sptrsv_merge_supernodes_);
1075 device_khU_.set_sptrsv_merge_supernodes (sptrsv_merge_supernodes_);
1078 bool sptrsv_invert_diag = (!condition_flag && sptrsv_invert_diag_);
1079 bool sptrsv_invert_offdiag = (!condition_flag && sptrsv_invert_offdiag_);
1082 device_khL_.set_sptrsv_invert_diagonal (sptrsv_invert_diag);
1083 device_khU_.set_sptrsv_invert_diagonal (sptrsv_invert_diag);
1086 device_khL_.set_sptrsv_invert_offdiagonal (sptrsv_invert_offdiag);
1087 device_khU_.set_sptrsv_invert_offdiagonal (sptrsv_invert_offdiag);
1090 device_khL_.set_sptrsv_etree(data_.parents.data());
1091 device_khU_.set_sptrsv_etree(data_.parents.data());
1094 device_khL_.set_sptrsv_perm(data_.perm_r.data());
1095 device_khU_.set_sptrsv_perm(data_.perm_c.data());
1097 int block_size = -1;
1098 if (block_size >= 0) {
1099 std::cout <<
" Block Size : " << block_size << std::endl;
1100 device_khL_.set_sptrsv_diag_supernode_sizes (block_size, block_size);
1101 device_khU_.set_sptrsv_diag_supernode_sizes (block_size, block_size);
1106 #ifdef HAVE_AMESOS2_TIMERS
1107 Teuchos::RCP< Teuchos::Time > SymboTimer_ = Teuchos::TimeMonitor::getNewCounter (
"Time for SpTrsv symbolic");
1108 Teuchos::TimeMonitor numFactTimer(*SymboTimer_);
1110 KokkosSparse::Experimental::sptrsv_symbolic
1111 (&device_khL_, &device_khU_, data_.L, data_.U);
1116 #ifdef HAVE_AMESOS2_TIMERS
1117 Teuchos::RCP< Teuchos::Time > NumerTimer_ = Teuchos::TimeMonitor::getNewCounter (
"Time for SpTrsv numeric");
1118 Teuchos::TimeMonitor numFactTimer(*NumerTimer_);
1120 KokkosSparse::Experimental::sptrsv_compute
1121 (&device_khL_, &device_khU_, data_.L, data_.U);
1123 #endif // HAVE_AMESOS2_TRIANGULAR_SOLVE
1126 template <
class Matrix,
class Vector>
1128 Superlu<Matrix,Vector>::triangular_solve()
const
1130 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
1131 size_t ld_rhs = device_xValues_.extent(0);
1132 size_t nrhs = device_xValues_.extent(1);
1134 Kokkos::resize(device_trsv_rhs_, ld_rhs, nrhs);
1135 Kokkos::resize(device_trsv_sol_, ld_rhs, nrhs);
1138 auto local_device_bValues = device_bValues_;
1139 auto local_device_trsv_perm_r = device_trsv_perm_r_;
1140 auto local_device_trsv_rhs = device_trsv_rhs_;
1143 auto local_device_trsv_R = device_trsv_R_;
1144 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1145 KOKKOS_LAMBDA(
size_t j) {
1146 for(
size_t k = 0; k < nrhs; ++k) {
1147 local_device_trsv_rhs(local_device_trsv_perm_r(j),k) = local_device_trsv_R(j) * local_device_bValues(j,k);
1151 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1152 KOKKOS_LAMBDA(
size_t j) {
1153 for(
size_t k = 0; k < nrhs; ++k) {
1154 local_device_trsv_rhs(local_device_trsv_perm_r(j),k) = local_device_bValues(j,k);
1159 for(
size_t k = 0; k < nrhs; ++k) {
1160 auto sub_sol = Kokkos::subview(device_trsv_sol_, Kokkos::ALL, k);
1161 auto sub_rhs = Kokkos::subview(device_trsv_rhs_, Kokkos::ALL, k);
1165 #ifdef HAVE_AMESOS2_TIMERS
1166 Teuchos::RCP< Teuchos::Time > SpLtrsvTimer_ = Teuchos::TimeMonitor::getNewCounter (
"Time for L solve");
1167 Teuchos::TimeMonitor solveTimer(*SpLtrsvTimer_);
1169 KokkosSparse::Experimental::sptrsv_solve(&device_khL_, sub_sol, sub_rhs);
1173 #ifdef HAVE_AMESOS2_TIMERS
1174 Teuchos::RCP< Teuchos::Time > SpUtrsvTimer_ = Teuchos::TimeMonitor::getNewCounter (
"Time for U solve");
1175 Teuchos::TimeMonitor solveTimer(*SpUtrsvTimer_);
1177 KokkosSparse::Experimental::sptrsv_solve(&device_khU_, sub_rhs, sub_sol);
1182 auto local_device_xValues = device_xValues_;
1183 auto local_device_trsv_perm_c = device_trsv_perm_c_;
1185 auto local_device_trsv_C = device_trsv_C_;
1186 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1187 KOKKOS_LAMBDA(
size_t j) {
1188 for(
size_t k = 0; k < nrhs; ++k) {
1189 local_device_xValues(j,k) = local_device_trsv_C(j) * local_device_trsv_rhs(local_device_trsv_perm_c(j),k);
1193 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1194 KOKKOS_LAMBDA(
size_t j) {
1195 for(
size_t k = 0; k < nrhs; ++k) {
1196 local_device_xValues(j,k) = local_device_trsv_rhs(local_device_trsv_perm_c(j),k);
1200 #endif // HAVE_AMESOS2_TRIANGULAR_SOLVE
1203 template<
class Matrix,
class Vector>
1204 const char* Superlu<Matrix,Vector>::name =
"SuperLU";
1209 #endif // AMESOS2_SUPERLU_DEF_HPP
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers...
Definition: Amesos2_SolverCore_decl.hpp:71
A generic helper class for getting a CCS representation of a Matrix.
Definition: Amesos2_Util.hpp:614
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal structures.
Definition: Amesos2_Superlu_def.hpp:937
EPhase
Used to indicate a phase in the direct solution.
Definition: Amesos2_TypeDecl.hpp:31
int numericFactorization_impl()
Superlu specific numeric factorization.
Definition: Amesos2_Superlu_def.hpp:283
~Superlu()
Destructor.
Definition: Amesos2_Superlu_def.hpp:94
Amesos2 Superlu declarations.
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition: Amesos2_Superlu_def.hpp:674
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition: Amesos2_Superlu_def.hpp:793
std::string description() const override
Returns a short description of this Solver.
Definition: Amesos2_Superlu_def.hpp:117
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
Superlu specific solve.
Definition: Amesos2_Superlu_def.hpp:473
A Matrix adapter interface for Amesos2.
Definition: Amesos2_MatrixAdapter_decl.hpp:42
void setParameters_impl(const Teuchos::RCP< Teuchos::ParameterList > ¶meterList)
Definition: Amesos2_Superlu_def.hpp:685
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition: Amesos2_Superlu_def.hpp:171
A templated MultiVector class adapter for Amesos2.
Definition: Amesos2_MultiVecAdapter_decl.hpp:142
int symbolicFactorization_impl()
Perform symbolic factorization of the matrix using Superlu.
Definition: Amesos2_Superlu_def.hpp:196
Amesos2 interface to the SuperLU package.
Definition: Amesos2_Superlu_decl.hpp:42