19 #ifndef AMESOS2_SUPERLU_DEF_HPP
20 #define AMESOS2_SUPERLU_DEF_HPP
22 #include <Teuchos_Tuple.hpp>
23 #include <Teuchos_ParameterList.hpp>
24 #include <Teuchos_StandardParameterEntryValidators.hpp>
29 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
36 #include "KokkosSparse_sptrsv_superlu.hpp"
42 template <
class Matrix,
class Vector>
44 Teuchos::RCP<const Matrix> A,
45 Teuchos::RCP<Vector> X,
46 Teuchos::RCP<const Vector> B )
48 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
49 , sptrsv_invert_diag_(true)
50 , sptrsv_invert_offdiag_ (false)
51 , sptrsv_u_in_csr_ (true)
52 , sptrsv_merge_supernodes_ (false)
53 , sptrsv_use_spmv_ (false)
55 , is_contiguous_(true)
56 , use_triangular_solves_(false)
58 , symmetrize_metis_(true)
65 SLU::set_default_options(&(data_.options));
67 data_.options.PrintStat = SLU::NO;
69 SLU::StatInit(&(data_.stat));
71 Kokkos::resize(data_.perm_r, this->globalNumRows_);
72 Kokkos::resize(data_.perm_c, this->globalNumCols_);
73 Kokkos::resize(data_.etree, this->globalNumCols_);
74 Kokkos::resize(data_.R, this->globalNumRows_);
75 Kokkos::resize(data_.C, this->globalNumCols_);
77 data_.relax = SLU::sp_ienv(2);
78 data_.panel_size = SLU::sp_ienv(1);
93 template <
class Matrix,
class Vector>
101 SLU::StatFree( &(data_.stat) ) ;
104 if ( data_.A.Store != NULL ){
105 SLU::Destroy_SuperMatrix_Store( &(data_.A) );
109 if ( data_.L.Store != NULL ){
110 SLU::Destroy_SuperNode_Matrix( &(data_.L) );
111 SLU::Destroy_CompCol_Matrix( &(data_.U) );
115 template <
class Matrix,
class Vector>
119 std::ostringstream oss;
120 oss <<
"SuperLU solver interface";
122 oss <<
", \"ILUTP\" : {";
123 oss <<
"drop tol = " << data_.options.ILU_DropTol;
124 oss <<
", fill factor = " << data_.options.ILU_FillFactor;
125 oss <<
", fill tol = " << data_.options.ILU_FillTol;
126 switch(data_.options.ILU_MILU) {
138 oss <<
", regular ILU";
140 switch(data_.options.ILU_Norm) {
149 oss <<
", infinity-norm";
153 oss <<
", direct solve";
169 template<
class Matrix,
class Vector>
181 int permc_spec = data_.options.ColPerm;
182 if (!use_metis_ && permc_spec != SLU::MY_PERMC && this->root_ ){
183 #ifdef HAVE_AMESOS2_TIMERS
184 Teuchos::TimeMonitor preOrderTimer(this->timers_.preOrderTime_);
187 SLU::get_perm_c(permc_spec, &(data_.A), data_.perm_c.data());
194 template <
class Matrix,
class Vector>
209 #ifdef HAVE_AMESOS2_TIMERS
210 Teuchos::TimeMonitor symFactTime( this->timers_.symFactTime_ );
212 same_symbolic_ =
false;
213 data_.options.Fact = SLU::DOFACT;
216 #ifdef HAVE_AMESOS2_TIMERS
217 Teuchos::RCP< Teuchos::Time > MetisTimer_ = Teuchos::TimeMonitor::getNewCounter (
"Time for METIS");
218 Teuchos::TimeMonitor numFactTimer(*MetisTimer_);
220 size_t n = this->globalNumRows_;
221 size_t nz = this->globalNumNonZeros_;
222 host_size_type_array host_rowind_view_ = host_rows_view_;
223 host_ordinal_type_array host_colptr_view_ = host_col_ptr_view_;
226 if (symmetrize_metis_) {
233 SLU::at_plus_a(n, nz, host_col_ptr_view_.data(), host_rows_view_.data(),
234 &new_nz, &new_col_ptr, &new_row_ind);
239 Kokkos::resize (host_rowind_view_, nz);
240 Kokkos::realloc(host_colptr_view_, n+1);
241 for (
size_t i = 0; i <= n; i++) {
242 host_colptr_view_(i) = new_col_ptr[i];
244 for (
size_t i = 0; i < nz; i++) {
245 host_rowind_view_(i) = new_row_ind[i];
249 SLU::SUPERLU_FREE(new_col_ptr);
251 SLU::SUPERLU_FREE(new_row_ind);
256 using metis_int_array_type = host_ordinal_type_array;
257 metis_int_array_type metis_perm = metis_int_array_type(Kokkos::ViewAllocateWithoutInitializing(
"metis_perm"), n);
258 metis_int_array_type metis_iperm = metis_int_array_type(Kokkos::ViewAllocateWithoutInitializing(
"metis_iperm"), n);
262 Amesos2::Util::reorder(
263 host_colptr_view_, host_rowind_view_,
264 metis_perm, metis_iperm, new_nnz,
false);
266 for (
size_t i = 0; i < n; i++) {
267 data_.perm_r(i) = metis_iperm(i);
268 data_.perm_c(i) = metis_iperm(i);
272 data_.options.ColPerm = SLU::MY_PERMC;
273 data_.options.RowPerm = SLU::MY_PERMR;
281 template <
class Matrix,
class Vector>
290 if ( !same_symbolic_ && data_.L.Store != NULL ){
291 SLU::Destroy_SuperNode_Matrix( &(data_.L) );
292 SLU::Destroy_CompCol_Matrix( &(data_.U) );
293 data_.L.Store = NULL;
294 data_.U.Store = NULL;
297 if( same_symbolic_ ) data_.options.Fact = SLU::SamePattern_SameRowPerm;
302 #ifdef HAVE_AMESOS2_DEBUG
303 TEUCHOS_TEST_FOR_EXCEPTION( data_.A.ncol != as<int>(this->globalNumCols_),
305 "Error in converting to SuperLU SuperMatrix: wrong number of global columns." );
306 TEUCHOS_TEST_FOR_EXCEPTION( data_.A.nrow != as<int>(this->globalNumRows_),
308 "Error in converting to SuperLU SuperMatrix: wrong number of global rows." );
311 if( data_.options.Equil == SLU::YES ){
312 magnitude_type rowcnd, colcnd, amax;
316 function_map::gsequ(&(data_.A), data_.R.data(),
317 data_.C.data(), &rowcnd, &colcnd,
319 TEUCHOS_TEST_FOR_EXCEPTION
320 (info2 < 0, std::runtime_error,
321 "SuperLU's gsequ function returned with status " << info2 <<
" < 0."
322 " This means that argument " << (-info2) <<
" given to the function"
323 " had an illegal value.");
325 if (info2 <= data_.A.nrow) {
326 TEUCHOS_TEST_FOR_EXCEPTION
327 (
true, std::runtime_error,
"SuperLU's gsequ function returned with "
328 "info = " << info2 <<
" > 0, and info <= A.nrow = " << data_.A.nrow
329 <<
". This means that row " << info2 <<
" of A is exactly zero.");
331 else if (info2 > data_.A.ncol) {
332 TEUCHOS_TEST_FOR_EXCEPTION
333 (
true, std::runtime_error,
"SuperLU's gsequ function returned with "
334 "info = " << info2 <<
" > 0, and info > A.ncol = " << data_.A.ncol
335 <<
". This means that column " << (info2 - data_.A.nrow) <<
" of "
336 "A is exactly zero.");
339 TEUCHOS_TEST_FOR_EXCEPTION
340 (
true, std::runtime_error,
"SuperLU's gsequ function returned "
341 "with info = " << info2 <<
" > 0, but its value is not in the "
342 "range permitted by the documentation. This should never happen "
343 "(it appears to be SuperLU's fault).");
348 function_map::laqgs(&(data_.A), data_.R.data(),
349 data_.C.data(), rowcnd, colcnd,
350 amax, &(data_.equed));
353 data_.rowequ = (data_.equed ==
'R') || (data_.equed ==
'B');
354 data_.colequ = (data_.equed ==
'C') || (data_.equed ==
'B');
359 SLU::sp_preorder(&(data_.options), &(data_.A), data_.perm_c.data(),
360 data_.etree.data(), &(data_.AC));
363 #ifdef HAVE_AMESOS2_TIMERS
364 Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
367 #ifdef HAVE_AMESOS2_VERBOSE_DEBUG
368 std::cout <<
"Superlu:: Before numeric factorization" << std::endl;
369 std::cout <<
"nzvals_ : " << nzvals_.toString() << std::endl;
370 std::cout <<
"rowind_ : " << rowind_.toString() << std::endl;
371 std::cout <<
"colptr_ : " << colptr_.toString() << std::endl;
374 if(ILU_Flag_==
false) {
375 function_map::gstrf(&(data_.options), &(data_.AC),
376 data_.relax, data_.panel_size, data_.etree.data(),
377 NULL, 0, data_.perm_c.data(), data_.perm_r.data(),
378 &(data_.L), &(data_.U),
379 #ifdef HAVE_AMESOS2_SUPERLU5_API
382 &(data_.stat), &info);
385 function_map::gsitrf(&(data_.options), &(data_.AC),
386 data_.relax, data_.panel_size, data_.etree.data(),
387 NULL, 0, data_.perm_c.data(), data_.perm_r.data(),
388 &(data_.L), &(data_.U),
389 #ifdef HAVE_AMESOS2_SUPERLU5_API
392 &(data_.stat), &info);
395 if (data_.options.ConditionNumber == SLU::YES) {
397 if (data_.options.Trans == SLU::NOTRANS) {
398 *(
unsigned char *)norm =
'1';
400 *(
unsigned char *)norm =
'I';
403 data_.anorm = function_map::langs(norm, &(data_.A));
404 function_map::gscon(norm, &(data_.L), &(data_.U),
405 data_.anorm, &(data_.rcond),
406 &(data_.stat), &info);
410 SLU::Destroy_CompCol_Permuted( &(data_.AC) );
413 this->setNnzLU( as<size_t>(((SLU::SCformat*)data_.L.Store)->nnz +
414 ((SLU::NCformat*)data_.U.Store)->nnz) );
418 Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
420 global_size_type info_st = as<global_size_type>(info);
421 TEUCHOS_TEST_FOR_EXCEPTION( (info_st > 0) && (info_st <= this->globalNumCols_),
423 "Factorization complete, but matrix is singular. Division by zero eminent");
424 TEUCHOS_TEST_FOR_EXCEPTION( (info_st > 0) && (info_st > this->globalNumCols_),
426 "Memory allocation failure in Superlu factorization");
428 data_.options.Fact = SLU::FACTORED;
429 same_symbolic_ =
true;
431 if(use_triangular_solves_) {
432 #ifdef HAVE_AMESOS2_VERBOSE_DEBUG
433 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
434 if (this->getComm()->getRank()) {
435 std::cout <<
" > Metis : " << (use_metis_ ?
"YES" :
"NO") << std::endl;
436 std::cout <<
" > Equil : " << (data_.options.Equil == SLU::YES ?
"YES" :
"NO") << std::endl;
437 std::cout <<
" > Cond Number : " << (data_.options.ConditionNumber == SLU::YES ?
"YES" :
"NO") << std::endl;
438 std::cout <<
" > Invert diag : " << sptrsv_invert_diag_ << std::endl;
439 std::cout <<
" > Invert off-diag : " << sptrsv_invert_offdiag_ << std::endl;
440 std::cout <<
" > U in CSR : " << sptrsv_u_in_csr_ << std::endl;
441 std::cout <<
" > Merge : " << sptrsv_merge_supernodes_ << std::endl;
442 std::cout <<
" > Use SpMV : " << sptrsv_use_spmv_ << std::endl;
450 #ifdef HAVE_AMESOS2_TIMERS
451 Teuchos::RCP< Teuchos::Time > SpTrsvTimer_ = Teuchos::TimeMonitor::getNewCounter (
"Time for SpTrsv setup");
452 Teuchos::TimeMonitor numFactTimer(*SpTrsvTimer_);
454 triangular_solve_factor();
461 template <
class Matrix,
class Vector>
467 #ifdef HAVE_AMESOS2_TIMERS
468 Teuchos::RCP< Teuchos::Time > Amesos2SolveTimer_ = Teuchos::TimeMonitor::getNewCounter (
"Time for Amesos2");
469 Teuchos::TimeMonitor solveTimer(*Amesos2SolveTimer_);
472 const global_size_type ld_rhs = this->root_ ? X->getGlobalLength() : 0;
473 const size_t nrhs = X->getGlobalNumVectors();
475 bool bDidAssignX =
false;
476 bool bDidAssignB =
false;
478 #ifdef HAVE_AMESOS2_TIMERS
479 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
480 Teuchos::TimeMonitor redistTimer( this->timers_.vecRedistTime_ );
485 const bool initialize_data =
true;
486 const bool do_not_initialize_data =
false;
487 if(use_triangular_solves_) {
488 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
489 bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
490 device_solve_array_t>::do_get(do_not_initialize_data, X, device_xValues_,
492 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
493 this->rowIndexBase_);
494 bDidAssignB = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
495 device_solve_array_t>::do_get(initialize_data, B, device_bValues_,
497 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
498 this->rowIndexBase_);
502 bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
503 host_solve_array_t>::do_get(do_not_initialize_data, X, host_xValues_,
505 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
506 this->rowIndexBase_);
507 bDidAssignB = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
508 host_solve_array_t>::do_get(initialize_data, B, host_bValues_,
510 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
511 this->rowIndexBase_);
519 if(bDidAssignB && data_.equed !=
'N') {
520 if(use_triangular_solves_) {
521 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
522 device_solve_array_t copyB(Kokkos::ViewAllocateWithoutInitializing(
"copyB"),
523 device_bValues_.extent(0), device_bValues_.extent(1));
524 Kokkos::deep_copy(copyB, device_bValues_);
525 device_bValues_ = copyB;
529 host_solve_array_t copyB(Kokkos::ViewAllocateWithoutInitializing(
"copyB"),
530 host_bValues_.extent(0), host_bValues_.extent(1));
531 Kokkos::deep_copy(copyB, host_bValues_);
532 host_bValues_ = copyB;
538 magnitude_type rpg, rcond;
543 #ifdef HAVE_AMESOS2_TIMERS
544 Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
547 if(use_triangular_solves_) {
551 Kokkos::resize(data_.ferr, nrhs);
552 Kokkos::resize(data_.berr, nrhs);
555 #ifdef HAVE_AMESOS2_TIMERS
556 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
558 SLU::Dtype_t dtype = type_map::dtype;
559 int i_ld_rhs = as<int>(ld_rhs);
560 function_map::create_Dense_Matrix(&(data_.B), i_ld_rhs, as<int>(nrhs),
561 convert_bValues_, host_bValues_, i_ld_rhs,
562 SLU::SLU_DN, dtype, SLU::SLU_GE);
563 function_map::create_Dense_Matrix(&(data_.X), i_ld_rhs, as<int>(nrhs),
564 convert_xValues_, host_xValues_, i_ld_rhs,
565 SLU::SLU_DN, dtype, SLU::SLU_GE);
568 if(ILU_Flag_==
false) {
569 function_map::gssvx(&(data_.options), &(data_.A),
570 data_.perm_c.data(), data_.perm_r.data(),
571 data_.etree.data(), &(data_.equed), data_.R.data(),
572 data_.C.data(), &(data_.L), &(data_.U), NULL, 0, &(data_.B),
573 &(data_.X), &rpg, &rcond, data_.ferr.data(),
575 #ifdef HAVE_AMESOS2_SUPERLU5_API
578 &(data_.mem_usage), &(data_.stat), &ierr);
581 function_map::gsisx(&(data_.options), &(data_.A),
582 data_.perm_c.data(), data_.perm_r.data(),
583 data_.etree.data(), &(data_.equed), data_.R.data(),
584 data_.C.data(), &(data_.L), &(data_.U), NULL, 0, &(data_.B),
585 &(data_.X), &rpg, &rcond,
586 #ifdef HAVE_AMESOS2_SUPERLU5_API
589 &(data_.mem_usage), &(data_.stat), &ierr);
593 SLU::Destroy_SuperMatrix_Store( &(data_.X) );
594 SLU::Destroy_SuperMatrix_Store( &(data_.B) );
595 data_.X.Store = NULL;
596 data_.B.Store = NULL;
603 #ifdef HAVE_AMESOS2_TIMERS
604 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
606 function_map::convert_back_Dense_Matrix(convert_bValues_, host_bValues_);
607 function_map::convert_back_Dense_Matrix(convert_xValues_, host_xValues_);
611 SLU::Destroy_SuperMatrix_Store( &(data_.X) );
612 SLU::Destroy_SuperMatrix_Store( &(data_.B) );
613 data_.X.Store = NULL;
614 data_.B.Store = NULL;
618 Teuchos::broadcast(*(this->getComm()), 0, &ierr);
620 global_size_type ierr_st = as<global_size_type>(ierr);
621 TEUCHOS_TEST_FOR_EXCEPTION( ierr < 0,
622 std::invalid_argument,
623 "Argument " << -ierr <<
" to SuperLU xgssvx had illegal value" );
624 TEUCHOS_TEST_FOR_EXCEPTION( ierr > 0 && ierr_st <= this->globalNumCols_,
626 "Factorization complete, but U is exactly singular" );
627 TEUCHOS_TEST_FOR_EXCEPTION( ierr > 0 && ierr_st > this->globalNumCols_ + 1,
629 "SuperLU allocated " << ierr - this->globalNumCols_ <<
" bytes of "
630 "memory before allocation failure occured." );
637 #ifdef HAVE_AMESOS2_TIMERS
638 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
641 if(use_triangular_solves_) {
642 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
643 Util::put_1d_data_helper_kokkos_view<
646 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
647 this->rowIndexBase_);
651 Util::put_1d_data_helper_kokkos_view<
654 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
655 this->rowIndexBase_);
663 template <
class Matrix,
class Vector>
670 return( this->matrixA_->getGlobalNumRows() == this->matrixA_->getGlobalNumCols() );
674 template <
class Matrix,
class Vector>
679 using Teuchos::getIntegralValue;
680 using Teuchos::ParameterEntryValidator;
682 RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
684 ILU_Flag_ = parameterList->get<
bool>(
"ILU_Flag",
false);
686 SLU::ilu_set_default_options(&(data_.options));
688 data_.options.PrintStat = SLU::NO;
691 data_.options.Trans = this->control_.useTranspose_ ? SLU::TRANS : SLU::NOTRANS;
693 if( parameterList->isParameter(
"Trans") ){
694 RCP<const ParameterEntryValidator> trans_validator = valid_params->getEntry(
"Trans").validator();
695 parameterList->getEntry(
"Trans").setValidator(trans_validator);
697 data_.options.Trans = getIntegralValue<SLU::trans_t>(*parameterList,
"Trans");
700 if( parameterList->isParameter(
"IterRefine") ){
701 RCP<const ParameterEntryValidator> refine_validator = valid_params->getEntry(
"IterRefine").validator();
702 parameterList->getEntry(
"IterRefine").setValidator(refine_validator);
704 data_.options.IterRefine = getIntegralValue<SLU::IterRefine_t>(*parameterList,
"IterRefine");
707 if( parameterList->isParameter(
"ColPerm") ){
708 RCP<const ParameterEntryValidator> colperm_validator = valid_params->getEntry(
"ColPerm").validator();
709 parameterList->getEntry(
"ColPerm").setValidator(colperm_validator);
711 data_.options.ColPerm = getIntegralValue<SLU::colperm_t>(*parameterList,
"ColPerm");
714 data_.options.DiagPivotThresh = parameterList->get<
double>(
"DiagPivotThresh", 1.0);
716 bool equil = parameterList->get<
bool>(
"Equil",
true);
717 data_.options.Equil = equil ? SLU::YES : SLU::NO;
719 bool condNum = parameterList->get<
bool>(
"ConditionNumber",
false);
720 data_.options.ConditionNumber = condNum ? SLU::YES : SLU::NO;
722 bool symmetric_mode = parameterList->get<
bool>(
"SymmetricMode",
false);
723 data_.options.SymmetricMode = symmetric_mode ? SLU::YES : SLU::NO;
726 if( parameterList->isParameter(
"RowPerm") ){
727 RCP<const ParameterEntryValidator> rowperm_validator = valid_params->getEntry(
"RowPerm").validator();
728 parameterList->getEntry(
"RowPerm").setValidator(rowperm_validator);
729 data_.options.RowPerm = getIntegralValue<SLU::rowperm_t>(*parameterList,
"RowPerm");
738 data_.options.ILU_DropTol = parameterList->get<
double>(
"ILU_DropTol", 0.0001);
740 data_.options.ILU_FillFactor = parameterList->get<
double>(
"ILU_FillFactor", 10.0);
742 if( parameterList->isParameter(
"ILU_Norm") ) {
743 RCP<const ParameterEntryValidator> norm_validator = valid_params->getEntry(
"ILU_Norm").validator();
744 parameterList->getEntry(
"ILU_Norm").setValidator(norm_validator);
745 data_.options.ILU_Norm = getIntegralValue<SLU::norm_t>(*parameterList,
"ILU_Norm");
748 if( parameterList->isParameter(
"ILU_MILU") ) {
749 RCP<const ParameterEntryValidator> milu_validator = valid_params->getEntry(
"ILU_MILU").validator();
750 parameterList->getEntry(
"ILU_MILU").setValidator(milu_validator);
751 data_.options.ILU_MILU = getIntegralValue<SLU::milu_t>(*parameterList,
"ILU_MILU");
754 data_.options.ILU_FillTol = parameterList->get<
double>(
"ILU_FillTol", 0.01);
756 is_contiguous_ = parameterList->get<
bool>(
"IsContiguous",
true);
758 use_metis_ = parameterList->get<
bool>(
"UseMetis",
false);
759 symmetrize_metis_ = parameterList->get<
bool>(
"SymmetrizeMetis",
true);
761 use_triangular_solves_ = parameterList->get<
bool>(
"Enable_KokkosKernels_TriangularSolves",
false);
762 if(use_triangular_solves_) {
763 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
765 sptrsv_invert_diag_ = parameterList->get<
bool>(
"SpTRSV_Invert_Diag",
true);
767 sptrsv_invert_offdiag_ = parameterList->get<
bool>(
"SpTRSV_Invert_OffDiag",
false);
769 sptrsv_u_in_csr_ = parameterList->get<
bool>(
"SpTRSV_U_CSR",
true);
771 sptrsv_merge_supernodes_ = parameterList->get<
bool>(
"SpTRSV_Merge_Supernodes",
false);
773 sptrsv_use_spmv_ = parameterList->get<
bool>(
"SpTRSV_Use_SpMV",
false);
775 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
776 "Calling for triangular solves but KokkosKernels_ENABLE_SUPERNODAL_SPTRSV and KokkosKernels_ENABLE_TPL_SUPERLU were not configured." );
782 template <
class Matrix,
class Vector>
783 Teuchos::RCP<const Teuchos::ParameterList>
787 using Teuchos::tuple;
788 using Teuchos::ParameterList;
789 using Teuchos::EnhancedNumberValidator;
790 using Teuchos::setStringToIntegralParameter;
791 using Teuchos::stringToIntegralParameterEntryValidator;
793 static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
795 if( is_null(valid_params) ){
796 Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
798 setStringToIntegralParameter<SLU::trans_t>(
"Trans",
"NOTRANS",
799 "Solve for the transpose system or not",
800 tuple<string>(
"TRANS",
"NOTRANS",
"CONJ"),
801 tuple<string>(
"Solve with transpose",
802 "Do not solve with transpose",
803 "Solve with the conjugate transpose"),
804 tuple<SLU::trans_t>(SLU::TRANS,
809 setStringToIntegralParameter<SLU::IterRefine_t>(
"IterRefine",
"NOREFINE",
810 "Type of iterative refinement to use",
811 tuple<string>(
"NOREFINE",
"SLU_SINGLE",
"SLU_DOUBLE"),
812 tuple<string>(
"Do not use iterative refinement",
813 "Do single iterative refinement",
814 "Do double iterative refinement"),
815 tuple<SLU::IterRefine_t>(SLU::NOREFINE,
821 setStringToIntegralParameter<SLU::colperm_t>(
"ColPerm",
"COLAMD",
822 "Specifies how to permute the columns of the "
823 "matrix for sparsity preservation",
824 tuple<string>(
"NATURAL",
"MMD_AT_PLUS_A",
825 "MMD_ATA",
"COLAMD"),
826 tuple<string>(
"Natural ordering",
827 "Minimum degree ordering on A^T + A",
828 "Minimum degree ordering on A^T A",
829 "Approximate minimum degree column ordering"),
830 tuple<SLU::colperm_t>(SLU::NATURAL,
836 Teuchos::RCP<EnhancedNumberValidator<double> > diag_pivot_thresh_validator
837 = Teuchos::rcp(
new EnhancedNumberValidator<double>(0.0, 1.0) );
838 pl->set(
"DiagPivotThresh", 1.0,
839 "Specifies the threshold used for a diagonal entry to be an acceptable pivot",
840 diag_pivot_thresh_validator);
842 pl->set(
"Equil",
true,
"Whether to equilibrate the system before solve");
843 pl->set(
"ConditionNumber",
false,
"Whether to approximate condition number");
845 pl->set(
"SymmetricMode",
false,
846 "Specifies whether to use the symmetric mode. "
847 "Gives preference to diagonal pivots and uses "
848 "an (A^T + A)-based column permutation.");
852 setStringToIntegralParameter<SLU::rowperm_t>(
"RowPerm",
"LargeDiag",
853 "Type of row permutation strategy to use",
854 tuple<string>(
"NOROWPERM",
"LargeDiag",
"MY_PERMR"),
855 tuple<string>(
"Use natural ordering",
856 "Use weighted bipartite matching algorithm",
857 "Use the ordering given in perm_r input"),
858 tuple<SLU::rowperm_t>(SLU::NOROWPERM,
859 #if SUPERLU_MAJOR_VERSION > 5 || (SUPERLU_MAJOR_VERSION == 5 && SUPERLU_MINOR_VERSION > 2) || (SUPERLU_MAJOR_VERSION == 5 && SUPERLU_MINOR_VERSION == 2 && SUPERLU_PATCH_VERSION >= 2)
881 pl->set(
"ILU_DropTol", 0.0001,
"ILUT drop tolerance");
883 pl->set(
"ILU_FillFactor", 10.0,
"ILUT fill factor");
885 setStringToIntegralParameter<SLU::norm_t>(
"ILU_Norm",
"INF_NORM",
886 "Type of norm to use",
887 tuple<string>(
"ONE_NORM",
"TWO_NORM",
"INF_NORM"),
888 tuple<string>(
"1-norm",
"2-norm",
"inf-norm"),
889 tuple<SLU::norm_t>(SLU::ONE_NORM,SLU::TWO_NORM,SLU::INF_NORM),
892 setStringToIntegralParameter<SLU::milu_t>(
"ILU_MILU",
"SILU",
893 "Type of modified ILU to use",
894 tuple<string>(
"SILU",
"SMILU_1",
"SMILU_2",
"SMILU_3"),
895 tuple<string>(
"Regular ILU",
"MILU 1",
"MILU 2",
"MILU 3"),
896 tuple<SLU::milu_t>(SLU::SILU,SLU::SMILU_1,SLU::SMILU_2,
900 pl->set(
"ILU_FillTol", 0.01,
"ILUT fill tolerance");
902 pl->set(
"ILU_Flag",
false,
"ILU flag: if true, run ILU routines");
904 pl->set(
"IsContiguous",
true,
"Whether GIDs contiguous");
907 pl->set(
"UseMetis",
false,
"Whether to call METIS before SuperLU");
908 pl->set(
"SymmetrizeMetis",
true,
"Whether to symmetrize matrix before METIS");
910 pl->set(
"Enable_KokkosKernels_TriangularSolves",
false,
"Whether to use triangular solves.");
911 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
912 pl->set(
"SpTRSV_Invert_Diag",
true,
"specify whether to invert diagonal blocks for supernodal sparse-trianguular solve");
913 pl->set(
"SpTRSV_Invert_OffDiag",
false,
"specify whether to apply diagonal-inversion to off-diagonal blocks for supernodal sparse-trianguular solve");
914 pl->set(
"SpTRSV_U_CSR",
true,
"specify whether to store U in CSR forma for supernodal sparse-trianguular solve");
915 pl->set(
"SpTRSV_Merge_Supernodes",
false,
"specify whether to merge supernodes with similar sparsity structures for supernodal sparse-trianguular solve");
916 pl->set(
"SpTRSV_Use_SpMV",
false,
"specify whether to use spmv for supernodal sparse-trianguular solve");
926 template <
class Matrix,
class Vector>
932 #ifdef HAVE_AMESOS2_TIMERS
933 Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
937 if( current_phase == SYMBFACT )
return false;
940 if( data_.A.Store != NULL ){
941 SLU::Destroy_SuperMatrix_Store( &(data_.A) );
942 data_.A.Store = NULL;
947 Kokkos::resize(host_nzvals_view_, this->globalNumNonZeros_);
948 Kokkos::resize(host_rows_view_, this->globalNumNonZeros_);
949 Kokkos::resize(host_col_ptr_view_, this->globalNumRows_ + 1);
954 #ifdef HAVE_AMESOS2_TIMERS
955 Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
958 TEUCHOS_TEST_FOR_EXCEPTION( this->rowIndexBase_ != this->columnIndexBase_,
960 "Row and column maps have different indexbase ");
964 host_size_type_array>::do_get(this->matrixA_.ptr(),
965 host_nzvals_view_, host_rows_view_,
966 host_col_ptr_view_, nnz_ret,
967 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
969 this->rowIndexBase_);
973 SLU::Dtype_t dtype = type_map::dtype;
976 TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != as<int>(this->globalNumNonZeros_),
978 "Did not get the expected number of non-zero vals");
980 function_map::template create_CompCol_Matrix<host_value_type_array>( &(data_.A),
981 this->globalNumRows_, this->globalNumCols_,
983 convert_nzvals_, host_nzvals_view_,
984 host_rows_view_.data(),
985 host_col_ptr_view_.data(),
986 SLU::SLU_NC, dtype, SLU::SLU_GE);
992 template <
class Matrix,
class Vector>
996 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
997 size_t ld_rhs = this->matrixA_->getGlobalNumRows();
1000 SLU::SCformat * Lstore = (SLU::SCformat*)(data_.L.Store);
1001 int nsuper = 1 + Lstore->nsuper;
1002 Kokkos::resize(data_.parents, nsuper);
1003 for (
int s = 0; s < nsuper; s++) {
1004 int j = Lstore->sup_to_col[s+1]-1;
1005 if (data_.etree[j] == static_cast<int>(ld_rhs)) {
1006 data_.parents(s) = -1;
1008 data_.parents(s) = Lstore->col_to_sup[data_.etree[j]];
1012 deep_copy_or_assign_view(device_trsv_perm_r_, data_.perm_r);
1013 deep_copy_or_assign_view(device_trsv_perm_c_, data_.perm_c);
1014 if (data_.options.Equil == SLU::YES) {
1015 deep_copy_or_assign_view(device_trsv_R_, data_.R);
1016 deep_copy_or_assign_view(device_trsv_C_, data_.C);
1019 bool condition_flag =
false;
1020 if (data_.options.ConditionNumber == SLU::YES) {
1021 using STM = Teuchos::ScalarTraits<magnitude_type>;
1022 const magnitude_type eps = STM::eps ();
1024 SCformat *Lstore = (SCformat*)(data_.L.Store);
1025 int nsuper = 1 + Lstore->nsuper;
1026 int *nb = Lstore->sup_to_col;
1028 for (
int i = 0; i < nsuper; i++) {
1029 if (nb[i+1] - nb[i] > max_cols) {
1030 max_cols = nb[i+1] - nb[i];
1035 const magnitude_type multiply_fact (10.0);
1036 condition_flag = (((double)max_cols * nsuper) * eps * multiply_fact >= data_.rcond);
1038 #ifdef HAVE_AMESOS2_VERBOSE_DEBUG
1039 int n = data_.perm_r.extent(0);
1040 std::cout << this->getComm()->getRank()
1041 <<
" : anorm = " << data_.anorm <<
", rcond = " << data_.rcond <<
", n = " << n
1042 <<
", num super cols = " << nsuper <<
", max super cols = " << max_cols
1043 <<
" -> " << ((double)max_cols * nsuper) * eps / data_.rcond
1044 << (((double)max_cols * nsuper) * eps * multiply_fact < data_.rcond ?
" (okay)" :
" (warn)") << std::endl;
1049 if (sptrsv_use_spmv_ && !condition_flag) {
1050 device_khL_.create_sptrsv_handle(
1051 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_SPMV_DAG, ld_rhs,
true);
1052 device_khU_.create_sptrsv_handle(
1053 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_SPMV_DAG, ld_rhs,
false);
1055 device_khL_.create_sptrsv_handle(
1056 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_DAG, ld_rhs,
true);
1057 device_khU_.create_sptrsv_handle(
1058 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_DAG, ld_rhs,
false);
1062 device_khU_.set_sptrsv_column_major (!sptrsv_u_in_csr_);
1065 device_khL_.set_sptrsv_merge_supernodes (sptrsv_merge_supernodes_);
1066 device_khU_.set_sptrsv_merge_supernodes (sptrsv_merge_supernodes_);
1069 bool sptrsv_invert_diag = (!condition_flag && sptrsv_invert_diag_);
1070 bool sptrsv_invert_offdiag = (!condition_flag && sptrsv_invert_offdiag_);
1073 device_khL_.set_sptrsv_invert_diagonal (sptrsv_invert_diag);
1074 device_khU_.set_sptrsv_invert_diagonal (sptrsv_invert_diag);
1077 device_khL_.set_sptrsv_invert_offdiagonal (sptrsv_invert_offdiag);
1078 device_khU_.set_sptrsv_invert_offdiagonal (sptrsv_invert_offdiag);
1081 device_khL_.set_sptrsv_etree(data_.parents.data());
1082 device_khU_.set_sptrsv_etree(data_.parents.data());
1085 device_khL_.set_sptrsv_perm(data_.perm_r.data());
1086 device_khU_.set_sptrsv_perm(data_.perm_c.data());
1088 int block_size = -1;
1089 if (block_size >= 0) {
1090 std::cout <<
" Block Size : " << block_size << std::endl;
1091 device_khL_.set_sptrsv_diag_supernode_sizes (block_size, block_size);
1092 device_khU_.set_sptrsv_diag_supernode_sizes (block_size, block_size);
1097 #ifdef HAVE_AMESOS2_TIMERS
1098 Teuchos::RCP< Teuchos::Time > SymboTimer_ = Teuchos::TimeMonitor::getNewCounter (
"Time for SpTrsv symbolic");
1099 Teuchos::TimeMonitor numFactTimer(*SymboTimer_);
1101 KokkosSparse::Experimental::sptrsv_symbolic
1102 (&device_khL_, &device_khU_, data_.L, data_.U);
1107 #ifdef HAVE_AMESOS2_TIMERS
1108 Teuchos::RCP< Teuchos::Time > NumerTimer_ = Teuchos::TimeMonitor::getNewCounter (
"Time for SpTrsv numeric");
1109 Teuchos::TimeMonitor numFactTimer(*NumerTimer_);
1111 KokkosSparse::Experimental::sptrsv_compute
1112 (&device_khL_, &device_khU_, data_.L, data_.U);
1114 #endif // HAVE_AMESOS2_TRIANGULAR_SOLVE
1117 template <
class Matrix,
class Vector>
1119 Superlu<Matrix,Vector>::triangular_solve()
const
1121 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
1122 size_t ld_rhs = device_xValues_.extent(0);
1123 size_t nrhs = device_xValues_.extent(1);
1125 Kokkos::resize(device_trsv_rhs_, ld_rhs, nrhs);
1126 Kokkos::resize(device_trsv_sol_, ld_rhs, nrhs);
1129 auto local_device_bValues = device_bValues_;
1130 auto local_device_trsv_perm_r = device_trsv_perm_r_;
1131 auto local_device_trsv_rhs = device_trsv_rhs_;
1134 auto local_device_trsv_R = device_trsv_R_;
1135 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1136 KOKKOS_LAMBDA(
size_t j) {
1137 for(
size_t k = 0; k < nrhs; ++k) {
1138 local_device_trsv_rhs(local_device_trsv_perm_r(j),k) = local_device_trsv_R(j) * local_device_bValues(j,k);
1142 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1143 KOKKOS_LAMBDA(
size_t j) {
1144 for(
size_t k = 0; k < nrhs; ++k) {
1145 local_device_trsv_rhs(local_device_trsv_perm_r(j),k) = local_device_bValues(j,k);
1150 for(
size_t k = 0; k < nrhs; ++k) {
1151 auto sub_sol = Kokkos::subview(device_trsv_sol_, Kokkos::ALL, k);
1152 auto sub_rhs = Kokkos::subview(device_trsv_rhs_, Kokkos::ALL, k);
1156 #ifdef HAVE_AMESOS2_TIMERS
1157 Teuchos::RCP< Teuchos::Time > SpLtrsvTimer_ = Teuchos::TimeMonitor::getNewCounter (
"Time for L solve");
1158 Teuchos::TimeMonitor solveTimer(*SpLtrsvTimer_);
1160 KokkosSparse::Experimental::sptrsv_solve(&device_khL_, sub_sol, sub_rhs);
1164 #ifdef HAVE_AMESOS2_TIMERS
1165 Teuchos::RCP< Teuchos::Time > SpUtrsvTimer_ = Teuchos::TimeMonitor::getNewCounter (
"Time for U solve");
1166 Teuchos::TimeMonitor solveTimer(*SpUtrsvTimer_);
1168 KokkosSparse::Experimental::sptrsv_solve(&device_khU_, sub_rhs, sub_sol);
1173 auto local_device_xValues = device_xValues_;
1174 auto local_device_trsv_perm_c = device_trsv_perm_c_;
1176 auto local_device_trsv_C = device_trsv_C_;
1177 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1178 KOKKOS_LAMBDA(
size_t j) {
1179 for(
size_t k = 0; k < nrhs; ++k) {
1180 local_device_xValues(j,k) = local_device_trsv_C(j) * local_device_trsv_rhs(local_device_trsv_perm_c(j),k);
1184 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1185 KOKKOS_LAMBDA(
size_t j) {
1186 for(
size_t k = 0; k < nrhs; ++k) {
1187 local_device_xValues(j,k) = local_device_trsv_rhs(local_device_trsv_perm_c(j),k);
1191 #endif // HAVE_AMESOS2_TRIANGULAR_SOLVE
1194 template<
class Matrix,
class Vector>
1195 const char* Superlu<Matrix,Vector>::name =
"SuperLU";
1200 #endif // AMESOS2_SUPERLU_DEF_HPP
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers...
Definition: Amesos2_SolverCore_decl.hpp:71
A generic helper class for getting a CCS representation of a Matrix.
Definition: Amesos2_Util.hpp:614
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal structures.
Definition: Amesos2_Superlu_def.hpp:928
EPhase
Used to indicate a phase in the direct solution.
Definition: Amesos2_TypeDecl.hpp:31
int numericFactorization_impl()
Superlu specific numeric factorization.
Definition: Amesos2_Superlu_def.hpp:283
~Superlu()
Destructor.
Definition: Amesos2_Superlu_def.hpp:94
Amesos2 Superlu declarations.
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition: Amesos2_Superlu_def.hpp:665
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition: Amesos2_Superlu_def.hpp:784
std::string description() const override
Returns a short description of this Solver.
Definition: Amesos2_Superlu_def.hpp:117
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
Superlu specific solve.
Definition: Amesos2_Superlu_def.hpp:463
A Matrix adapter interface for Amesos2.
Definition: Amesos2_MatrixAdapter_decl.hpp:42
void setParameters_impl(const Teuchos::RCP< Teuchos::ParameterList > ¶meterList)
Definition: Amesos2_Superlu_def.hpp:676
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition: Amesos2_Superlu_def.hpp:171
A templated MultiVector class adapter for Amesos2.
Definition: Amesos2_MultiVecAdapter_decl.hpp:142
int symbolicFactorization_impl()
Perform symbolic factorization of the matrix using Superlu.
Definition: Amesos2_Superlu_def.hpp:196
Amesos2 interface to the SuperLU package.
Definition: Amesos2_Superlu_decl.hpp:42