53 #ifndef AMESOS2_SUPERLU_DEF_HPP
54 #define AMESOS2_SUPERLU_DEF_HPP
56 #include <Teuchos_Tuple.hpp>
57 #include <Teuchos_ParameterList.hpp>
58 #include <Teuchos_StandardParameterEntryValidators.hpp>
63 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
70 #include "KokkosSparse_sptrsv_superlu.hpp"
76 template <
class Matrix,
class Vector>
78 Teuchos::RCP<const Matrix> A,
79 Teuchos::RCP<Vector> X,
80 Teuchos::RCP<const Vector> B )
82 , is_contiguous_(true)
83 , use_triangular_solves_(false)
90 SLU::set_default_options(&(data_.options));
92 data_.options.PrintStat = SLU::NO;
94 SLU::StatInit(&(data_.stat));
96 Kokkos::resize(data_.perm_r, this->globalNumRows_);
97 Kokkos::resize(data_.perm_c, this->globalNumCols_);
98 Kokkos::resize(data_.etree, this->globalNumCols_);
99 Kokkos::resize(data_.R, this->globalNumRows_);
100 Kokkos::resize(data_.C, this->globalNumCols_);
102 data_.relax = SLU::sp_ienv(2);
103 data_.panel_size = SLU::sp_ienv(1);
106 data_.A.Store = NULL;
107 data_.L.Store = NULL;
108 data_.U.Store = NULL;
109 data_.X.Store = NULL;
110 data_.B.Store = NULL;
116 template <
class Matrix,
class Vector>
124 SLU::StatFree( &(data_.stat) ) ;
127 if ( data_.A.Store != NULL ){
128 SLU::Destroy_SuperMatrix_Store( &(data_.A) );
132 if ( data_.L.Store != NULL ){
133 SLU::Destroy_SuperNode_Matrix( &(data_.L) );
134 SLU::Destroy_CompCol_Matrix( &(data_.U) );
138 template <
class Matrix,
class Vector>
142 std::ostringstream oss;
143 oss <<
"SuperLU solver interface";
145 oss <<
", \"ILUTP\" : {";
146 oss <<
"drop tol = " << data_.options.ILU_DropTol;
147 oss <<
", fill factor = " << data_.options.ILU_FillFactor;
148 oss <<
", fill tol = " << data_.options.ILU_FillTol;
149 switch(data_.options.ILU_MILU) {
161 oss <<
", regular ILU";
163 switch(data_.options.ILU_Norm) {
172 oss <<
", infinity-norm";
176 oss <<
", direct solve";
192 template<
class Matrix,
class Vector>
204 int permc_spec = data_.options.ColPerm;
205 if ( permc_spec != SLU::MY_PERMC && this->root_ ){
206 #ifdef HAVE_AMESOS2_TIMERS
207 Teuchos::TimeMonitor preOrderTimer(this->timers_.preOrderTime_);
210 SLU::get_perm_c(permc_spec, &(data_.A), data_.perm_c.data());
217 template <
class Matrix,
class Vector>
232 same_symbolic_ =
false;
233 data_.options.Fact = SLU::DOFACT;
239 template <
class Matrix,
class Vector>
248 if ( !same_symbolic_ && data_.L.Store != NULL ){
249 SLU::Destroy_SuperNode_Matrix( &(data_.L) );
250 SLU::Destroy_CompCol_Matrix( &(data_.U) );
251 data_.L.Store = NULL;
252 data_.U.Store = NULL;
255 if( same_symbolic_ ) data_.options.Fact = SLU::SamePattern_SameRowPerm;
260 #ifdef HAVE_AMESOS2_DEBUG
261 TEUCHOS_TEST_FOR_EXCEPTION( data_.A.ncol != as<int>(this->globalNumCols_),
263 "Error in converting to SuperLU SuperMatrix: wrong number of global columns." );
264 TEUCHOS_TEST_FOR_EXCEPTION( data_.A.nrow != as<int>(this->globalNumRows_),
266 "Error in converting to SuperLU SuperMatrix: wrong number of global rows." );
269 if( data_.options.Equil == SLU::YES ){
270 magnitude_type rowcnd, colcnd, amax;
274 function_map::gsequ(&(data_.A), data_.R.data(),
275 data_.C.data(), &rowcnd, &colcnd,
277 TEUCHOS_TEST_FOR_EXCEPTION
278 (info2 < 0, std::runtime_error,
279 "SuperLU's gsequ function returned with status " << info2 <<
" < 0."
280 " This means that argument " << (-info2) <<
" given to the function"
281 " had an illegal value.");
283 if (info2 <= data_.A.nrow) {
284 TEUCHOS_TEST_FOR_EXCEPTION
285 (
true, std::runtime_error,
"SuperLU's gsequ function returned with "
286 "info = " << info2 <<
" > 0, and info <= A.nrow = " << data_.A.nrow
287 <<
". This means that row " << info2 <<
" of A is exactly zero.");
289 else if (info2 > data_.A.ncol) {
290 TEUCHOS_TEST_FOR_EXCEPTION
291 (
true, std::runtime_error,
"SuperLU's gsequ function returned with "
292 "info = " << info2 <<
" > 0, and info > A.ncol = " << data_.A.ncol
293 <<
". This means that column " << (info2 - data_.A.nrow) <<
" of "
294 "A is exactly zero.");
297 TEUCHOS_TEST_FOR_EXCEPTION
298 (
true, std::runtime_error,
"SuperLU's gsequ function returned "
299 "with info = " << info2 <<
" > 0, but its value is not in the "
300 "range permitted by the documentation. This should never happen "
301 "(it appears to be SuperLU's fault).");
306 function_map::laqgs(&(data_.A), data_.R.data(),
307 data_.C.data(), rowcnd, colcnd,
308 amax, &(data_.equed));
317 SLU::sp_preorder(&(data_.options), &(data_.A), data_.perm_c.data(),
318 data_.etree.data(), &(data_.AC));
321 #ifdef HAVE_AMESOS2_TIMERS
322 Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
325 #ifdef HAVE_AMESOS2_VERBOSE_DEBUG
326 std::cout <<
"Superlu:: Before numeric factorization" << std::endl;
327 std::cout <<
"nzvals_ : " << nzvals_.toString() << std::endl;
328 std::cout <<
"rowind_ : " << rowind_.toString() << std::endl;
329 std::cout <<
"colptr_ : " << colptr_.toString() << std::endl;
332 if(ILU_Flag_==
false) {
333 function_map::gstrf(&(data_.options), &(data_.AC),
334 data_.relax, data_.panel_size, data_.etree.data(),
335 NULL, 0, data_.perm_c.data(), data_.perm_r.data(),
336 &(data_.L), &(data_.U),
337 #ifdef HAVE_AMESOS2_SUPERLU5_API
340 &(data_.stat), &info);
343 function_map::gsitrf(&(data_.options), &(data_.AC),
344 data_.relax, data_.panel_size, data_.etree.data(),
345 NULL, 0, data_.perm_c.data(), data_.perm_r.data(),
346 &(data_.L), &(data_.U),
347 #ifdef HAVE_AMESOS2_SUPERLU5_API
350 &(data_.stat), &info);
355 SLU::Destroy_CompCol_Permuted( &(data_.AC) );
358 this->setNnzLU( as<size_t>(((SLU::SCformat*)data_.L.Store)->nnz +
359 ((SLU::NCformat*)data_.U.Store)->nnz) );
363 Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
365 global_size_type info_st = as<global_size_type>(info);
366 TEUCHOS_TEST_FOR_EXCEPTION( (info_st > 0) && (info_st <= this->globalNumCols_),
368 "Factorization complete, but matrix is singular. Division by zero eminent");
369 TEUCHOS_TEST_FOR_EXCEPTION( (info_st > 0) && (info_st > this->globalNumCols_),
371 "Memory allocation failure in Superlu factorization");
373 data_.options.Fact = SLU::FACTORED;
374 same_symbolic_ =
true;
376 if(use_triangular_solves_) {
377 triangular_solve_factor();
384 template <
class Matrix,
class Vector>
391 const global_size_type ld_rhs = this->root_ ? X->getGlobalLength() : 0;
392 const size_t nrhs = X->getGlobalNumVectors();
395 #ifdef HAVE_AMESOS2_TIMERS
396 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
397 Teuchos::TimeMonitor redistTimer( this->timers_.vecRedistTime_ );
402 if(use_triangular_solves_) {
403 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
404 Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
405 device_solve_array_t>::do_get(X, device_xValues_,
407 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
408 this->rowIndexBase_);
409 Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
410 device_solve_array_t>::do_get(B, device_bValues_,
412 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
413 this->rowIndexBase_);
417 Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
418 host_solve_array_t>::do_get(X, host_xValues_,
420 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
421 this->rowIndexBase_);
422 Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
423 host_solve_array_t>::do_get(B, host_bValues_,
425 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
426 this->rowIndexBase_);
436 if(data_.equed !=
'N') {
437 if(use_triangular_solves_) {
438 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
439 device_solve_array_t copyB(Kokkos::ViewAllocateWithoutInitializing(
"copyB"),
440 device_bValues_.extent(0), device_bValues_.extent(1));
441 Kokkos::deep_copy(copyB, device_bValues_);
442 device_bValues_ = copyB;
446 host_solve_array_t copyB(Kokkos::ViewAllocateWithoutInitializing(
"copyB"),
447 host_bValues_.extent(0), host_bValues_.extent(1));
448 Kokkos::deep_copy(copyB, host_bValues_);
449 host_bValues_ = copyB;
455 magnitude_type rpg, rcond;
460 #ifdef HAVE_AMESOS2_TIMERS
461 Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
464 if(use_triangular_solves_) {
468 Kokkos::resize(data_.ferr, nrhs);
469 Kokkos::resize(data_.berr, nrhs);
472 #ifdef HAVE_AMESOS2_TIMERS
473 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
475 SLU::Dtype_t dtype = type_map::dtype;
476 int i_ld_rhs = as<int>(ld_rhs);
477 function_map::create_Dense_Matrix(&(data_.B), i_ld_rhs, as<int>(nrhs),
478 convert_bValues_, host_bValues_, i_ld_rhs,
479 SLU::SLU_DN, dtype, SLU::SLU_GE);
480 function_map::create_Dense_Matrix(&(data_.X), i_ld_rhs, as<int>(nrhs),
481 convert_xValues_, host_xValues_, i_ld_rhs,
482 SLU::SLU_DN, dtype, SLU::SLU_GE);
485 if(ILU_Flag_==
false) {
486 function_map::gssvx(&(data_.options), &(data_.A),
487 data_.perm_c.data(), data_.perm_r.data(),
488 data_.etree.data(), &(data_.equed), data_.R.data(),
489 data_.C.data(), &(data_.L), &(data_.U), NULL, 0, &(data_.B),
490 &(data_.X), &rpg, &rcond, data_.ferr.data(),
492 #ifdef HAVE_AMESOS2_SUPERLU5_API
495 &(data_.mem_usage), &(data_.stat), &ierr);
498 function_map::gsisx(&(data_.options), &(data_.A),
499 data_.perm_c.data(), data_.perm_r.data(),
500 data_.etree.data(), &(data_.equed), data_.R.data(),
501 data_.C.data(), &(data_.L), &(data_.U), NULL, 0, &(data_.B),
502 &(data_.X), &rpg, &rcond,
503 #ifdef HAVE_AMESOS2_SUPERLU5_API
506 &(data_.mem_usage), &(data_.stat), &ierr);
510 SLU::Destroy_SuperMatrix_Store( &(data_.X) );
511 SLU::Destroy_SuperMatrix_Store( &(data_.B) );
512 data_.X.Store = NULL;
513 data_.B.Store = NULL;
520 #ifdef HAVE_AMESOS2_TIMERS
521 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
523 function_map::convert_back_Dense_Matrix(convert_bValues_, host_bValues_);
524 function_map::convert_back_Dense_Matrix(convert_xValues_, host_xValues_);
528 SLU::Destroy_SuperMatrix_Store( &(data_.X) );
529 SLU::Destroy_SuperMatrix_Store( &(data_.B) );
530 data_.X.Store = NULL;
531 data_.B.Store = NULL;
535 Teuchos::broadcast(*(this->getComm()), 0, &ierr);
537 global_size_type ierr_st = as<global_size_type>(ierr);
538 TEUCHOS_TEST_FOR_EXCEPTION( ierr < 0,
539 std::invalid_argument,
540 "Argument " << -ierr <<
" to SuperLU xgssvx had illegal value" );
541 TEUCHOS_TEST_FOR_EXCEPTION( ierr > 0 && ierr_st <= this->globalNumCols_,
543 "Factorization complete, but U is exactly singular" );
544 TEUCHOS_TEST_FOR_EXCEPTION( ierr > 0 && ierr_st > this->globalNumCols_ + 1,
546 "SuperLU allocated " << ierr - this->globalNumCols_ <<
" bytes of "
547 "memory before allocation failure occured." );
551 #ifdef HAVE_AMESOS2_TIMERS
552 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
555 if(use_triangular_solves_) {
556 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
557 Util::put_1d_data_helper_kokkos_view<
560 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
561 this->rowIndexBase_);
565 Util::put_1d_data_helper_kokkos_view<
568 (is_contiguous_ ==
true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
569 this->rowIndexBase_);
579 template <
class Matrix,
class Vector>
586 return( this->matrixA_->getGlobalNumRows() == this->matrixA_->getGlobalNumCols() );
590 template <
class Matrix,
class Vector>
595 using Teuchos::getIntegralValue;
596 using Teuchos::ParameterEntryValidator;
598 RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
600 ILU_Flag_ = parameterList->get<
bool>(
"ILU_Flag",
false);
602 SLU::ilu_set_default_options(&(data_.options));
604 data_.options.PrintStat = SLU::NO;
607 data_.options.Trans = this->control_.useTranspose_ ? SLU::TRANS : SLU::NOTRANS;
609 if( parameterList->isParameter(
"Trans") ){
610 RCP<const ParameterEntryValidator> trans_validator = valid_params->getEntry(
"Trans").validator();
611 parameterList->getEntry(
"Trans").setValidator(trans_validator);
613 data_.options.Trans = getIntegralValue<SLU::trans_t>(*parameterList,
"Trans");
616 if( parameterList->isParameter(
"IterRefine") ){
617 RCP<const ParameterEntryValidator> refine_validator = valid_params->getEntry(
"IterRefine").validator();
618 parameterList->getEntry(
"IterRefine").setValidator(refine_validator);
620 data_.options.IterRefine = getIntegralValue<SLU::IterRefine_t>(*parameterList,
"IterRefine");
623 if( parameterList->isParameter(
"ColPerm") ){
624 RCP<const ParameterEntryValidator> colperm_validator = valid_params->getEntry(
"ColPerm").validator();
625 parameterList->getEntry(
"ColPerm").setValidator(colperm_validator);
627 data_.options.ColPerm = getIntegralValue<SLU::colperm_t>(*parameterList,
"ColPerm");
630 data_.options.DiagPivotThresh = parameterList->get<
double>(
"DiagPivotThresh", 1.0);
632 bool equil = parameterList->get<
bool>(
"Equil",
true);
633 data_.options.Equil = equil ? SLU::YES : SLU::NO;
635 bool symmetric_mode = parameterList->get<
bool>(
"SymmetricMode",
false);
636 data_.options.SymmetricMode = symmetric_mode ? SLU::YES : SLU::NO;
639 if( parameterList->isParameter(
"RowPerm") ){
640 RCP<const ParameterEntryValidator> rowperm_validator = valid_params->getEntry(
"RowPerm").validator();
641 parameterList->getEntry(
"RowPerm").setValidator(rowperm_validator);
642 data_.options.RowPerm = getIntegralValue<SLU::rowperm_t>(*parameterList,
"RowPerm");
651 data_.options.ILU_DropTol = parameterList->get<
double>(
"ILU_DropTol", 0.0001);
653 data_.options.ILU_FillFactor = parameterList->get<
double>(
"ILU_FillFactor", 10.0);
655 if( parameterList->isParameter(
"ILU_Norm") ) {
656 RCP<const ParameterEntryValidator> norm_validator = valid_params->getEntry(
"ILU_Norm").validator();
657 parameterList->getEntry(
"ILU_Norm").setValidator(norm_validator);
658 data_.options.ILU_Norm = getIntegralValue<SLU::norm_t>(*parameterList,
"ILU_Norm");
661 if( parameterList->isParameter(
"ILU_MILU") ) {
662 RCP<const ParameterEntryValidator> milu_validator = valid_params->getEntry(
"ILU_MILU").validator();
663 parameterList->getEntry(
"ILU_MILU").setValidator(milu_validator);
664 data_.options.ILU_MILU = getIntegralValue<SLU::milu_t>(*parameterList,
"ILU_MILU");
667 data_.options.ILU_FillTol = parameterList->get<
double>(
"ILU_FillTol", 0.01);
669 is_contiguous_ = parameterList->get<
bool>(
"IsContiguous",
true);
670 use_triangular_solves_ = parameterList->get<
bool>(
"Enable_KokkosKernels_TriangularSolves",
false);
672 if(use_triangular_solves_) {
673 #if not defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) || not defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
674 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
675 "Calling for triangular solves but KokkosKernels_ENABLE_SUPERNODAL_SPTRSV and KokkosKernels_ENABLE_TPL_SUPERLU were not configured." );
681 template <
class Matrix,
class Vector>
682 Teuchos::RCP<const Teuchos::ParameterList>
686 using Teuchos::tuple;
687 using Teuchos::ParameterList;
688 using Teuchos::EnhancedNumberValidator;
689 using Teuchos::setStringToIntegralParameter;
690 using Teuchos::stringToIntegralParameterEntryValidator;
692 static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
694 if( is_null(valid_params) ){
695 Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
697 setStringToIntegralParameter<SLU::trans_t>(
"Trans",
"NOTRANS",
698 "Solve for the transpose system or not",
699 tuple<string>(
"TRANS",
"NOTRANS",
"CONJ"),
700 tuple<string>(
"Solve with transpose",
701 "Do not solve with transpose",
702 "Solve with the conjugate transpose"),
703 tuple<SLU::trans_t>(SLU::TRANS,
708 setStringToIntegralParameter<SLU::IterRefine_t>(
"IterRefine",
"NOREFINE",
709 "Type of iterative refinement to use",
710 tuple<string>(
"NOREFINE",
"SLU_SINGLE",
"SLU_DOUBLE"),
711 tuple<string>(
"Do not use iterative refinement",
712 "Do single iterative refinement",
713 "Do double iterative refinement"),
714 tuple<SLU::IterRefine_t>(SLU::NOREFINE,
720 setStringToIntegralParameter<SLU::colperm_t>(
"ColPerm",
"COLAMD",
721 "Specifies how to permute the columns of the "
722 "matrix for sparsity preservation",
723 tuple<string>(
"NATURAL",
"MMD_AT_PLUS_A",
724 "MMD_ATA",
"COLAMD"),
725 tuple<string>(
"Natural ordering",
726 "Minimum degree ordering on A^T + A",
727 "Minimum degree ordering on A^T A",
728 "Approximate minimum degree column ordering"),
729 tuple<SLU::colperm_t>(SLU::NATURAL,
735 Teuchos::RCP<EnhancedNumberValidator<double> > diag_pivot_thresh_validator
736 = Teuchos::rcp(
new EnhancedNumberValidator<double>(0.0, 1.0) );
737 pl->set(
"DiagPivotThresh", 1.0,
738 "Specifies the threshold used for a diagonal entry to be an acceptable pivot",
739 diag_pivot_thresh_validator);
741 pl->set(
"Equil",
true,
"Whether to equilibrate the system before solve");
743 pl->set(
"SymmetricMode",
false,
744 "Specifies whether to use the symmetric mode. "
745 "Gives preference to diagonal pivots and uses "
746 "an (A^T + A)-based column permutation.");
750 setStringToIntegralParameter<SLU::rowperm_t>(
"RowPerm",
"LargeDiag",
751 "Type of row permutation strategy to use",
752 tuple<string>(
"NOROWPERM",
"LargeDiag",
"MY_PERMR"),
753 tuple<string>(
"Use natural ordering",
754 "Use weighted bipartite matching algorithm",
755 "Use the ordering given in perm_r input"),
756 tuple<SLU::rowperm_t>(SLU::NOROWPERM,
757 #if SUPERLU_MAJOR_VERSION > 5 || (SUPERLU_MAJOR_VERSION == 5 && SUPERLU_MINOR_VERSION > 2) || (SUPERLU_MAJOR_VERSION == 5 && SUPERLU_MINOR_VERSION == 2 && SUPERLU_PATCH_VERSION >= 2)
779 pl->set(
"ILU_DropTol", 0.0001,
"ILUT drop tolerance");
781 pl->set(
"ILU_FillFactor", 10.0,
"ILUT fill factor");
783 setStringToIntegralParameter<SLU::norm_t>(
"ILU_Norm",
"INF_NORM",
784 "Type of norm to use",
785 tuple<string>(
"ONE_NORM",
"TWO_NORM",
"INF_NORM"),
786 tuple<string>(
"1-norm",
"2-norm",
"inf-norm"),
787 tuple<SLU::norm_t>(SLU::ONE_NORM,SLU::TWO_NORM,SLU::INF_NORM),
790 setStringToIntegralParameter<SLU::milu_t>(
"ILU_MILU",
"SILU",
791 "Type of modified ILU to use",
792 tuple<string>(
"SILU",
"SMILU_1",
"SMILU_2",
"SMILU_3"),
793 tuple<string>(
"Regular ILU",
"MILU 1",
"MILU 2",
"MILU 3"),
794 tuple<SLU::milu_t>(SLU::SILU,SLU::SMILU_1,SLU::SMILU_2,
798 pl->set(
"ILU_FillTol", 0.01,
"ILUT fill tolerance");
800 pl->set(
"ILU_Flag",
false,
"ILU flag: if true, run ILU routines");
802 pl->set(
"Enable_KokkosKernels_TriangularSolves",
false,
"Whether to use triangular solves.");
804 pl->set(
"IsContiguous",
true,
"Whether GIDs contiguous");
813 template <
class Matrix,
class Vector>
819 #ifdef HAVE_AMESOS2_TIMERS
820 Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
824 if( current_phase == SYMBFACT )
return false;
827 if( data_.A.Store != NULL ){
828 SLU::Destroy_SuperMatrix_Store( &(data_.A) );
829 data_.A.Store = NULL;
834 Kokkos::resize(host_nzvals_view_, this->globalNumNonZeros_);
835 Kokkos::resize(host_rows_view_, this->globalNumNonZeros_);
836 Kokkos::resize(host_col_ptr_view_, this->globalNumRows_ + 1);
841 #ifdef HAVE_AMESOS2_TIMERS
842 Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
845 TEUCHOS_TEST_FOR_EXCEPTION( this->rowIndexBase_ != this->columnIndexBase_,
847 "Row and column maps have different indexbase ");
849 if ( is_contiguous_ ==
true ) {
850 Util::get_ccs_helper_kokkos_view<
852 host_size_type_array>::do_get(this->matrixA_.ptr(),
853 host_nzvals_view_, host_rows_view_,
854 host_col_ptr_view_, nnz_ret, ROOTED,
856 this->rowIndexBase_);
859 Util::get_ccs_helper_kokkos_view<
861 host_size_type_array>::do_get(this->matrixA_.ptr(),
862 host_nzvals_view_, host_rows_view_,
863 host_col_ptr_view_, nnz_ret, CONTIGUOUS_AND_ROOTED,
865 this->rowIndexBase_);
870 SLU::Dtype_t dtype = type_map::dtype;
873 TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != as<int>(this->globalNumNonZeros_),
875 "Did not get the expected number of non-zero vals");
877 function_map::template create_CompCol_Matrix<host_value_type_array>( &(data_.A),
878 this->globalNumRows_, this->globalNumCols_,
880 convert_nzvals_, host_nzvals_view_,
881 host_rows_view_.data(),
882 host_col_ptr_view_.data(),
883 SLU::SLU_NC, dtype, SLU::SLU_GE);
889 template <
class Matrix,
class Vector>
893 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
894 size_t ld_rhs = this->matrixA_->getGlobalNumRows();
897 SLU::SCformat * Lstore = (SLU::SCformat*)(data_.L.Store);
898 int nsuper = 1 + Lstore->nsuper;
899 Kokkos::resize(data_.parents, nsuper);
900 for (
int s = 0; s < nsuper; s++) {
901 int j = Lstore->sup_to_col[s+1]-1;
902 if (data_.etree[j] == static_cast<int>(ld_rhs)) {
903 data_.parents(s) = -1;
905 data_.parents(s) = Lstore->col_to_sup[data_.etree[j]];
909 deep_copy_or_assign_view(device_trsv_perm_r_, data_.perm_r);
910 deep_copy_or_assign_view(device_trsv_perm_c_, data_.perm_c);
913 device_khL_.create_sptrsv_handle(
914 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_ETREE, ld_rhs,
true);
915 device_khU_.create_sptrsv_handle(
916 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_ETREE, ld_rhs,
false);
918 const bool u_in_csr =
true;
919 device_khU_.set_sptrsv_column_major (!u_in_csr);
921 const bool merge =
false;
922 device_khL_.set_sptrsv_merge_supernodes (merge);
923 device_khU_.set_sptrsv_merge_supernodes (merge);
926 const bool invert_diag =
true;
927 device_khL_.set_sptrsv_invert_diagonal (invert_diag);
928 device_khU_.set_sptrsv_invert_diagonal (invert_diag);
931 const bool invert_offdiag =
false;
932 device_khL_.set_sptrsv_invert_offdiagonal (invert_offdiag);
933 device_khU_.set_sptrsv_invert_offdiagonal (invert_offdiag);
936 device_khL_.set_sptrsv_etree(data_.parents.data());
937 device_khU_.set_sptrsv_etree(data_.parents.data());
940 device_khL_.set_sptrsv_perm(data_.perm_r.data());
941 device_khU_.set_sptrsv_perm(data_.perm_c.data());
944 if (block_size >= 0) {
945 std::cout <<
" Block Size : " << block_size << std::endl;
946 device_khL_.set_sptrsv_diag_supernode_sizes (block_size, block_size);
947 device_khU_.set_sptrsv_diag_supernode_sizes (block_size, block_size);
951 KokkosSparse::Experimental::sptrsv_symbolic
952 (&device_khL_, &device_khU_, data_.L, data_.U);
955 KokkosSparse::Experimental::sptrsv_compute
956 (&device_khL_, &device_khU_, data_.L, data_.U);
957 #endif // HAVE_AMESOS2_TRIANGULAR_SOLVE
960 template <
class Matrix,
class Vector>
962 Superlu<Matrix,Vector>::triangular_solve()
const
964 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
965 size_t ld_rhs = device_xValues_.extent(0);
966 size_t nrhs = device_xValues_.extent(1);
968 Kokkos::resize(device_trsv_rhs_, ld_rhs, nrhs);
969 Kokkos::resize(device_trsv_sol_, ld_rhs, nrhs);
972 auto local_device_bValues = device_bValues_;
973 auto local_device_trsv_perm_r = device_trsv_perm_r_;
974 auto local_device_trsv_rhs = device_trsv_rhs_;
975 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
976 KOKKOS_LAMBDA(
size_t j) {
977 for(
size_t k = 0; k < nrhs; ++k) {
978 local_device_trsv_rhs(local_device_trsv_perm_r(j),k) = local_device_bValues(j,k);
982 for(
size_t k = 0; k < nrhs; ++k) {
983 auto sub_sol = Kokkos::subview(device_trsv_sol_, Kokkos::ALL, k);
984 auto sub_rhs = Kokkos::subview(device_trsv_rhs_, Kokkos::ALL, k);
987 KokkosSparse::Experimental::sptrsv_solve(&device_khL_, sub_sol, sub_rhs);
990 KokkosSparse::Experimental::sptrsv_solve(&device_khU_, sub_rhs, sub_sol);
994 auto local_device_xValues = device_xValues_;
995 auto local_device_trsv_perm_c = device_trsv_perm_c_;
996 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
997 KOKKOS_LAMBDA(
size_t j) {
998 for(
size_t k = 0; k < nrhs; ++k) {
999 local_device_xValues(j,k) = local_device_trsv_rhs(local_device_trsv_perm_c(j),k);
1002 #endif // HAVE_AMESOS2_TRIANGULAR_SOLVE
1005 template<
class Matrix,
class Vector>
1006 const char* Superlu<Matrix,Vector>::name =
"SuperLU";
1011 #endif // AMESOS2_SUPERLU_DEF_HPP
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers...
Definition: Amesos2_SolverCore_decl.hpp:105
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal structures.
Definition: Amesos2_Superlu_def.hpp:815
EPhase
Used to indicate a phase in the direct solution.
Definition: Amesos2_TypeDecl.hpp:65
int numericFactorization_impl()
Superlu specific numeric factorization.
Definition: Amesos2_Superlu_def.hpp:241
~Superlu()
Destructor.
Definition: Amesos2_Superlu_def.hpp:117
Amesos2 Superlu declarations.
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition: Amesos2_Superlu_def.hpp:581
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition: Amesos2_Superlu_def.hpp:683
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
Superlu specific solve.
Definition: Amesos2_Superlu_def.hpp:386
A Matrix adapter interface for Amesos2.
Definition: Amesos2_MatrixAdapter_decl.hpp:76
void setParameters_impl(const Teuchos::RCP< Teuchos::ParameterList > ¶meterList)
Definition: Amesos2_Superlu_def.hpp:592
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition: Amesos2_Superlu_def.hpp:194
A templated MultiVector class adapter for Amesos2.
Definition: Amesos2_MultiVecAdapter_decl.hpp:176
std::string description() const
Returns a short description of this Solver.
Definition: Amesos2_Superlu_def.hpp:140
int symbolicFactorization_impl()
Perform symbolic factorization of the matrix using Superlu.
Definition: Amesos2_Superlu_def.hpp:219
Amesos2 interface to the SuperLU package.
Definition: Amesos2_Superlu_decl.hpp:76