52 #ifndef AMESOS2_SUPERLUMT_DEF_HPP
53 #define AMESOS2_SUPERLUMT_DEF_HPP
55 #include <Teuchos_Tuple.hpp>
56 #include <Teuchos_ParameterList.hpp>
57 #include <Teuchos_StandardParameterEntryValidators.hpp>
76 int sp_colorder(SuperMatrix*,
int*,superlumt_options_t*,SuperMatrix*);
96 template <
class Matrix,
class Vector>
98 Teuchos::RCP<Vector> X,
99 Teuchos::RCP<const Vector> B )
104 , is_contiguous_(true)
106 Teuchos::RCP<Teuchos::ParameterList> default_params
110 data_.options.lwork = 0;
114 data_.options.perm_r = data_.perm_r.getRawPtr();
115 data_.options.perm_c = data_.perm_c.getRawPtr();
120 data_.options.refact = SLUMT::NO;
121 data_.equed = SLUMT::NOEQUIL;
122 data_.rowequ =
false;
123 data_.colequ =
false;
125 data_.A.Store = NULL;
126 data_.AC.Store = NULL;
127 data_.BX.Store = NULL;
128 data_.L.Store = NULL;
129 data_.U.Store = NULL;
131 data_.stat.ops = NULL;
135 template <
class Matrix,
class Vector>
145 if ( data_.A.Store != NULL ){
147 SLUMT::D::Destroy_SuperMatrix_Store( &(data_.A) );
151 if( data_.AC.Store != NULL ){
152 SLUMT::D::Destroy_CompCol_Permuted( &(data_.AC) );
155 if ( data_.L.Store != NULL ){
156 SLUMT::D::Destroy_SuperNode_SCP( &(data_.L) );
157 SLUMT::D::Destroy_CompCol_NCP( &(data_.U) );
160 free( data_.options.etree );
161 free( data_.options.colcnt_h );
162 free( data_.options.part_super_h );
167 if ( data_.BX.Store != NULL ){
173 SLUMT::D::Destroy_SuperMatrix_Store( &(data_.BX) );
176 if ( data_.stat.ops != NULL )
177 SLUMT::D::StatFree( &(data_.stat) );
180 template<
class Matrix,
class Vector>
185 int perm_spec = data_.options.ColPerm;
186 if( perm_spec != SLUMT::MY_PERMC && this->root_ ){
187 #ifdef HAVE_AMESOS2_TIMERS
188 Teuchos::TimeMonitor preOrderTimer(this->timers_.preOrderTime_);
191 SLUMT::S::get_perm_c(perm_spec, &(data_.A), data_.perm_c.getRawPtr());
200 template <
class Matrix,
class Vector>
208 data_.options.refact = SLUMT::NO;
210 if( this->status_.numericFactorizationDone() && this->root_ ){
215 SLUMT::D::Destroy_SuperNode_Matrix( &(data_.L) );
216 SLUMT::D::Destroy_CompCol_NCP( &(data_.U) );
217 data_.L.Store = NULL;
218 data_.U.Store = NULL;
225 template <
class Matrix,
class Vector>
231 #ifdef HAVE_AMESOS2_DEBUG
232 const int nprocs = data_.options.nprocs;
233 TEUCHOS_TEST_FOR_EXCEPTION( nprocs <= 0,
234 std::invalid_argument,
235 "The number of threads to spawn should be greater than 0." );
242 if( data_.options.fact == SLUMT::EQUILIBRATE ){
243 magnitude_type rowcnd, colcnd, amax;
246 function_map::gsequ(&(data_.A), data_.R.getRawPtr(),
247 data_.C.getRawPtr(), &rowcnd, &colcnd,
249 TEUCHOS_TEST_FOR_EXCEPTION( info != 0,
251 "SuperLU_MT gsequ returned with status " << info );
253 function_map::laqgs(&(data_.A), data_.R.getRawPtr(),
254 data_.C.getRawPtr(), rowcnd, colcnd,
255 amax, &(data_.equed));
257 data_.rowequ = (data_.equed == SLUMT::ROW) || (data_.equed == SLUMT::BOTH);
258 data_.colequ = (data_.equed == SLUMT::COL) || (data_.equed == SLUMT::BOTH);
260 data_.options.fact = SLUMT::DOFACT;
264 if( data_.AC.Store != NULL ){
265 SLUMT::D::Destroy_CompCol_Permuted( &(data_.AC) );
266 if( data_.options.refact == SLUMT::NO ){
267 free( data_.options.etree );
268 free( data_.options.colcnt_h );
269 free( data_.options.part_super_h );
271 data_.AC.Store = NULL;
275 SLUMT::sp_colorder(&(data_.A), data_.perm_c.getRawPtr(),
276 &(data_.options), &(data_.AC));
280 const int n = as<int>(this->globalNumCols_);
281 if( data_.stat.ops != NULL ){ SLUMT::D::StatFree( &(data_.stat) ); data_.stat.ops = NULL; }
282 SLUMT::D::StatAlloc(n, data_.options.nprocs, data_.options.panel_size,
283 data_.options.relax, &(data_.stat));
284 SLUMT::D::StatInit(n, data_.options.nprocs, &(data_.stat));
288 #ifdef HAVE_AMESOS2_TIMERS
289 Teuchos::TimeMonitor numFactTimer( this->timers_.numFactTime_ );
292 #ifdef HAVE_AMESOS2_VERBOSE_DEBUG
293 std::cout <<
"SuperLU_MT:: Before numeric factorization" << std::endl;
294 std::cout <<
"nzvals_ : " << nzvals_.toString() << std::endl;
295 std::cout <<
"rowind_ : " << rowind_.toString() << std::endl;
296 std::cout <<
"colptr_ : " << colptr_.toString() << std::endl;
299 function_map::gstrf(&(data_.options), &(data_.AC),
300 data_.perm_r.getRawPtr(), &(data_.L), &(data_.U),
301 &(data_.stat), &info);
305 this->setNnzLU( as<size_t>(((SLUMT::SCformat*)data_.L.Store)->nnz +
306 ((SLUMT::NCformat*)data_.U.Store)->nnz) );
310 const global_size_type info_st = as<global_size_type>(info);
311 TEUCHOS_TEST_FOR_EXCEPTION( (info_st > 0) && (info_st <= this->globalNumCols_),
313 "Factorization complete, but matrix is singular. Division by zero eminent");
314 TEUCHOS_TEST_FOR_EXCEPTION( (info_st > 0) && (info_st > this->globalNumCols_),
316 "Memory allocation failure in SuperLU_MT factorization");
320 data_.options.fact = SLUMT::FACTORED;
321 data_.options.refact = SLUMT::YES;
324 Teuchos::broadcast(*(this->getComm()),0,&info);
329 template <
class Matrix,
class Vector>
336 const global_size_type ld_rhs = this->root_ ? X->getGlobalLength() : 0;
337 const size_t nrhs = X->getGlobalNumVectors();
339 Teuchos::Array<slu_type> bxvals_(ld_rhs * nrhs);
340 size_t ldbx_ = as<size_t>(ld_rhs);
343 #ifdef HAVE_AMESOS2_TIMERS
344 Teuchos::TimeMonitor convTimer( this->timers_.vecConvTime_ );
345 Teuchos::TimeMonitor redistTimer( this->timers_.vecRedistTime_ );
348 if ( is_contiguous_ ==
true ) {
350 slu_type>::do_get(B, bxvals_(),
352 ROOTED, this->rowIndexBase_);
355 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
356 "NON_CONTIGUOUS_AND_ROOTED not supported.");
363 if( data_.BX.Store != NULL ){
364 SLUMT::D::Destroy_SuperMatrix_Store( &(data_.BX) );
365 data_.BX.Store = NULL;
369 #ifdef HAVE_AMESOS2_TIMERS
370 Teuchos::TimeMonitor convTimer( this->timers_.vecConvTime_ );
372 SLUMT::Dtype_t dtype = type_map::dtype;
373 function_map::create_Dense_Matrix(&(data_.BX), as<int>(ld_rhs), as<int>(nrhs),
374 bxvals_.getRawPtr(), as<int>(ldbx_),
375 SLUMT::SLU_DN, dtype, SLUMT::SLU_GE);
378 if( data_.options.trans == SLUMT::NOTRANS ){
381 Util::scale(bxvals_(), as<size_t>(ld_rhs), ldbx_, data_.R(),
382 SLUMT::slu_mt_mult<slu_type,magnitude_type>());
384 }
else if( data_.colequ ){
386 Util::scale(bxvals_(), as<size_t>(ld_rhs), ldbx_, data_.C(),
387 SLUMT::slu_mt_mult<slu_type,magnitude_type>());
392 #ifdef HAVE_AMESOS2_TIMERS
393 Teuchos::TimeMonitor solveTimer( this->timers_.solveTime_ );
396 function_map::gstrs(data_.options.trans, &(data_.L),
397 &(data_.U), data_.perm_r.getRawPtr(),
398 data_.perm_c.getRawPtr(), &(data_.BX),
399 &(data_.stat), &info);
404 Teuchos::broadcast(*(this->getComm()),0,&info);
406 TEUCHOS_TEST_FOR_EXCEPTION( info < 0,
408 "Argument " << -info <<
" to gstrs had an illegal value" );
411 if( data_.options.trans == SLUMT::NOTRANS ){
414 Util::scale(bxvals_(), as<size_t>(ld_rhs), ldbx_, data_.C(),
415 SLUMT::slu_mt_mult<slu_type,magnitude_type>());
417 }
else if( data_.rowequ ){
419 Util::scale(bxvals_(), as<size_t>(ld_rhs), ldbx_, data_.R(),
420 SLUMT::slu_mt_mult<slu_type,magnitude_type>());
425 #ifdef HAVE_AMESOS2_TIMERS
426 Teuchos::TimeMonitor redistTimer( this->timers_.vecRedistTime_ );
429 if ( is_contiguous_ ==
true ) {
434 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
435 "NON_CONTIGUOUS_AND_ROOTED not supported.");
443 template <
class Matrix,
class Vector>
450 return( this->matrixA_->getGlobalNumRows() == this->matrixA_->getGlobalNumCols() );
454 template <
class Matrix,
class Vector>
460 using Teuchos::getIntegralValue;
461 using Teuchos::ParameterEntryValidator;
463 RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
465 int default_nprocs = Kokkos::DefaultHostExecutionSpace().concurrency();
466 data_.options.nprocs = parameterList->get<
int>(
"nprocs", default_nprocs);
468 data_.options.trans = this->control_.useTranspose_ ? SLUMT::TRANS : SLUMT::NOTRANS;
470 if( parameterList->isParameter(
"trans") ){
471 RCP<const ParameterEntryValidator> trans_validator = valid_params->getEntry(
"trans").validator();
472 parameterList->getEntry(
"trans").setValidator(trans_validator);
474 data_.options.trans = getIntegralValue<SLUMT::trans_t>(*parameterList,
"trans");
477 data_.options.panel_size = parameterList->get<
int>(
"panel_size", SLUMT::D::sp_ienv(1));
479 data_.options.relax = parameterList->get<
int>(
"relax", SLUMT::D::sp_ienv(2));
481 const bool equil = parameterList->get<
bool>(
"Equil",
true);
482 data_.options.fact = equil ? SLUMT::EQUILIBRATE : SLUMT::DOFACT;
484 const bool symmetric_mode = parameterList->get<
bool>(
"SymmetricMode",
false);
485 data_.options.SymmetricMode = symmetric_mode ? SLUMT::YES : SLUMT::NO;
487 const bool print_stat = parameterList->get<
bool>(
"PrintStat",
false);
488 data_.options.PrintStat = print_stat ? SLUMT::YES : SLUMT::NO;
490 data_.options.diag_pivot_thresh = parameterList->get<
double>(
"diag_pivot_thresh", 1.0);
492 if( parameterList->isParameter(
"ColPerm") ){
493 RCP<const ParameterEntryValidator> colperm_validator = valid_params->getEntry(
"ColPerm").validator();
494 parameterList->getEntry(
"ColPerm").setValidator(colperm_validator);
496 data_.options.ColPerm = getIntegralValue<SLUMT::colperm_t>(*parameterList,
"ColPerm");
500 data_.options.usepr = SLUMT::NO;
502 if( parameterList->isParameter(
"IsContiguous") ){
503 is_contiguous_ = parameterList->get<
bool>(
"IsContiguous");
508 template <
class Matrix,
class Vector>
509 Teuchos::RCP<const Teuchos::ParameterList>
513 using Teuchos::tuple;
514 using Teuchos::ParameterList;
515 using Teuchos::EnhancedNumberValidator;
516 using Teuchos::setStringToIntegralParameter;
517 using Teuchos::stringToIntegralParameterEntryValidator;
519 static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
521 if( is_null(valid_params) ){
522 Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
524 int default_nprocs = Kokkos::DefaultHostExecutionSpace().concurrency();
525 Teuchos::RCP<EnhancedNumberValidator<int> > nprocs_validator
526 = Teuchos::rcp(
new EnhancedNumberValidator<int>() );
527 nprocs_validator->setMin(default_nprocs);
528 pl->set(
"nprocs", default_nprocs,
"The number of processors to factorize with", nprocs_validator);
530 setStringToIntegralParameter<SLUMT::trans_t>(
"trans",
"NOTRANS",
531 "Solve for the transpose system or not",
532 tuple<string>(
"TRANS",
"NOTRANS",
"CONJ"),
533 tuple<string>(
"Solve with transpose",
534 "Do not solve with transpose",
535 "Solve with the conjugate transpose"),
536 tuple<SLUMT::trans_t>(SLUMT::TRANS,
541 Teuchos::RCP<EnhancedNumberValidator<int> > panel_size_validator
542 = Teuchos::rcp(
new EnhancedNumberValidator<int>() );
543 panel_size_validator->setMin(1);
544 pl->set(
"panel_size", SLUMT::D::sp_ienv(1),
545 "Specifies the number of consecutive columns to be treated as a unit of task.",
546 panel_size_validator);
548 Teuchos::RCP<EnhancedNumberValidator<int> > relax_validator
549 = Teuchos::rcp(
new EnhancedNumberValidator<int>() );
550 relax_validator->setMin(1);
551 pl->set(
"relax", SLUMT::D::sp_ienv(2),
552 "Specifies the number of columns to be grouped as a relaxed supernode",
555 pl->set(
"Equil",
true,
"Whether to equilibrate the system before solve");
557 Teuchos::RCP<EnhancedNumberValidator<double> > diag_pivot_thresh_validator
558 = Teuchos::rcp(
new EnhancedNumberValidator<double>(0.0, 1.0) );
559 pl->set(
"diag_pivot_thresh", 1.0,
560 "Specifies the threshold used for a diagonal entry to be an acceptable pivot",
561 diag_pivot_thresh_validator);
564 setStringToIntegralParameter<SLUMT::colperm_t>(
"ColPerm",
"COLAMD",
565 "Specifies how to permute the columns of the "
566 "matrix for sparsity preservation",
567 tuple<string>(
"NATURAL",
571 tuple<string>(
"Natural ordering",
572 "Minimum degree ordering on A^T + A",
573 "Minimum degree ordering on A^T A",
574 "Approximate minimum degree column ordering"),
575 tuple<SLUMT::colperm_t>(SLUMT::NATURAL,
576 SLUMT::MMD_AT_PLUS_A,
581 pl->set(
"SymmetricMode",
false,
"Specifies whether to use the symmetric mode");
586 pl->set(
"PrintStat",
false,
"Specifies whether to print the solver's statistics");
588 pl->set(
"IsContiguous",
true,
"Whether GIDs contiguous");
597 template <
class Matrix,
class Vector>
603 #ifdef HAVE_AMESOS2_TIMERS
604 Teuchos::TimeMonitor convTimer( this->timers_.mtxConvTime_ );
607 if( current_phase == SYMBFACT )
return false;
610 if( data_.A.Store != NULL ){
611 SLUMT::D::Destroy_SuperMatrix_Store( &(data_.A) );
612 data_.A.Store = NULL;
616 nzvals_.resize(this->globalNumNonZeros_);
617 rowind_.resize(this->globalNumNonZeros_);
618 colptr_.resize(this->globalNumCols_ + 1);
623 #ifdef HAVE_AMESOS2_TIMERS
624 Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
628 if ( is_contiguous_ ==
true ) {
629 Util::get_ccs_helper<
631 nzvals_, rowind_, colptr_,
632 nnz_ret, ROOTED, ARBITRARY, this->rowIndexBase_);
635 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
636 "NON_CONTIGUOUS_AND_ROOTED not supported.");
641 TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != as<int>(this->globalNumNonZeros_),
643 "rank=0 failed to get all nonzero values in getCcs()");
645 SLUMT::Dtype_t dtype = type_map::dtype;
646 function_map::create_CompCol_Matrix(&(data_.A),
647 as<int>(this->globalNumRows_),
648 as<int>(this->globalNumCols_),
654 dtype, SLUMT::SLU_GE);
656 TEUCHOS_TEST_FOR_EXCEPTION( data_.A.Store == NULL,
658 "Failed to allocate matrix store" );
665 template<
class Matrix,
class Vector>
671 #endif // AMESOS2_SUPERLUMT_DEF_HPP
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const override
Return a const parameter list of all of the valid parameters that this->setParameterList(...) will accept.
Definition: Amesos2_SolverCore_def.hpp:558
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers...
Definition: Amesos2_SolverCore_decl.hpp:105
void scale(ArrayView< Scalar1 > vals, size_t l, size_t ld, ArrayView< Scalar2 > s)
Scales a 1-D representation of a multivector.
EPhase
Used to indicate a phase in the direct solution.
Definition: Amesos2_TypeDecl.hpp:65
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition: Amesos2_Superlumt_def.hpp:445
Amesos2 interface to the Multi-threaded version of SuperLU.
Definition: Amesos2_Superlumt_decl.hpp:77
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
SuperLU_MT specific solve.
Definition: Amesos2_Superlumt_def.hpp:331
global_size_type globalNumCols_
Number of global columns in matrixA_.
Definition: Amesos2_SolverCore_decl.hpp:479
super_type & setParameters(const Teuchos::RCP< Teuchos::ParameterList > ¶meterList) override
Set/update internal variables and solver options.
Definition: Amesos2_SolverCore_def.hpp:526
global_size_type globalNumRows_
Number of global rows in matrixA_.
Definition: Amesos2_SolverCore_decl.hpp:476
int symbolicFactorization_impl()
Perform symbolic factorization of the matrix using SuperLU_MT.
Definition: Amesos2_Superlumt_def.hpp:202
Helper class for getting 1-D copies of multivectors.
Definition: Amesos2_MultiVecAdapter_decl.hpp:267
Utility functions for Amesos2.
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition: Amesos2_Superlumt_def.hpp:510
Superlumt(Teuchos::RCP< const Matrix > A, Teuchos::RCP< Vector > X, Teuchos::RCP< const Vector > B)
Initialize from Teuchos::RCP.
Definition: Amesos2_Superlumt_def.hpp:97
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition: Amesos2_Superlumt_def.hpp:182
A Matrix adapter interface for Amesos2.
Definition: Amesos2_MatrixAdapter_decl.hpp:76
int numericFactorization_impl()
SuperLU_MT specific numeric factorization.
Definition: Amesos2_Superlumt_def.hpp:227
void setParameters_impl(const Teuchos::RCP< Teuchos::ParameterList > ¶meterList)
Definition: Amesos2_Superlumt_def.hpp:456
~Superlumt()
Destructor.
Definition: Amesos2_Superlumt_def.hpp:136
Helper class for putting 1-D data arrays into multivectors.
Definition: Amesos2_MultiVecAdapter_decl.hpp:373
A templated MultiVector class adapter for Amesos2.
Definition: Amesos2_MultiVecAdapter_decl.hpp:176
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal structures.
Definition: Amesos2_Superlumt_def.hpp:599