52 #ifndef AMESOS2_SUPERLUMT_DEF_HPP
53 #define AMESOS2_SUPERLUMT_DEF_HPP
55 #include <Teuchos_Tuple.hpp>
56 #include <Teuchos_ParameterList.hpp>
57 #include <Teuchos_StandardParameterEntryValidators.hpp>
76 int sp_colorder(SuperMatrix*,
int*,superlumt_options_t*,SuperMatrix*);
96 template <
class Matrix,
class Vector>
98 Teuchos::RCP<Vector> X,
99 Teuchos::RCP<const Vector> B )
104 , is_contiguous_(true)
106 Teuchos::RCP<Teuchos::ParameterList> default_params
110 data_.options.lwork = 0;
114 data_.options.perm_r = data_.perm_r.getRawPtr();
115 data_.options.perm_c = data_.perm_c.getRawPtr();
120 data_.options.refact = SLUMT::NO;
121 data_.equed = SLUMT::NOEQUIL;
122 data_.rowequ =
false;
123 data_.colequ =
false;
125 data_.A.Store = NULL;
126 data_.AC.Store = NULL;
127 data_.BX.Store = NULL;
128 data_.L.Store = NULL;
129 data_.U.Store = NULL;
131 data_.stat.ops = NULL;
135 template <
class Matrix,
class Vector>
145 if ( data_.A.Store != NULL ){
147 SLUMT::D::Destroy_SuperMatrix_Store( &(data_.A) );
151 if( data_.AC.Store != NULL ){
152 SLUMT::D::Destroy_CompCol_Permuted( &(data_.AC) );
155 if ( data_.L.Store != NULL ){
156 SLUMT::D::Destroy_SuperNode_SCP( &(data_.L) );
157 SLUMT::D::Destroy_CompCol_NCP( &(data_.U) );
160 free( data_.options.etree );
161 free( data_.options.colcnt_h );
162 free( data_.options.part_super_h );
167 if ( data_.BX.Store != NULL ){
173 SLUMT::D::Destroy_SuperMatrix_Store( &(data_.BX) );
176 if ( data_.stat.ops != NULL )
177 SLUMT::D::StatFree( &(data_.stat) );
180 template<
class Matrix,
class Vector>
185 int perm_spec = data_.options.ColPerm;
186 if( perm_spec != SLUMT::MY_PERMC && this->root_ ){
187 #ifdef HAVE_AMESOS2_TIMERS
188 Teuchos::TimeMonitor preOrderTimer(this->timers_.preOrderTime_);
191 SLUMT::S::get_perm_c(perm_spec, &(data_.A), data_.perm_c.getRawPtr());
200 template <
class Matrix,
class Vector>
208 data_.options.refact = SLUMT::NO;
210 if( this->status_.numericFactorizationDone() && this->root_ ){
215 SLUMT::D::Destroy_SuperNode_Matrix( &(data_.L) );
216 SLUMT::D::Destroy_CompCol_NCP( &(data_.U) );
217 data_.L.Store = NULL;
218 data_.U.Store = NULL;
225 template <
class Matrix,
class Vector>
231 #ifdef HAVE_AMESOS2_DEBUG
232 const int nprocs = data_.options.nprocs;
233 TEUCHOS_TEST_FOR_EXCEPTION( nprocs <= 0,
234 std::invalid_argument,
235 "The number of threads to spawn should be greater than 0." );
242 if( data_.options.fact == SLUMT::EQUILIBRATE ){
243 magnitude_type rowcnd, colcnd, amax;
246 function_map::gsequ(&(data_.A), data_.R.getRawPtr(),
247 data_.C.getRawPtr(), &rowcnd, &colcnd,
249 TEUCHOS_TEST_FOR_EXCEPTION( info != 0,
251 "SuperLU_MT gsequ returned with status " << info );
253 function_map::laqgs(&(data_.A), data_.R.getRawPtr(),
254 data_.C.getRawPtr(), rowcnd, colcnd,
255 amax, &(data_.equed));
257 data_.rowequ = (data_.equed == SLUMT::ROW) || (data_.equed == SLUMT::BOTH);
258 data_.colequ = (data_.equed == SLUMT::COL) || (data_.equed == SLUMT::BOTH);
260 data_.options.fact = SLUMT::DOFACT;
264 if( data_.AC.Store != NULL ){
265 SLUMT::D::Destroy_CompCol_Permuted( &(data_.AC) );
266 if( data_.options.refact == SLUMT::NO ){
267 free( data_.options.etree );
268 free( data_.options.colcnt_h );
269 free( data_.options.part_super_h );
271 data_.AC.Store = NULL;
275 SLUMT::sp_colorder(&(data_.A), data_.perm_c.getRawPtr(),
276 &(data_.options), &(data_.AC));
280 const int n = as<int>(this->globalNumCols_);
281 if( data_.stat.ops != NULL ){ SLUMT::D::StatFree( &(data_.stat) ); data_.stat.ops = NULL; }
282 SLUMT::D::StatAlloc(n, data_.options.nprocs, data_.options.panel_size,
283 data_.options.relax, &(data_.stat));
284 SLUMT::D::StatInit(n, data_.options.nprocs, &(data_.stat));
288 #ifdef HAVE_AMESOS2_TIMERS
289 Teuchos::TimeMonitor numFactTimer( this->timers_.numFactTime_ );
292 #ifdef HAVE_AMESOS2_VERBOSE_DEBUG
293 std::cout <<
"SuperLU_MT:: Before numeric factorization" << std::endl;
294 std::cout <<
"nzvals_ : " << nzvals_.toString() << std::endl;
295 std::cout <<
"rowind_ : " << rowind_.toString() << std::endl;
296 std::cout <<
"colptr_ : " << colptr_.toString() << std::endl;
299 function_map::gstrf(&(data_.options), &(data_.AC),
300 data_.perm_r.getRawPtr(), &(data_.L), &(data_.U),
301 &(data_.stat), &info);
305 this->setNnzLU( as<size_t>(((SLUMT::SCformat*)data_.L.Store)->nnz +
306 ((SLUMT::NCformat*)data_.U.Store)->nnz) );
310 const global_size_type info_st = as<global_size_type>(info);
311 TEUCHOS_TEST_FOR_EXCEPTION( (info_st > 0) && (info_st <= this->globalNumCols_),
313 "Factorization complete, but matrix is singular. Division by zero eminent");
314 TEUCHOS_TEST_FOR_EXCEPTION( (info_st > 0) && (info_st > this->globalNumCols_),
316 "Memory allocation failure in SuperLU_MT factorization");
320 data_.options.fact = SLUMT::FACTORED;
321 data_.options.refact = SLUMT::YES;
324 Teuchos::broadcast(*(this->getComm()),0,&info);
329 template <
class Matrix,
class Vector>
336 const global_size_type ld_rhs = this->root_ ? X->getGlobalLength() : 0;
337 const size_t nrhs = X->getGlobalNumVectors();
339 Teuchos::Array<slu_type> bxvals_(ld_rhs * nrhs);
340 size_t ldbx_ = as<size_t>(ld_rhs);
343 #ifdef HAVE_AMESOS2_TIMERS
344 Teuchos::TimeMonitor convTimer( this->timers_.vecConvTime_ );
345 Teuchos::TimeMonitor redistTimer( this->timers_.vecRedistTime_ );
348 if ( is_contiguous_ ==
true ) {
350 slu_type>::do_get(B, bxvals_(),
352 ROOTED, this->rowIndexBase_);
356 slu_type>::do_get(B, bxvals_(),
358 NON_CONTIGUOUS_AND_ROOTED, this->rowIndexBase_);
365 if( data_.BX.Store != NULL ){
366 SLUMT::D::Destroy_SuperMatrix_Store( &(data_.BX) );
367 data_.BX.Store = NULL;
371 #ifdef HAVE_AMESOS2_TIMERS
372 Teuchos::TimeMonitor convTimer( this->timers_.vecConvTime_ );
374 SLUMT::Dtype_t dtype = type_map::dtype;
375 function_map::create_Dense_Matrix(&(data_.BX), as<int>(ld_rhs), as<int>(nrhs),
376 bxvals_.getRawPtr(), as<int>(ldbx_),
377 SLUMT::SLU_DN, dtype, SLUMT::SLU_GE);
380 if( data_.options.trans == SLUMT::NOTRANS ){
383 Util::scale(bxvals_(), as<size_t>(ld_rhs), ldbx_, data_.R(),
384 SLUMT::slu_mt_mult<slu_type,magnitude_type>());
386 }
else if( data_.colequ ){
388 Util::scale(bxvals_(), as<size_t>(ld_rhs), ldbx_, data_.C(),
389 SLUMT::slu_mt_mult<slu_type,magnitude_type>());
394 #ifdef HAVE_AMESOS2_TIMERS
395 Teuchos::TimeMonitor solveTimer( this->timers_.solveTime_ );
398 function_map::gstrs(data_.options.trans, &(data_.L),
399 &(data_.U), data_.perm_r.getRawPtr(),
400 data_.perm_c.getRawPtr(), &(data_.BX),
401 &(data_.stat), &info);
406 Teuchos::broadcast(*(this->getComm()),0,&info);
408 TEUCHOS_TEST_FOR_EXCEPTION( info < 0,
410 "Argument " << -info <<
" to gstrs had an illegal value" );
413 if( data_.options.trans == SLUMT::NOTRANS ){
416 Util::scale(bxvals_(), as<size_t>(ld_rhs), ldbx_, data_.C(),
417 SLUMT::slu_mt_mult<slu_type,magnitude_type>());
419 }
else if( data_.rowequ ){
421 Util::scale(bxvals_(), as<size_t>(ld_rhs), ldbx_, data_.R(),
422 SLUMT::slu_mt_mult<slu_type,magnitude_type>());
427 #ifdef HAVE_AMESOS2_TIMERS
428 Teuchos::TimeMonitor redistTimer( this->timers_.vecRedistTime_ );
431 if ( is_contiguous_ ==
true ) {
445 template <
class Matrix,
class Vector>
452 return( this->matrixA_->getGlobalNumRows() == this->matrixA_->getGlobalNumCols() );
456 template <
class Matrix,
class Vector>
462 using Teuchos::getIntegralValue;
463 using Teuchos::ParameterEntryValidator;
465 RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
468 data_.options.nprocs = parameterList->get<
int>(
"nprocs", 1);
470 data_.options.trans = this->control_.useTranspose_ ? SLUMT::TRANS : SLUMT::NOTRANS;
472 if( parameterList->isParameter(
"trans") ){
473 RCP<const ParameterEntryValidator> trans_validator = valid_params->getEntry(
"trans").validator();
474 parameterList->getEntry(
"trans").setValidator(trans_validator);
476 data_.options.trans = getIntegralValue<SLUMT::trans_t>(*parameterList,
"trans");
479 data_.options.panel_size = parameterList->get<
int>(
"panel_size", SLUMT::D::sp_ienv(1));
481 data_.options.relax = parameterList->get<
int>(
"relax", SLUMT::D::sp_ienv(2));
483 const bool equil = parameterList->get<
bool>(
"Equil",
true);
484 data_.options.fact = equil ? SLUMT::EQUILIBRATE : SLUMT::DOFACT;
486 const bool symmetric_mode = parameterList->get<
bool>(
"SymmetricMode",
false);
487 data_.options.SymmetricMode = symmetric_mode ? SLUMT::YES : SLUMT::NO;
489 const bool print_stat = parameterList->get<
bool>(
"PrintStat",
false);
490 data_.options.PrintStat = print_stat ? SLUMT::YES : SLUMT::NO;
492 data_.options.diag_pivot_thresh = parameterList->get<
double>(
"diag_pivot_thresh", 1.0);
494 if( parameterList->isParameter(
"ColPerm") ){
495 RCP<const ParameterEntryValidator> colperm_validator = valid_params->getEntry(
"ColPerm").validator();
496 parameterList->getEntry(
"ColPerm").setValidator(colperm_validator);
498 data_.options.ColPerm = getIntegralValue<SLUMT::colperm_t>(*parameterList,
"ColPerm");
502 data_.options.usepr = SLUMT::NO;
504 if( parameterList->isParameter(
"IsContiguous") ){
505 is_contiguous_ = parameterList->get<
bool>(
"IsContiguous");
510 template <
class Matrix,
class Vector>
511 Teuchos::RCP<const Teuchos::ParameterList>
515 using Teuchos::tuple;
516 using Teuchos::ParameterList;
517 using Teuchos::EnhancedNumberValidator;
518 using Teuchos::setStringToIntegralParameter;
519 using Teuchos::stringToIntegralParameterEntryValidator;
521 static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
523 if( is_null(valid_params) ){
524 Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
526 Teuchos::RCP<EnhancedNumberValidator<int> > nprocs_validator
527 = Teuchos::rcp(
new EnhancedNumberValidator<int>() );
528 nprocs_validator->setMin(1);
529 pl->set(
"nprocs", 1,
"The number of processors to factorize with", nprocs_validator);
531 setStringToIntegralParameter<SLUMT::trans_t>(
"trans",
"NOTRANS",
532 "Solve for the transpose system or not",
533 tuple<string>(
"TRANS",
"NOTRANS",
"CONJ"),
534 tuple<string>(
"Solve with transpose",
535 "Do not solve with transpose",
536 "Solve with the conjugate transpose"),
537 tuple<SLUMT::trans_t>(SLUMT::TRANS,
542 Teuchos::RCP<EnhancedNumberValidator<int> > panel_size_validator
543 = Teuchos::rcp(
new EnhancedNumberValidator<int>() );
544 panel_size_validator->setMin(1);
545 pl->set(
"panel_size", SLUMT::D::sp_ienv(1),
546 "Specifies the number of consecutive columns to be treated as a unit of task.",
547 panel_size_validator);
549 Teuchos::RCP<EnhancedNumberValidator<int> > relax_validator
550 = Teuchos::rcp(
new EnhancedNumberValidator<int>() );
551 relax_validator->setMin(1);
552 pl->set(
"relax", SLUMT::D::sp_ienv(2),
553 "Specifies the number of columns to be grouped as a relaxed supernode",
556 pl->set(
"Equil",
true,
"Whether to equilibrate the system before solve");
558 Teuchos::RCP<EnhancedNumberValidator<double> > diag_pivot_thresh_validator
559 = Teuchos::rcp(
new EnhancedNumberValidator<double>(0.0, 1.0) );
560 pl->set(
"diag_pivot_thresh", 1.0,
561 "Specifies the threshold used for a diagonal entry to be an acceptable pivot",
562 diag_pivot_thresh_validator);
565 setStringToIntegralParameter<SLUMT::colperm_t>(
"ColPerm",
"COLAMD",
566 "Specifies how to permute the columns of the "
567 "matrix for sparsity preservation",
568 tuple<string>(
"NATURAL",
572 tuple<string>(
"Natural ordering",
573 "Minimum degree ordering on A^T + A",
574 "Minimum degree ordering on A^T A",
575 "Approximate minimum degree column ordering"),
576 tuple<SLUMT::colperm_t>(SLUMT::NATURAL,
577 SLUMT::MMD_AT_PLUS_A,
582 pl->set(
"SymmetricMode",
false,
"Specifies whether to use the symmetric mode");
587 pl->set(
"PrintStat",
false,
"Specifies whether to print the solver's statistics");
589 pl->set(
"IsContiguous",
true,
"Whether GIDs contiguous");
598 template <
class Matrix,
class Vector>
604 #ifdef HAVE_AMESOS2_TIMERS
605 Teuchos::TimeMonitor convTimer( this->timers_.mtxConvTime_ );
608 if( current_phase == SYMBFACT )
return false;
611 if( data_.A.Store != NULL ){
612 SLUMT::D::Destroy_SuperMatrix_Store( &(data_.A) );
613 data_.A.Store = NULL;
617 nzvals_.resize(this->globalNumNonZeros_);
618 rowind_.resize(this->globalNumNonZeros_);
619 colptr_.resize(this->globalNumCols_ + 1);
624 #ifdef HAVE_AMESOS2_TIMERS
625 Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
629 if ( is_contiguous_ ==
true ) {
632 nzvals_, rowind_, colptr_,
633 nnz_ret, ROOTED, ARBITRARY, this->rowIndexBase_);
638 nzvals_, rowind_, colptr_,
639 nnz_ret, NON_CONTIGUOUS_AND_ROOTED, ARBITRARY, this->rowIndexBase_);
644 TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != as<int>(this->globalNumNonZeros_),
646 "rank=0 failed to get all nonzero values in getCcs()");
648 SLUMT::Dtype_t dtype = type_map::dtype;
649 function_map::create_CompCol_Matrix(&(data_.A),
650 as<int>(this->globalNumRows_),
651 as<int>(this->globalNumCols_),
657 dtype, SLUMT::SLU_GE);
659 TEUCHOS_TEST_FOR_EXCEPTION( data_.A.Store == NULL,
661 "Failed to allocate matrix store" );
668 template<
class Matrix,
class Vector>
674 #endif // AMESOS2_SUPERLUMT_DEF_HPP
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers...
Definition: Amesos2_SolverCore_decl.hpp:105
void scale(ArrayView< Scalar1 > vals, size_t l, size_t ld, ArrayView< Scalar2 > s)
Scales a 1-D representation of a multivector.
EPhase
Used to indicate a phase in the direct solution.
Definition: Amesos2_TypeDecl.hpp:65
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition: Amesos2_Superlumt_def.hpp:447
Amesos2 interface to the Multi-threaded version of SuperLU.
Definition: Amesos2_Superlumt_decl.hpp:77
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
SuperLU_MT specific solve.
Definition: Amesos2_Superlumt_def.hpp:331
global_size_type globalNumCols_
Number of global columns in matrixA_.
Definition: Amesos2_SolverCore_decl.hpp:478
global_size_type globalNumRows_
Number of global rows in matrixA_.
Definition: Amesos2_SolverCore_decl.hpp:475
int symbolicFactorization_impl()
Perform symbolic factorization of the matrix using SuperLU_MT.
Definition: Amesos2_Superlumt_def.hpp:202
Helper class for getting 1-D copies of multivectors.
Definition: Amesos2_MultiVecAdapter_decl.hpp:266
Utility functions for Amesos2.
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition: Amesos2_Superlumt_def.hpp:512
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const
Return a const parameter list of all of the valid parameters that this->setParameterList(...) will accept.
Definition: Amesos2_SolverCore_def.hpp:314
A generic helper class for getting a CCS representation of a Matrix.
Definition: Amesos2_Util.hpp:654
Superlumt(Teuchos::RCP< const Matrix > A, Teuchos::RCP< Vector > X, Teuchos::RCP< const Vector > B)
Initialize from Teuchos::RCP.
Definition: Amesos2_Superlumt_def.hpp:97
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition: Amesos2_Superlumt_def.hpp:182
A Matrix adapter interface for Amesos2.
Definition: Amesos2_MatrixAdapter_decl.hpp:76
int numericFactorization_impl()
SuperLU_MT specific numeric factorization.
Definition: Amesos2_Superlumt_def.hpp:227
void setParameters_impl(const Teuchos::RCP< Teuchos::ParameterList > ¶meterList)
Definition: Amesos2_Superlumt_def.hpp:458
~Superlumt()
Destructor.
Definition: Amesos2_Superlumt_def.hpp:136
super_type & setParameters(const Teuchos::RCP< Teuchos::ParameterList > ¶meterList)
Set/update internal variables and solver options.
Definition: Amesos2_SolverCore_def.hpp:282
Helper class for putting 1-D data arrays into multivectors.
Definition: Amesos2_MultiVecAdapter_decl.hpp:322
A templated MultiVector class adapter for Amesos2.
Definition: Amesos2_MultiVecAdapter_decl.hpp:176
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal structures.
Definition: Amesos2_Superlumt_def.hpp:600