18 #ifndef AMESOS2_SUPERLUDIST_DEF_HPP
19 #define AMESOS2_SUPERLUDIST_DEF_HPP
21 #include <Teuchos_Tuple.hpp>
22 #include <Teuchos_StandardParameterEntryValidators.hpp>
23 #include <Teuchos_DefaultMpiComm.hpp>
24 #include <Teuchos_Details_MpiTypeTraits.hpp>
34 template <
class Matrix,
class Vector>
36 Teuchos::RCP<Vector> X,
37 Teuchos::RCP<const Vector> B)
42 , force_symbfact_(false)
43 , is_contiguous_(true)
48 using Teuchos::MpiComm;
49 using Teuchos::outArg;
50 using Teuchos::ParameterList;
51 using Teuchos::parameterList;
54 using Teuchos::rcp_dynamic_cast;
55 using Teuchos::REDUCE_SUM;
56 using Teuchos::reduceAll;
57 typedef global_ordinal_type GO;
58 typedef Tpetra::Map<local_ordinal_type, GO, node_type> map_type;
64 RCP<const Comm<int> > comm = this->
getComm ();
65 const int myRank = comm->getRank ();
66 const int numProcs = comm->getSize ();
68 SLUD::int_t nprow, npcol;
75 RCP<const Comm<int> > matComm = this->
matrixA_->getComm ();
76 TEUCHOS_TEST_FOR_EXCEPTION(
77 matComm.is_null (), std::logic_error,
"Amesos2::Superlustdist "
78 "constructor: The matrix's communicator is null!");
79 RCP<const MpiComm<int> > matMpiComm =
80 rcp_dynamic_cast<
const MpiComm<int> > (matComm);
85 TEUCHOS_TEST_FOR_EXCEPTION(
86 matMpiComm.is_null (), std::logic_error,
"Amesos2::Superlustdist "
87 "constructor: The matrix's communicator is not an MpiComm!");
88 TEUCHOS_TEST_FOR_EXCEPTION(
89 matMpiComm->getRawMpiComm ().is_null (), std::logic_error,
"Amesos2::"
90 "Superlustdist constructor: The matrix's communicator claims to be a "
91 "Teuchos::MpiComm<int>, but its getRawPtrComm() method returns "
92 "Teuchos::null! This means that the underlying MPI_Comm doesn't even "
93 "exist, which likely implies that the Teuchos::MpiComm was constructed "
94 "incorrectly. It means something different than if the MPI_Comm were "
96 MPI_Comm rawMpiComm = (* (matMpiComm->getRawMpiComm ())) ();
97 data_.mat_comm = rawMpiComm;
103 SLUD::superlu_gridinit(data_.mat_comm, nprow, npcol, &(data_.grid));
112 SLUD::set_default_options_dist(&data_.options);
114 RCP<ParameterList> default_params =
119 data_.options.Fact = SLUD::DOFACT;
120 data_.equed = SLUD::NOEQUIL;
121 data_.options.SolveInitialized = SLUD::NO;
122 data_.options.RefineInitialized = SLUD::NO;
123 data_.rowequ =
false;
124 data_.colequ =
false;
127 data_.largediag_mc64_job = 4;
137 data_.symb_comm = MPI_COMM_NULL;
142 data_.domains = (
int) ( pow(2.0, floor(log10((
double)nprow*npcol)/log10(2.0))) );
144 const int color = (myRank < data_.domains) ? 0 : MPI_UNDEFINED;
145 MPI_Comm_split (data_.mat_comm, color, myRank, &(data_.symb_comm));
157 int myProcParticipates = 0;
158 if (myRank < nprow * npcol) {
160 myProcParticipates = 1;
165 int numParticipatingProcs = 0;
166 reduceAll<int, int> (*comm, REDUCE_SUM, myProcParticipates,
167 outArg (numParticipatingProcs));
168 TEUCHOS_TEST_FOR_EXCEPTION(
169 this->globalNumRows_ != 0 && numParticipatingProcs == 0,
170 std::logic_error,
"Amesos2::Superludist constructor: The matrix has "
171 << this->globalNumRows_ <<
" > 0 global row(s), but no processes in the "
172 "communicator want to participate in its factorization! nprow = "
173 << nprow <<
" and npcol = " << npcol <<
".");
176 size_t myNumRows = 0;
179 const GO quotient = (numParticipatingProcs == 0) ? static_cast<GO> (0) :
180 GNR /
static_cast<GO
> (numParticipatingProcs);
182 GNR - quotient *
static_cast<GO
> (numParticipatingProcs);
183 const GO lclNumRows = (
static_cast<GO
> (myRank) < remainder) ?
184 (quotient +
static_cast<GO
> (1)) : quotient;
185 myNumRows =
static_cast<size_t> (lclNumRows);
189 const GO indexBase = this->
matrixA_->getRowMap ()->getIndexBase ();
191 rcp (
new map_type (this->globalNumRows_, myNumRows, indexBase, comm));
197 data_.A.Store = NULL;
198 function_map::LUstructInit(this->globalNumRows_, this->globalNumCols_, &(data_.lu));
199 SLUD::PStatInit(&(data_.stat));
202 data_.scale_perm.perm_r = data_.perm_r.getRawPtr();
203 data_.scale_perm.perm_c = data_.perm_c.getRawPtr();
207 template <
class Matrix,
class Vector>
219 if ( this->status_.getNumPreOrder() > 0 ){
221 #if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
222 SUPERLU_FREE( data_.sizes );
223 SUPERLU_FREE( data_.fstVtxSep );
226 free( data_.fstVtxSep );
232 if( data_.A.Store != NULL ){
233 SLUD::Destroy_SuperMatrix_Store_dist( &(data_.A) );
237 if ( this->status_.getNumNumericFact() > 0 ){
238 function_map::Destroy_LU(this->globalNumRows_, &(data_.grid), &(data_.lu));
240 function_map::LUstructFree(&(data_.lu));
246 if ( this->status_.symbolicFactorizationDone() &&
247 !this->status_.numericFactorizationDone() ){
248 if ( data_.pslu_freeable.xlsub != NULL ){
249 #if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
250 SUPERLU_FREE( data_.pslu_freeable.xlsub );
251 SUPERLU_FREE( data_.pslu_freeable.lsub );
253 free( data_.pslu_freeable.xlsub );
254 free( data_.pslu_freeable.lsub );
257 if ( data_.pslu_freeable.xusub != NULL ){
258 #if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
259 SUPERLU_FREE( data_.pslu_freeable.xusub );
260 SUPERLU_FREE( data_.pslu_freeable.usub );
262 free( data_.pslu_freeable.xusub );
263 free( data_.pslu_freeable.usub );
266 if ( data_.pslu_freeable.supno_loc != NULL ){
267 #if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
268 SUPERLU_FREE( data_.pslu_freeable.supno_loc );
269 SUPERLU_FREE( data_.pslu_freeable.xsup_beg_loc );
270 SUPERLU_FREE( data_.pslu_freeable.xsup_end_loc );
272 free( data_.pslu_freeable.supno_loc );
273 free( data_.pslu_freeable.xsup_beg_loc );
274 free( data_.pslu_freeable.xsup_end_loc );
277 #if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
278 SUPERLU_FREE( data_.pslu_freeable.globToLoc );
280 free( data_.pslu_freeable.globToLoc );
284 SLUD::PStatFree( &(data_.stat) ) ;
289 if ( data_.options.SolveInitialized == SLUD::YES )
290 function_map::SolveFinalize(&(data_.options), &(data_.solve_struct));
294 SLUD::superlu_gridexit(&(data_.grid));
298 if ( data_.symb_comm != MPI_COMM_NULL ) MPI_Comm_free(&(data_.symb_comm));
301 template<
class Matrix,
class Vector>
305 int job = data_.largediag_mc64_job;
308 data_.R1.resize(data_.A.nrow);
309 data_.C1.resize(data_.A.ncol);
312 SLUD::NCformat *GAstore = (SLUD::NCformat*) GA.Store;
313 SLUD::int_t* colptr = GAstore->colptr;
314 SLUD::int_t* rowind = GAstore->rowind;
315 SLUD::int_t nnz = GAstore->nnz;
316 slu_type *a_GA = (slu_type *) GAstore->nzval;
317 MPI_Datatype mpi_dtype = Teuchos::Details::MpiTypeTraits<magnitude_type>::getType();
318 MPI_Datatype mpi_itype = Teuchos::Details::MpiTypeTraits<SLUD::int_t>::getType();
321 if ( !data_.grid.iam ) {
322 iinfo = function_map::ldperm_dist(job, data_.A.nrow, nnz, colptr, rowind, a_GA,
323 data_.perm_r.getRawPtr(), data_.R1.getRawPtr(), data_.C1.getRawPtr());
325 MPI_Bcast( &iinfo, 1, MPI_INT, 0, data_.grid.comm );
327 MPI_Bcast( data_.perm_r.getRawPtr(), data_.A.nrow, mpi_itype, 0, data_.grid.comm );
328 if ( job == 5 && data_.options.Equil ) {
329 MPI_Bcast( data_.R1.getRawPtr(), data_.A.nrow, mpi_dtype, 0, data_.grid.comm );
330 MPI_Bcast( data_.C1.getRawPtr(), data_.A.ncol, mpi_dtype, 0, data_.grid.comm );
334 MPI_Bcast( &iinfo, 1, mpi_int_t, 0, data_.grid.comm );
336 MPI_Bcast( data_.perm_r.getRawPtr(), data_.A.nrow, mpi_itype, 0, data_.grid.comm );
337 if ( job == 5 && data_.options.Equil ) {
338 MPI_Bcast( data_.R1.getRawPtr(), data_.A.nrow, mpi_dtype, 0, data_.grid.comm );
339 MPI_Bcast( data_.C1.getRawPtr(), data_.A.ncol, mpi_dtype, 0, data_.grid.comm );
343 TEUCHOS_TEST_FOR_EXCEPTION( iinfo != 0,
345 "SuperLU_DIST pre-ordering failed to compute row perm with "
346 << iinfo << std::endl);
350 for (SLUD::int_t i = 0; i < data_.A.nrow; ++i) data_.R1[i] = exp(data_.R1[i]);
351 for (SLUD::int_t i = 0; i < data_.A.ncol; ++i) data_.C1[i] = exp(data_.C1[i]);
356 template<
class Matrix,
class Vector>
360 if (data_.options.RowPerm == SLUD::NOROWPERM) {
361 SLUD::int_t slu_rows_ub = Teuchos::as<SLUD::int_t>(this->globalNumRows_);
362 for( SLUD::int_t i = 0; i < slu_rows_ub; ++i ) data_.perm_r[i] = i;
364 else if (data_.options.RowPerm == SLUD::LargeDiag_MC64) {
365 if (!force_symbfact_)
367 return (EXIT_SUCCESS + 1);
377 if( this->status_.getNumPreOrder() > 0 ){
378 #if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
379 SUPERLU_FREE( data_.sizes );
380 SUPERLU_FREE( data_.fstVtxSep );
383 free( data_.fstVtxSep );
388 #ifdef HAVE_AMESOS2_TIMERS
389 Teuchos::TimeMonitor preOrderTime( this->timers_.preOrderTime_ );
391 info = SLUD::get_perm_c_parmetis( &(data_.A),
392 data_.perm_r.getRawPtr(), data_.perm_c.getRawPtr(),
393 data_.grid.nprow * data_.grid.npcol, data_.domains,
394 &(data_.sizes), &(data_.fstVtxSep),
395 &(data_.grid), &(data_.symb_comm) );
397 TEUCHOS_TEST_FOR_EXCEPTION( info > 0.0,
399 "SuperLU_DIST pre-ordering ran out of memory after allocating "
400 << info <<
" bytes of memory" );
412 template <
class Matrix,
class Vector>
417 if (!force_symbfact_) {
418 if (data_.options.RowPerm == SLUD::LargeDiag_MC64) {
420 return (EXIT_SUCCESS + 1);
428 #ifdef HAVE_AMESOS2_TIMERS
429 Teuchos::TimeMonitor symFactTime( this->timers_.symFactTime_ );
432 #if (SUPERLU_DIST_MAJOR_VERSION > 7)
433 info = SLUD::symbfact_dist(&(data_.options), (data_.grid.nprow) * (data_.grid.npcol),
434 data_.domains, &(data_.A), data_.perm_c.getRawPtr(),
435 data_.perm_r.getRawPtr(), data_.sizes,
436 data_.fstVtxSep, &(data_.pslu_freeable),
437 &(data_.grid.comm), &(data_.symb_comm),
441 info = SLUD::symbfact_dist((data_.grid.nprow) * (data_.grid.npcol),
442 data_.domains, &(data_.A), data_.perm_c.getRawPtr(),
443 data_.perm_r.getRawPtr(), data_.sizes,
444 data_.fstVtxSep, &(data_.pslu_freeable),
445 &(data_.grid.comm), &(data_.symb_comm),
449 TEUCHOS_TEST_FOR_EXCEPTION( info > 0.0,
451 "SuperLU_DIST symbolic factorization ran out of memory after"
452 " allocating " << info <<
" bytes of memory" );
454 same_symbolic_ =
false;
455 same_solve_struct_ =
false;
461 template <
class Matrix,
class Vector>
467 SLUD::SuperMatrix GA;
468 bool need_value =
false;
471 if( data_.options.Equil == SLUD::YES ) {
472 SLUD::int_t info = 0;
475 data_.R.resize(this->globalNumRows_);
476 data_.C.resize(this->globalNumCols_);
477 function_map::gsequ_loc(&(data_.A), data_.R.getRawPtr(), data_.C.getRawPtr(),
478 &(data_.rowcnd), &(data_.colcnd), &(data_.amax), &info, &(data_.grid));
481 function_map::laqgs_loc(&(data_.A), data_.R.getRawPtr(), data_.C.getRawPtr(),
482 data_.rowcnd, data_.colcnd, data_.amax,
485 data_.rowequ = (data_.equed == SLUD::ROW) || (data_.equed == SLUD::BOTH);
486 data_.colequ = (data_.equed == SLUD::COL) || (data_.equed == SLUD::BOTH);
489 if (data_.options.RowPerm == SLUD::LargeDiag_MC64) {
492 SLUD::D::pdCompRow_loc_to_CompCol_global(
true, &data_.A, &data_.grid, &GA);
495 computeRowPermutationLargeDiagMC64(GA);
498 force_symbfact_ =
true;
500 symbolicFactorization_impl();
501 force_symbfact_ =
false;
505 if (data_.largediag_mc64_job == 5)
507 SLUD::NRformat_loc *Astore = (SLUD::NRformat_loc*) data_.A.Store;
508 slu_type *a = (slu_type*) Astore->nzval;
509 SLUD::int_t m_loc = Astore->m_loc;
510 SLUD::int_t fst_row = Astore->fst_row;
511 SLUD::int_t i, j, irow = fst_row, icol;
515 SLUD::slu_dist_mult<slu_type, magnitude_type> mult_op;
516 for (j = 0; j < m_loc; ++j) {
517 for (i = rowptr_view_.data()[j]; i < rowptr_view_.data()[j+1]; ++i) {
518 icol = colind_view_.data()[i];
519 a[i] = mult_op(a[i], data_.R1[irow] * data_.C1[icol]);
525 if ( data_.rowequ )
for (i = 0; i < data_.A.nrow; ++i) data_.R[i] *= data_.R1[i];
526 else for (i = 0; i < data_.A.nrow; ++i) data_.R[i] = data_.R1[i];
527 if ( data_.colequ )
for (i = 0; i < data_.A.ncol; ++i) data_.C[i] *= data_.C1[i];
528 else for (i = 0; i < data_.A.ncol; ++i) data_.C[i] = data_.C1[i];
530 data_.rowequ = data_.colequ = 1;
536 size_t nnz_loc = ((SLUD::NRformat_loc*)data_.A.Store)->nnz_loc;
537 for(
size_t i = 0; i < nnz_loc; ++i ) colind_view_(i) = data_.perm_c[colind_view_(i)];
540 if( same_symbolic_ ){
545 #if (SUPERLU_DIST_MAJOR_VERSION > 7)
546 data_.options.Fact = SLUD::SamePattern_SameRowPerm;
547 function_map::pdistribute(&(data_.options),
548 as<SLUD::int_t>(this->globalNumRows_),
549 &(data_.A), &(data_.scale_perm),
550 &(data_.glu_freeable), &(data_.lu),
553 function_map::pdistribute(SLUD::SamePattern_SameRowPerm,
554 as<SLUD::int_t>(this->globalNumRows_),
555 &(data_.A), &(data_.scale_perm),
556 &(data_.glu_freeable), &(data_.lu),
560 #if (SUPERLU_DIST_MAJOR_VERSION > 7)
561 data_.options.Fact = SLUD::DOFACT;
562 function_map::dist_psymbtonum(&(data_.options),
563 as<SLUD::int_t>(this->globalNumRows_),
564 &(data_.A), &(data_.scale_perm),
565 &(data_.pslu_freeable), &(data_.lu),
568 function_map::dist_psymbtonum(SLUD::DOFACT,
569 as<SLUD::int_t>(this->globalNumRows_),
570 &(data_.A), &(data_.scale_perm),
571 &(data_.pslu_freeable), &(data_.lu),
577 bool notran = (data_.options.Trans == SLUD::NOTRANS);
578 magnitude_type anorm = function_map::plangs((notran ? (
char *)
"1" : (
char *)
"I"), &(data_.A), &(data_.grid));
582 #ifdef HAVE_AMESOS2_TIMERS
583 Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
585 function_map::gstrf(&(data_.options), this->globalNumRows_,
586 this->globalNumCols_, anorm, &(data_.lu),
587 &(data_.grid), &(data_.stat), &info);
591 TEUCHOS_TEST_FOR_EXCEPTION( info > 0,
593 "L and U factors have been computed but U("
594 << info <<
"," << info <<
") is exactly zero "
595 "(i.e. U is singular)");
599 SLUD::Destroy_CompCol_Matrix_dist(&GA);
605 data_.options.Fact = SLUD::FACTORED;
606 same_symbolic_ =
true;
612 template <
class Matrix,
class Vector>
621 const size_t local_len_rhs = superlu_rowmap_->getLocalNumElements();
622 const global_size_type nrhs = X->getGlobalNumVectors();
623 const global_ordinal_type first_global_row_b = superlu_rowmap_->getMinGlobalIndex();
626 bvals_.resize(nrhs * local_len_rhs);
627 xvals_.resize(nrhs * local_len_rhs);
633 #ifdef HAVE_AMESOS2_TIMERS
634 Teuchos::TimeMonitor convTimer(this->timers_.vecConvTime_);
643 #ifdef HAVE_AMESOS2_TIMERS
644 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
649 copy_helper::do_get(B,
652 Teuchos::ptrInArg(*superlu_rowmap_));
673 if( !same_solve_struct_ ){
674 if( data_.options.SolveInitialized == SLUD::YES ){
675 function_map::SolveFinalize(&(data_.options), &(data_.solve_struct));
677 function_map::SolveInit(&(data_.options), &(data_.A), data_.perm_r.getRawPtr(),
678 data_.perm_c.getRawPtr(), as<SLUD::int_t>(nrhs), &(data_.lu),
679 &(data_.grid), &(data_.solve_struct));
683 same_solve_struct_ =
true;
687 if (data_.options.Equil == SLUD::YES && data_.rowequ) {
688 SLUD::int_t ld = as<SLUD::int_t>(local_len_rhs);
689 SLUD::slu_dist_mult<slu_type, magnitude_type> mult_op;
690 for(global_size_type j = 0; j < nrhs; ++j) {
691 for(
size_t i = 0; i < local_len_rhs; ++i) {
692 bvals_[i + j*ld] = mult_op(bvals_[i + j*ld], data_.R[first_global_row_b + i]);
700 #ifdef HAVE_AMESOS2_TIMERS
701 Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
704 #if (SUPERLU_DIST_MAJOR_VERSION > 7)
705 function_map::gstrs(&(data_.options), as<SLUD::int_t>(this->globalNumRows_), &(data_.lu),
706 &(data_.scale_perm), &(data_.grid), bvals_.getRawPtr(),
707 as<SLUD::int_t>(local_len_rhs), as<SLUD::int_t>(first_global_row_b),
708 as<SLUD::int_t>(local_len_rhs), as<int>(nrhs),
709 &(data_.solve_struct), &(data_.stat), &ierr);
711 function_map::gstrs(as<SLUD::int_t>(this->globalNumRows_), &(data_.lu),
712 &(data_.scale_perm), &(data_.grid), bvals_.getRawPtr(),
713 as<SLUD::int_t>(local_len_rhs), as<SLUD::int_t>(first_global_row_b),
714 as<SLUD::int_t>(local_len_rhs), as<int>(nrhs),
715 &(data_.solve_struct), &(data_.stat), &ierr);
719 TEUCHOS_TEST_FOR_EXCEPTION( ierr < 0,
721 "Argument " << -ierr <<
" to gstrs had an illegal value" );
736 #ifdef HAVE_AMESOS2_TIMERS
737 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
739 SLUD::int_t ld = as<SLUD::int_t>(local_len_rhs);
740 function_map::permute_Dense_Matrix(as<SLUD::int_t>(first_global_row_b),
741 as<SLUD::int_t>(local_len_rhs),
742 data_.solve_struct.row_to_proc,
743 data_.solve_struct.inv_perm_c,
744 bvals_.getRawPtr(), ld,
745 xvals_.getRawPtr(), ld,
751 if (data_.options.Equil == SLUD::YES && data_.colequ) {
752 SLUD::int_t ld = as<SLUD::int_t>(local_len_rhs);
753 SLUD::slu_dist_mult<slu_type, magnitude_type> mult_op;
754 for(global_size_type j = 0; j < nrhs; ++j) {
755 for(
size_t i = 0; i < local_len_rhs; ++i) {
756 xvals_[i + j*ld] = mult_op(xvals_[i + j*ld], data_.C[first_global_row_b + i]);
764 #ifdef HAVE_AMESOS2_TIMERS
765 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
768 put_helper::do_put(X,
771 Teuchos::ptrInArg(*superlu_rowmap_));
778 template <
class Matrix,
class Vector>
783 return( this->globalNumRows_ == this->globalNumCols_ );
787 template <
class Matrix,
class Vector>
793 using Teuchos::getIntegralValue;
794 using Teuchos::ParameterEntryValidator;
796 RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
798 if( parameterList->isParameter(
"npcol") || parameterList->isParameter(
"nprow") ){
799 TEUCHOS_TEST_FOR_EXCEPTION( !(parameterList->isParameter(
"nprow") &&
800 parameterList->isParameter(
"npcol")),
801 std::invalid_argument,
802 "nprow and npcol must be set together" );
804 SLUD::int_t nprow = parameterList->template get<SLUD::int_t>(
"nprow");
805 SLUD::int_t npcol = parameterList->template get<SLUD::int_t>(
"npcol");
807 TEUCHOS_TEST_FOR_EXCEPTION( nprow * npcol > this->getComm()->getSize(),
808 std::invalid_argument,
809 "nprow and npcol combination invalid" );
811 if( (npcol != data_.grid.npcol) || (nprow != data_.grid.nprow) ){
813 SLUD::superlu_gridexit(&(data_.grid));
815 SLUD::superlu_gridinit(data_.mat_comm, nprow, npcol, &(data_.grid));
819 TEUCHOS_TEST_FOR_EXCEPTION( this->control_.useTranspose_,
820 std::invalid_argument,
821 "SuperLU_DIST does not support solving the tranpose system" );
823 data_.options.Trans = SLUD::NOTRANS;
826 bool equil = parameterList->get<
bool>(
"Equil",
false);
827 data_.options.Equil = equil ? SLUD::YES : SLUD::NO;
829 if( parameterList->isParameter(
"RowPerm") ){
830 RCP<const ParameterEntryValidator> rowperm_validator = valid_params->getEntry(
"RowPerm").validator();
831 parameterList->getEntry(
"RowPerm").setValidator(rowperm_validator);
833 data_.options.RowPerm = getIntegralValue<SLUD::rowperm_t>(*parameterList,
"RowPerm");
836 if( parameterList->isParameter(
"LargeDiag_MC64-Options") ){
837 data_.largediag_mc64_job = parameterList->template get<int>(
"LargeDiag_MC64-Options");
840 if( parameterList->isParameter(
"ColPerm") ){
841 RCP<const ParameterEntryValidator> colperm_validator = valid_params->getEntry(
"ColPerm").validator();
842 parameterList->getEntry(
"ColPerm").setValidator(colperm_validator);
844 data_.options.ColPerm = getIntegralValue<SLUD::colperm_t>(*parameterList,
"ColPerm");
853 data_.options.IterRefine = SLUD::NOREFINE;
855 bool replace_tiny = parameterList->get<
bool>(
"ReplaceTinyPivot",
true);
856 data_.options.ReplaceTinyPivot = replace_tiny ? SLUD::YES : SLUD::NO;
858 if( parameterList->isParameter(
"IsContiguous") ){
859 is_contiguous_ = parameterList->get<
bool>(
"IsContiguous");
864 template <
class Matrix,
class Vector>
865 Teuchos::RCP<const Teuchos::ParameterList>
869 using Teuchos::tuple;
870 using Teuchos::ParameterList;
871 using Teuchos::EnhancedNumberValidator;
872 using Teuchos::setStringToIntegralParameter;
873 using Teuchos::setIntParameter;
874 using Teuchos::stringToIntegralParameterEntryValidator;
876 static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
878 if( is_null(valid_params) ){
879 Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
881 Teuchos::RCP<EnhancedNumberValidator<SLUD::int_t> > col_row_validator
882 = Teuchos::rcp(
new EnhancedNumberValidator<SLUD::int_t>() );
883 col_row_validator->setMin(1);
885 pl->set(
"npcol", data_.grid.npcol,
886 "Number of columns in the processor grid. "
887 "Must be set with nprow", col_row_validator);
888 pl->set(
"nprow", data_.grid.nprow,
889 "Number of rows in the SuperLU_DIST processor grid. "
890 "Must be set together with npcol", col_row_validator);
893 setStringToIntegralParameter<SLUD::trans_t>(
"Trans",
"NOTRANS",
894 "Solve for the transpose system or not",
895 tuple<string>(
"NOTRANS"),
896 tuple<string>(
"Do not solve with transpose"),
897 tuple<SLUD::trans_t>(SLUD::NOTRANS),
901 pl->set(
"Equil",
false,
"Whether to equilibrate the system before solve");
914 pl->set(
"ReplaceTinyPivot",
true,
915 "Specifies whether to replace tiny diagonals during LU factorization");
918 setStringToIntegralParameter<SLUD::rowperm_t>(
"RowPerm",
"NOROWPERM",
919 "Specifies how to permute the rows of the "
920 "matrix for sparsity preservation",
921 tuple<string>(
"NOROWPERM",
"LargeDiag_MC64"),
922 tuple<string>(
"Natural ordering",
923 "Duff/Koster algorithm"),
924 tuple<SLUD::rowperm_t>(SLUD::NOROWPERM,
925 SLUD::LargeDiag_MC64),
928 setIntParameter(
"LargeDiag_MC64-Options", 4,
"Options for RowPerm-LargeDiag_MC64", pl.getRawPtr());
931 setStringToIntegralParameter<SLUD::colperm_t>(
"ColPerm",
"PARMETIS",
932 "Specifies how to permute the columns of the "
933 "matrix for sparsity preservation",
934 tuple<string>(
"NATURAL",
"PARMETIS"),
935 tuple<string>(
"Natural ordering",
936 "ParMETIS ordering on A^T + A"),
937 tuple<SLUD::colperm_t>(SLUD::NATURAL,
941 pl->set(
"IsContiguous",
true,
"Whether GIDs contiguous");
950 template <
class Matrix,
class Vector>
954 SLUD::int_t& npcol)
const {
955 TEUCHOS_TEST_FOR_EXCEPTION( nprocs < 1,
956 std::invalid_argument,
957 "Number of MPI processes must be at least 1" );
958 SLUD::int_t c, r = 1;
959 while( r*r <= nprocs ) r++;
962 while( (r--)*c != nprocs ){
968 if( r > 1 || nprocs < 9){
975 template <
class Matrix,
class Vector>
979 using Teuchos::Array;
980 using Teuchos::ArrayView;
981 using Teuchos::ptrInArg;
986 #ifdef HAVE_AMESOS2_TIMERS
987 Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
991 if( data_.A.Store != NULL ){
992 SLUD::Destroy_SuperMatrix_Store_dist( &(data_.A) );
993 data_.A.Store = NULL;
996 Teuchos::RCP<const MatrixAdapter<Matrix> > redist_mat
997 = this->matrixA_->get(ptrInArg(*superlu_rowmap_));
999 int_t l_nnz, l_rows, g_rows, g_cols, fst_global_row;
1000 l_nnz = as<int_t>(redist_mat->getLocalNNZ());
1001 l_rows = as<int_t>(redist_mat->getLocalNumRows());
1002 g_rows = as<int_t>(redist_mat->getGlobalNumRows());
1004 fst_global_row = as<int_t>(superlu_rowmap_->getMinGlobalIndex());
1006 Kokkos::resize(nzvals_view_, l_nnz);
1007 Kokkos::resize(colind_view_, l_nnz);
1008 Kokkos::resize(rowptr_view_, l_rows + 1);
1011 #ifdef HAVE_AMESOS2_TIMERS
1012 Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
1016 host_value_type_array,host_ordinal_type_array, host_size_type_array >::do_get(
1018 nzvals_view_, colind_view_, rowptr_view_,
1020 ptrInArg(*superlu_rowmap_),
1025 TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != l_nnz,
1027 "Did not get the expected number of non-zero vals");
1030 SLUD::Dtype_t dtype = type_map::dtype;
1033 function_map::create_CompRowLoc_Matrix(&(data_.A),
1035 l_nnz, l_rows, fst_global_row,
1036 nzvals_view_.data(),
1037 colind_view_.data(),
1038 rowptr_view_.data(),
1040 dtype, SLUD::SLU_GE);
1047 template<
class Matrix,
class Vector>
1053 #endif // AMESOS2_SUPERLUDIST_DEF_HPP
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const override
Return a const parameter list of all of the valid parameters that this->setParameterList(...) will accept.
Definition: Amesos2_SolverCore_def.hpp:524
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers...
Definition: Amesos2_SolverCore_decl.hpp:71
void computeRowPermutationLargeDiagMC64(SLUD::SuperMatrix &GA)
Compute the row permutation for option LargeDiag-MC64.
Definition: Amesos2_Superludist_def.hpp:303
Amesos2 interface to the distributed memory version of SuperLU.
Definition: Amesos2_Superludist_decl.hpp:56
EPhase
Used to indicate a phase in the direct solution.
Definition: Amesos2_TypeDecl.hpp:31
global_size_type globalNumCols_
Number of global columns in matrixA_.
Definition: Amesos2_SolverCore_decl.hpp:445
super_type & setParameters(const Teuchos::RCP< Teuchos::ParameterList > ¶meterList) override
Set/update internal variables and solver options.
Definition: Amesos2_SolverCore_def.hpp:492
Superludist(Teuchos::RCP< const Matrix > A, Teuchos::RCP< Vector > X, Teuchos::RCP< const Vector > B)
Initialize from Teuchos::RCP.
Definition: Amesos2_Superludist_def.hpp:35
void setParameters_impl(const Teuchos::RCP< Teuchos::ParameterList > ¶meterList)
Definition: Amesos2_Superludist_def.hpp:789
global_size_type globalNumRows_
Number of global rows in matrixA_.
Definition: Amesos2_SolverCore_decl.hpp:442
Helper class for getting 1-D copies of multivectors.
Definition: Amesos2_MultiVecAdapter_decl.hpp:233
Utility functions for Amesos2.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
Returns a pointer to the Teuchos::Comm communicator with this operator.
Definition: Amesos2_SolverCore_decl.hpp:329
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition: Amesos2_Superludist_def.hpp:866
Provides definition of SuperLU_DIST types as well as conversions and type traits. ...
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition: Amesos2_Superludist_def.hpp:358
Similar to get_ccs_helper , but used to get a CRS representation of the given matrix.
Definition: Amesos2_Util.hpp:625
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition: Amesos2_Superludist_def.hpp:780
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > superlu_rowmap_
Maps rows of the matrix to processors in the SuperLU_DIST processor grid.
Definition: Amesos2_Superludist_decl.hpp:309
~Superludist()
Destructor.
Definition: Amesos2_Superludist_def.hpp:208
int symbolicFactorization_impl()
Perform symbolic factorization of the matrix using SuperLU_DIST.
Definition: Amesos2_Superludist_def.hpp:414
void get_default_grid_size(int nprocs, SLUD::int_t &nprow, SLUD::int_t &npcol) const
Definition: Amesos2_Superludist_def.hpp:952
bool in_grid_
true if this processor is in SuperLU_DISTS's 2D process grid
Definition: Amesos2_Superludist_decl.hpp:301
Helper class for putting 1-D data arrays into multivectors.
Definition: Amesos2_MultiVecAdapter_decl.hpp:339
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
SuperLU_DIST specific solve.
Definition: Amesos2_Superludist_def.hpp:614
A templated MultiVector class adapter for Amesos2.
Definition: Amesos2_MultiVecAdapter_decl.hpp:142
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal solver structures.
Definition: Amesos2_Superludist_def.hpp:977
Teuchos::RCP< const MatrixAdapter< Matrix > > matrixA_
The LHS operator.
Definition: Amesos2_SolverCore_decl.hpp:421
int numericFactorization_impl()
SuperLU_DIST specific numeric factorization.
Definition: Amesos2_Superludist_def.hpp:463