Amesos2 - Direct Sparse Solver Interfaces  Version of the Day
Amesos2_Superludist_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // Amesos2: Templated Direct Sparse Solver Package
6 // Copyright 2011 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39 //
40 // ***********************************************************************
41 //
42 // @HEADER
43 
52 #ifndef AMESOS2_SUPERLUDIST_DEF_HPP
53 #define AMESOS2_SUPERLUDIST_DEF_HPP
54 
55 #include <Teuchos_Tuple.hpp>
56 #include <Teuchos_StandardParameterEntryValidators.hpp>
57 #include <Teuchos_DefaultMpiComm.hpp>
58 #include <Teuchos_Details_MpiTypeTraits.hpp>
59 
62 #include "Amesos2_Util.hpp"
63 
64 
65 namespace Amesos2 {
66 
67 
68  template <class Matrix, class Vector>
69  Superludist<Matrix,Vector>::Superludist(Teuchos::RCP<const Matrix> A,
70  Teuchos::RCP<Vector> X,
71  Teuchos::RCP<const Vector> B)
72  : SolverCore<Amesos2::Superludist,Matrix,Vector>(A, X, B)
73  , bvals_()
74  , xvals_()
75  , in_grid_(false)
76  , force_symbfact_(false)
77  , is_contiguous_(true)
78  {
79  using Teuchos::Comm;
80  // It's OK to depend on MpiComm explicitly here, because
81  // SuperLU_DIST requires MPI anyway.
82  using Teuchos::MpiComm;
83  using Teuchos::outArg;
84  using Teuchos::ParameterList;
85  using Teuchos::parameterList;
86  using Teuchos::RCP;
87  using Teuchos::rcp;
88  using Teuchos::rcp_dynamic_cast;
89  using Teuchos::REDUCE_SUM;
90  using Teuchos::reduceAll;
91  typedef global_ordinal_type GO;
92  typedef Tpetra::Map<local_ordinal_type, GO, node_type> map_type;
93 
95  // Set up the SuperLU_DIST processor grid //
97 
98  RCP<const Comm<int> > comm = this->getComm ();
99  const int myRank = comm->getRank ();
100  const int numProcs = comm->getSize ();
101 
102  SLUD::int_t nprow, npcol;
103  get_default_grid_size (numProcs, nprow, npcol);
104 
105  {
106  // FIXME (mfh 16 Dec 2014) getComm() just returns
107  // matrixA_->getComm(), so it's not clear why we need to ask for
108  // the matrix's communicator separately here.
109  RCP<const Comm<int> > matComm = this->matrixA_->getComm ();
110  TEUCHOS_TEST_FOR_EXCEPTION(
111  matComm.is_null (), std::logic_error, "Amesos2::Superlustdist "
112  "constructor: The matrix's communicator is null!");
113  RCP<const MpiComm<int> > matMpiComm =
114  rcp_dynamic_cast<const MpiComm<int> > (matComm);
115  // FIXME (mfh 16 Dec 2014) If the matrix's communicator is a
116  // SerialComm, we probably could just use MPI_COMM_SELF here.
117  // I'm not sure if SuperLU_DIST is smart enough to handle that
118  // case, though.
119  TEUCHOS_TEST_FOR_EXCEPTION(
120  matMpiComm.is_null (), std::logic_error, "Amesos2::Superlustdist "
121  "constructor: The matrix's communicator is not an MpiComm!");
122  TEUCHOS_TEST_FOR_EXCEPTION(
123  matMpiComm->getRawMpiComm ().is_null (), std::logic_error, "Amesos2::"
124  "Superlustdist constructor: The matrix's communicator claims to be a "
125  "Teuchos::MpiComm<int>, but its getRawPtrComm() method returns "
126  "Teuchos::null! This means that the underlying MPI_Comm doesn't even "
127  "exist, which likely implies that the Teuchos::MpiComm was constructed "
128  "incorrectly. It means something different than if the MPI_Comm were "
129  "MPI_COMM_NULL.");
130  MPI_Comm rawMpiComm = (* (matMpiComm->getRawMpiComm ())) ();
131  data_.mat_comm = rawMpiComm;
132  // This looks a bit like ScaLAPACK's grid initialization (which
133  // technically takes place in the BLACS, not in ScaLAPACK
134  // proper). See http://netlib.org/scalapack/slug/node34.html.
135  // The main difference is that SuperLU_DIST depends explicitly
136  // on MPI, while the BLACS hides its communication protocol.
137  SLUD::superlu_gridinit(data_.mat_comm, nprow, npcol, &(data_.grid));
138  }
139 
141  // Set some default parameters. //
142  // //
143  // Must do this after grid has been created in //
144  // case user specifies the nprow and npcol parameters //
146  SLUD::set_default_options_dist(&data_.options);
147 
148  RCP<ParameterList> default_params =
149  parameterList (* (this->getValidParameters ()));
150  this->setParameters (default_params);
151 
152  // Set some internal options
153  data_.options.Fact = SLUD::DOFACT;
154  data_.equed = SLUD::NOEQUIL; // No equilibration has yet been performed
155  data_.options.SolveInitialized = SLUD::NO;
156  data_.options.RefineInitialized = SLUD::NO;
157  data_.rowequ = false;
158  data_.colequ = false;
159  data_.perm_r.resize(this->globalNumRows_);
160  data_.perm_c.resize(this->globalNumCols_);
161  data_.largediag_mc64_job = 4;
162  for (global_size_type i = 0; i < this->globalNumRows_; i++)
163  data_.perm_r[i] = i;
164  for (global_size_type i = 0; i < this->globalNumCols_; i++)
165  data_.perm_c[i] = i;
166 
168  // Set up a communicator for the parallel column ordering and //
169  // parallel symbolic factorization. //
171  data_.symb_comm = MPI_COMM_NULL;
172 
173  // domains is the next power of 2 less than nprow*npcol. This
174  // value will be used for creating an MPI communicator for the
175  // pre-ordering and symbolic factorization methods.
176  data_.domains = (int) ( pow(2.0, floor(log10((double)nprow*npcol)/log10(2.0))) );
177 
178  const int color = (myRank < data_.domains) ? 0 : MPI_UNDEFINED;
179  MPI_Comm_split (data_.mat_comm, color, myRank, &(data_.symb_comm));
180 
182  // Set up a row Map that only includes processes that are in the
183  // SuperLU process grid. This will be used for redistributing A.
185 
186  // mfh 16 Dec 2014: We could use createWeightedContigMapWithNode
187  // with myProcParticipates as the weight, but that costs an extra
188  // all-reduce.
189 
190  // Set to 1 if I am in the grid, and I get some of the matrix rows.
191  int myProcParticipates = 0;
192  if (myRank < nprow * npcol) {
193  in_grid_ = true;
194  myProcParticipates = 1;
195  }
196 
197  // Compute how many processes in the communicator belong to the
198  // process grid.
199  int numParticipatingProcs = 0;
200  reduceAll<int, int> (*comm, REDUCE_SUM, myProcParticipates,
201  outArg (numParticipatingProcs));
202  TEUCHOS_TEST_FOR_EXCEPTION(
203  this->globalNumRows_ != 0 && numParticipatingProcs == 0,
204  std::logic_error, "Amesos2::Superludist constructor: The matrix has "
205  << this->globalNumRows_ << " > 0 global row(s), but no processes in the "
206  "communicator want to participate in its factorization! nprow = "
207  << nprow << " and npcol = " << npcol << ".");
208 
209  // Divide up the rows among the participating processes.
210  size_t myNumRows = 0;
211  {
212  const GO GNR = static_cast<GO> (this->globalNumRows_);
213  const GO quotient = (numParticipatingProcs == 0) ? static_cast<GO> (0) :
214  GNR / static_cast<GO> (numParticipatingProcs);
215  const GO remainder =
216  GNR - quotient * static_cast<GO> (numParticipatingProcs);
217  const GO lclNumRows = (static_cast<GO> (myRank) < remainder) ?
218  (quotient + static_cast<GO> (1)) : quotient;
219  myNumRows = static_cast<size_t> (lclNumRows);
220  }
221 
222  // TODO: might only need to initialize if parallel symbolic factorization is requested.
223  const GO indexBase = this->matrixA_->getRowMap ()->getIndexBase ();
225  rcp (new map_type (this->globalNumRows_, myNumRows, indexBase, comm));
226 
228  // Do some other initialization //
230 
231  data_.A.Store = NULL;
232  function_map::LUstructInit(this->globalNumRows_, this->globalNumCols_, &(data_.lu));
233  SLUD::PStatInit(&(data_.stat));
234  // We do not use ScalePermstructInit because we will use our own
235  // arrays for storing perm_r and perm_c
236  data_.scale_perm.perm_r = data_.perm_r.getRawPtr();
237  data_.scale_perm.perm_c = data_.perm_c.getRawPtr();
238  }
239 
240 
241  template <class Matrix, class Vector>
243  {
244  /* Free SuperLU_DIST data_types
245  * - Matrices
246  * - Vectors
247  * - Stat object
248  * - ScalePerm, LUstruct, grid, and solve objects
249  *
250  * Note: the function definitions are the same regardless whether
251  * complex or real, so we arbitrarily use the D namespace
252  */
253  if ( this->status_.getNumPreOrder() > 0 ){
255 #if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
256  SUPERLU_FREE( data_.sizes );
257  SUPERLU_FREE( data_.fstVtxSep );
258 #else
259  free( data_.sizes );
260  free( data_.fstVtxSep );
261 #endif
262  }
263 
264  // Cleanup old matrix store memory if it's non-NULL. Our
265  // Teuchos::Array's will destroy rowind, colptr, and nzval for us
266  if( data_.A.Store != NULL ){
267  SLUD::Destroy_SuperMatrix_Store_dist( &(data_.A) );
268  }
269 
270  // LU data is initialized in numericFactorization_impl()
271  if ( this->status_.getNumNumericFact() > 0 ){
272  function_map::Destroy_LU(this->globalNumRows_, &(data_.grid), &(data_.lu));
273  }
274  function_map::LUstructFree(&(data_.lu));
275 
276  // If a symbolic factorization is ever performed without a
277  // follow-up numericfactorization, there are some arrays in the
278  // Pslu_freeable struct which will never be free'd by
279  // SuperLU_DIST.
280  if ( this->status_.symbolicFactorizationDone() &&
281  !this->status_.numericFactorizationDone() ){
282  if ( data_.pslu_freeable.xlsub != NULL ){
283 #if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
284  SUPERLU_FREE( data_.pslu_freeable.xlsub );
285  SUPERLU_FREE( data_.pslu_freeable.lsub );
286 #else
287  free( data_.pslu_freeable.xlsub );
288  free( data_.pslu_freeable.lsub );
289 #endif
290  }
291  if ( data_.pslu_freeable.xusub != NULL ){
292 #if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
293  SUPERLU_FREE( data_.pslu_freeable.xusub );
294  SUPERLU_FREE( data_.pslu_freeable.usub );
295 #else
296  free( data_.pslu_freeable.xusub );
297  free( data_.pslu_freeable.usub );
298 #endif
299  }
300  if ( data_.pslu_freeable.supno_loc != NULL ){
301 #if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
302  SUPERLU_FREE( data_.pslu_freeable.supno_loc );
303  SUPERLU_FREE( data_.pslu_freeable.xsup_beg_loc );
304  SUPERLU_FREE( data_.pslu_freeable.xsup_end_loc );
305 #else
306  free( data_.pslu_freeable.supno_loc );
307  free( data_.pslu_freeable.xsup_beg_loc );
308  free( data_.pslu_freeable.xsup_end_loc );
309 #endif
310  }
311 #if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
312  SUPERLU_FREE( data_.pslu_freeable.globToLoc );
313 #else
314  free( data_.pslu_freeable.globToLoc );
315 #endif
316  }
317 
318  SLUD::PStatFree( &(data_.stat) ) ;
319 
320  // Teuchos::Arrays will free R, C, perm_r, and perm_c
321  // SLUD::D::ScalePermstructFree(&(data_.scale_perm));
322 
323  if ( data_.options.SolveInitialized == SLUD::YES )
324  function_map::SolveFinalize(&(data_.options), &(data_.solve_struct));
325 
326  // gridexit of an older version frees SuperLU_MPI_DOUBLE_COMPLE,
327  // which could cause an issue if there are still active instances of superludist?
328  SLUD::superlu_gridexit(&(data_.grid)); // TODO: are there any
329  // cases where grid
330  // wouldn't be initialized?
331 
332  if ( data_.symb_comm != MPI_COMM_NULL ) MPI_Comm_free(&(data_.symb_comm));
333  }
334 
335  template<class Matrix, class Vector>
336  void
338  {
339  int job = data_.largediag_mc64_job;
340  if (job == 5)
341  {
342  data_.R1.resize(data_.A.nrow);
343  data_.C1.resize(data_.A.ncol);
344  }
345 
346  SLUD::NCformat *GAstore = (SLUD::NCformat*) GA.Store;
347  SLUD::int_t* colptr = GAstore->colptr;
348  SLUD::int_t* rowind = GAstore->rowind;
349  SLUD::int_t nnz = GAstore->nnz;
350  slu_type *a_GA = (slu_type *) GAstore->nzval;
351  MPI_Datatype mpi_dtype = Teuchos::Details::MpiTypeTraits<magnitude_type>::getType();
352  MPI_Datatype mpi_itype = Teuchos::Details::MpiTypeTraits<SLUD::int_t>::getType();
353 
354  int iinfo = 0;
355  if ( !data_.grid.iam ) { /* Process 0 finds a row permutation */
356  iinfo = function_map::ldperm_dist(job, data_.A.nrow, nnz, colptr, rowind, a_GA,
357  data_.perm_r.getRawPtr(), data_.R1.getRawPtr(), data_.C1.getRawPtr());
358 
359  MPI_Bcast( &iinfo, 1, MPI_INT, 0, data_.grid.comm );
360  if ( iinfo == 0 ) {
361  MPI_Bcast( data_.perm_r.getRawPtr(), data_.A.nrow, mpi_itype, 0, data_.grid.comm );
362  if ( job == 5 && data_.options.Equil ) {
363  MPI_Bcast( data_.R1.getRawPtr(), data_.A.nrow, mpi_dtype, 0, data_.grid.comm );
364  MPI_Bcast( data_.C1.getRawPtr(), data_.A.ncol, mpi_dtype, 0, data_.grid.comm );
365  }
366  }
367  } else {
368  MPI_Bcast( &iinfo, 1, mpi_int_t, 0, data_.grid.comm );
369  if ( iinfo == 0 ) {
370  MPI_Bcast( data_.perm_r.getRawPtr(), data_.A.nrow, mpi_itype, 0, data_.grid.comm );
371  if ( job == 5 && data_.options.Equil ) {
372  MPI_Bcast( data_.R1.getRawPtr(), data_.A.nrow, mpi_dtype, 0, data_.grid.comm );
373  MPI_Bcast( data_.C1.getRawPtr(), data_.A.ncol, mpi_dtype, 0, data_.grid.comm );
374  }
375  }
376  }
377  TEUCHOS_TEST_FOR_EXCEPTION( iinfo != 0,
378  std::runtime_error,
379  "SuperLU_DIST pre-ordering failed to compute row perm with "
380  << iinfo << std::endl);
381 
382  if (job == 5)
383  {
384  for (SLUD::int_t i = 0; i < data_.A.nrow; ++i) data_.R1[i] = exp(data_.R1[i]);
385  for (SLUD::int_t i = 0; i < data_.A.ncol; ++i) data_.C1[i] = exp(data_.C1[i]);
386  }
387  }
388 
389 
390  template<class Matrix, class Vector>
391  int
393  {
394  if (data_.options.RowPerm == SLUD::NOROWPERM) {
395  SLUD::int_t slu_rows_ub = Teuchos::as<SLUD::int_t>(this->globalNumRows_);
396  for( SLUD::int_t i = 0; i < slu_rows_ub; ++i ) data_.perm_r[i] = i;
397  }
398  else if (data_.options.RowPerm == SLUD::LargeDiag_MC64) {
399  if (!force_symbfact_)
400  // defer to numerical factorization because row permutation requires the matrix values
401  return (EXIT_SUCCESS + 1);
402  }
403  // loadA_impl(); // Refresh matrix values
404 
405  if( in_grid_ ){
406  // If this function has been called at least once, then the
407  // sizes, and fstVtxSep arrays were allocated in
408  // get_perm_c_parmetis. Delete them before calling that
409  // function again. These arrays will also be dealloc'd in the
410  // deconstructor.
411  if( this->status_.getNumPreOrder() > 0 ){
412 #if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
413  SUPERLU_FREE( data_.sizes );
414  SUPERLU_FREE( data_.fstVtxSep );
415 #else
416  free( data_.sizes );
417  free( data_.fstVtxSep );
418 #endif
419  }
420  float info = 0.0;
421  {
422 #ifdef HAVE_AMESOS2_TIMERS
423  Teuchos::TimeMonitor preOrderTime( this->timers_.preOrderTime_ );
424 #endif
425  info = SLUD::get_perm_c_parmetis( &(data_.A),
426  data_.perm_r.getRawPtr(), data_.perm_c.getRawPtr(),
427  data_.grid.nprow * data_.grid.npcol, data_.domains,
428  &(data_.sizes), &(data_.fstVtxSep),
429  &(data_.grid), &(data_.symb_comm) );
430  }
431  TEUCHOS_TEST_FOR_EXCEPTION( info > 0.0,
432  std::runtime_error,
433  "SuperLU_DIST pre-ordering ran out of memory after allocating "
434  << info << " bytes of memory" );
435  }
436 
437  // Ordering will be applied directly before numeric factorization,
438  // after we have a chance to get updated coefficients from the
439  // matrix
440 
441  return EXIT_SUCCESS;
442  }
443 
444 
445 
446  template <class Matrix, class Vector>
447  int
449  {
450  // loadA_impl(); // Refresh matrix values
451  if (!force_symbfact_) {
452  if (data_.options.RowPerm == SLUD::LargeDiag_MC64) {
453  // defer to numerical factorization because row permutation requires the matrix values
454  return (EXIT_SUCCESS + 1);
455  }
456  }
457 
458  if( in_grid_ ){
459 
460  float info = 0.0;
461  {
462 #ifdef HAVE_AMESOS2_TIMERS
463  Teuchos::TimeMonitor symFactTime( this->timers_.symFactTime_ );
464 #endif
465 
466 #if (SUPERLU_DIST_MAJOR_VERSION > 7)
467  info = SLUD::symbfact_dist(&(data_.options), (data_.grid.nprow) * (data_.grid.npcol),
468  data_.domains, &(data_.A), data_.perm_c.getRawPtr(),
469  data_.perm_r.getRawPtr(), data_.sizes,
470  data_.fstVtxSep, &(data_.pslu_freeable),
471  &(data_.grid.comm), &(data_.symb_comm),
472  &(data_.mem_usage));
473 
474 #else
475  info = SLUD::symbfact_dist((data_.grid.nprow) * (data_.grid.npcol),
476  data_.domains, &(data_.A), data_.perm_c.getRawPtr(),
477  data_.perm_r.getRawPtr(), data_.sizes,
478  data_.fstVtxSep, &(data_.pslu_freeable),
479  &(data_.grid.comm), &(data_.symb_comm),
480  &(data_.mem_usage));
481 #endif
482  }
483  TEUCHOS_TEST_FOR_EXCEPTION( info > 0.0,
484  std::runtime_error,
485  "SuperLU_DIST symbolic factorization ran out of memory after"
486  " allocating " << info << " bytes of memory" );
487  }
488  same_symbolic_ = false;
489  same_solve_struct_ = false;
490 
491  return EXIT_SUCCESS;
492  }
493 
494 
495  template <class Matrix, class Vector>
496  int
498  using Teuchos::as;
499 
500  // loadA_impl(); // Refresh the matrix values
501  SLUD::SuperMatrix GA; /* Global A in NC format */
502  bool need_value = false;
503 
504  if( in_grid_ ) {
505  if( data_.options.Equil == SLUD::YES ) {
506  SLUD::int_t info = 0;
507 
508  // Compute scaling
509  data_.R.resize(this->globalNumRows_);
510  data_.C.resize(this->globalNumCols_);
511  function_map::gsequ_loc(&(data_.A), data_.R.getRawPtr(), data_.C.getRawPtr(),
512  &(data_.rowcnd), &(data_.colcnd), &(data_.amax), &info, &(data_.grid));
513 
514  // Apply the scalings
515  function_map::laqgs_loc(&(data_.A), data_.R.getRawPtr(), data_.C.getRawPtr(),
516  data_.rowcnd, data_.colcnd, data_.amax,
517  &(data_.equed));
518 
519  data_.rowequ = (data_.equed == SLUD::ROW) || (data_.equed == SLUD::BOTH);
520  data_.colequ = (data_.equed == SLUD::COL) || (data_.equed == SLUD::BOTH);
521 
522  // Compute and apply the row permutation
523  if (data_.options.RowPerm == SLUD::LargeDiag_MC64) {
524  // Create a column-order copy of A
525  need_value = true;
526  SLUD::D::pdCompRow_loc_to_CompCol_global(true, &data_.A, &data_.grid, &GA);
527 
528  // Compute row permutation
529  computeRowPermutationLargeDiagMC64(GA);
530 
531  // Here we do symbolic factorization
532  force_symbfact_ = true;
533  preOrdering_impl();
534  symbolicFactorization_impl();
535  force_symbfact_ = false;
536 
537  // Apply row-permutation scaling for job=5
538  // Here we do it manually to bypass the threshold check in laqgs_loc
539  if (data_.largediag_mc64_job == 5)
540  {
541  SLUD::NRformat_loc *Astore = (SLUD::NRformat_loc*) data_.A.Store;
542  slu_type *a = (slu_type*) Astore->nzval;
543  SLUD::int_t m_loc = Astore->m_loc;
544  SLUD::int_t fst_row = Astore->fst_row;
545  SLUD::int_t i, j, irow = fst_row, icol;
546 
547  /* Scale the distributed matrix further.
548  A <-- diag(R1)*A*diag(C1) */
549  SLUD::slu_dist_mult<slu_type, magnitude_type> mult_op;
550  for (j = 0; j < m_loc; ++j) {
551  for (i = rowptr_view_.data()[j]; i < rowptr_view_.data()[j+1]; ++i) {
552  icol = colind_view_.data()[i];
553  a[i] = mult_op(a[i], data_.R1[irow] * data_.C1[icol]);
554  }
555  ++irow;
556  }
557 
558  /* Multiply together the scaling factors */
559  if ( data_.rowequ ) for (i = 0; i < data_.A.nrow; ++i) data_.R[i] *= data_.R1[i];
560  else for (i = 0; i < data_.A.nrow; ++i) data_.R[i] = data_.R1[i];
561  if ( data_.colequ ) for (i = 0; i < data_.A.ncol; ++i) data_.C[i] *= data_.C1[i];
562  else for (i = 0; i < data_.A.ncol; ++i) data_.C[i] = data_.C1[i];
563 
564  data_.rowequ = data_.colequ = 1;
565  }
566  }
567  }
568 
569  // Apply the column ordering, so that AC is the column-permuted A, and compute etree
570  size_t nnz_loc = ((SLUD::NRformat_loc*)data_.A.Store)->nnz_loc;
571  for( size_t i = 0; i < nnz_loc; ++i ) colind_view_(i) = data_.perm_c[colind_view_(i)];
572 
573  // Distribute data from the symbolic factorization
574  if( same_symbolic_ ){
575  // Note: with the SamePattern_SameRowPerm options, it does not
576  // matter that the glu_freeable member has never been
577  // initialized, because it is never accessed. It is a
578  // placeholder arg. The real work is done in data_.lu
579 #if (SUPERLU_DIST_MAJOR_VERSION > 7)
580  data_.options.Fact = SLUD::SamePattern_SameRowPerm;
581  function_map::pdistribute(&(data_.options),
582  as<SLUD::int_t>(this->globalNumRows_), // aka "n"
583  &(data_.A), &(data_.scale_perm),
584  &(data_.glu_freeable), &(data_.lu),
585  &(data_.grid));
586 #else
587  function_map::pdistribute(SLUD::SamePattern_SameRowPerm,
588  as<SLUD::int_t>(this->globalNumRows_), // aka "n"
589  &(data_.A), &(data_.scale_perm),
590  &(data_.glu_freeable), &(data_.lu),
591  &(data_.grid));
592 #endif
593  } else {
594 #if (SUPERLU_DIST_MAJOR_VERSION > 7)
595  data_.options.Fact = SLUD::DOFACT;
596  function_map::dist_psymbtonum(&(data_.options),
597  as<SLUD::int_t>(this->globalNumRows_), // aka "n"
598  &(data_.A), &(data_.scale_perm),
599  &(data_.pslu_freeable), &(data_.lu),
600  &(data_.grid));
601 #else
602  function_map::dist_psymbtonum(SLUD::DOFACT,
603  as<SLUD::int_t>(this->globalNumRows_), // aka "n"
604  &(data_.A), &(data_.scale_perm),
605  &(data_.pslu_freeable), &(data_.lu),
606  &(data_.grid));
607 #endif
608  }
609 
610  // Retrieve the normI of A (required by gstrf).
611  bool notran = (data_.options.Trans == SLUD::NOTRANS);
612  magnitude_type anorm = function_map::plangs((notran ? (char *)"1" : (char *)"I"), &(data_.A), &(data_.grid));
613 
614  int info = 0;
615  {
616 #ifdef HAVE_AMESOS2_TIMERS
617  Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
618 #endif
619  function_map::gstrf(&(data_.options), this->globalNumRows_,
620  this->globalNumCols_, anorm, &(data_.lu),
621  &(data_.grid), &(data_.stat), &info);
622  }
623 
624  // Check output
625  TEUCHOS_TEST_FOR_EXCEPTION( info > 0,
626  std::runtime_error,
627  "L and U factors have been computed but U("
628  << info << "," << info << ") is exactly zero "
629  "(i.e. U is singular)");
630  }
631 
632  if (need_value)
633  SLUD::Destroy_CompCol_Matrix_dist(&GA);
634 
635  // The other option, that info_st < 0, denotes invalid parameters
636  // to the function, but we'll assume for now that that won't
637  // happen.
638 
639  data_.options.Fact = SLUD::FACTORED;
640  same_symbolic_ = true;
641 
642  return EXIT_SUCCESS;
643  }
644 
645 
646  template <class Matrix, class Vector>
647  int
649  const Teuchos::Ptr<const MultiVecAdapter<Vector> > B) const
650  {
651  using Teuchos::as;
652 
653  // local_len_rhs is how many of the multivector rows belong to
654  // this processor in the SuperLU_DIST processor grid.
655  const size_t local_len_rhs = superlu_rowmap_->getLocalNumElements();
656  const global_size_type nrhs = X->getGlobalNumVectors();
657  const global_ordinal_type first_global_row_b = superlu_rowmap_->getMinGlobalIndex();
658 
659  // make sure our multivector storage is sized appropriately
660  bvals_.resize(nrhs * local_len_rhs);
661  xvals_.resize(nrhs * local_len_rhs);
662 
663  // We assume the global length of the two vectors have already been
664  // checked for compatibility
665 
666  { // get the values from B
667 #ifdef HAVE_AMESOS2_TIMERS
668  Teuchos::TimeMonitor convTimer(this->timers_.vecConvTime_);
669 #endif
670  {
671  // The input dense matrix for B should be distributed in the
672  // same manner as the superlu_dist matrix. That is, if a
673  // processor has m_loc rows of A, then it should also have
674  // m_loc rows of B (and the same rows). We accomplish this by
675  // distributing the multivector rows with the same Map that
676  // the matrix A's rows are distributed.
677 #ifdef HAVE_AMESOS2_TIMERS
678  Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
679 #endif
680  // get grid-distributed mv data. The multivector data will be
681  // distributed across the processes in the SuperLU_DIST grid.
682  typedef Util::get_1d_copy_helper<MultiVecAdapter<Vector>,slu_type> copy_helper;
683  copy_helper::do_get(B,
684  bvals_(),
685  local_len_rhs,
686  Teuchos::ptrInArg(*superlu_rowmap_));
687  }
688  } // end block for conversion time
689 
690  if( in_grid_ ){
691  // if( data_.options.trans == SLUD::NOTRANS ){
692  // if( data_.rowequ ){ // row equilibration has been done on AC
693  // // scale bxvals_ by diag(R)
694  // Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.R(),
695  // SLUD::slu_mt_mult<slu_type,magnitude_type>());
696  // }
697  // } else if( data_.colequ ){ // column equilibration has been done on AC
698  // // scale bxvals_ by diag(C)
699  // Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.C(),
700  // SLUD::slu_mt_mult<slu_type,magnitude_type>());
701  // }
702 
703  // Initialize the SOLVEstruct_t.
704  //
705  // We are able to reuse the solve struct if we have not changed
706  // the sparsity pattern of L and U since the last solve
707  if( !same_solve_struct_ ){
708  if( data_.options.SolveInitialized == SLUD::YES ){
709  function_map::SolveFinalize(&(data_.options), &(data_.solve_struct));
710  }
711  function_map::SolveInit(&(data_.options), &(data_.A), data_.perm_r.getRawPtr(),
712  data_.perm_c.getRawPtr(), as<SLUD::int_t>(nrhs), &(data_.lu),
713  &(data_.grid), &(data_.solve_struct));
714  // Flag that we can reuse this solve_struct unless another
715  // symbolicFactorization is called between here and the next
716  // solve.
717  same_solve_struct_ = true;
718  }
719 
720  // Apply row-scaling if requested
721  if (data_.options.Equil == SLUD::YES && data_.rowequ) {
722  SLUD::int_t ld = as<SLUD::int_t>(local_len_rhs);
723  SLUD::slu_dist_mult<slu_type, magnitude_type> mult_op;
724  for(global_size_type j = 0; j < nrhs; ++j) {
725  for(size_t i = 0; i < local_len_rhs; ++i) {
726  bvals_[i + j*ld] = mult_op(bvals_[i + j*ld], data_.R[first_global_row_b + i]);
727  }
728  }
729  }
730 
731  // Solve
732  int ierr = 0; // returned error code
733  {
734 #ifdef HAVE_AMESOS2_TIMERS
735  Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
736 #endif
737 
738 #if (SUPERLU_DIST_MAJOR_VERSION > 7)
739  function_map::gstrs(&(data_.options), as<SLUD::int_t>(this->globalNumRows_), &(data_.lu),
740  &(data_.scale_perm), &(data_.grid), bvals_.getRawPtr(),
741  as<SLUD::int_t>(local_len_rhs), as<SLUD::int_t>(first_global_row_b),
742  as<SLUD::int_t>(local_len_rhs), as<int>(nrhs),
743  &(data_.solve_struct), &(data_.stat), &ierr);
744 #else
745  function_map::gstrs(as<SLUD::int_t>(this->globalNumRows_), &(data_.lu),
746  &(data_.scale_perm), &(data_.grid), bvals_.getRawPtr(),
747  as<SLUD::int_t>(local_len_rhs), as<SLUD::int_t>(first_global_row_b),
748  as<SLUD::int_t>(local_len_rhs), as<int>(nrhs),
749  &(data_.solve_struct), &(data_.stat), &ierr);
750 #endif
751  } // end block for solve time
752 
753  TEUCHOS_TEST_FOR_EXCEPTION( ierr < 0,
754  std::runtime_error,
755  "Argument " << -ierr << " to gstrs had an illegal value" );
756 
757  // "Un-scale" the solution so that it is a solution of the original system
758  // if( data_.options.trans == SLUD::NOTRANS ){
759  // if( data_.colequ ){ // column equilibration has been done on AC
760  // // scale bxvals_ by diag(C)
761  // Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.C(),
762  // SLUD::slu_mt_mult<slu_type,magnitude_type>());
763  // }
764  // } else if( data_.rowequ ){ // row equilibration has been done on AC
765  // // scale bxvals_ by diag(R)
766  // Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.R(),
767  // SLUD::slu_mt_mult<slu_type,magnitude_type>());
768  // }
769  { // permute B to a solution of the original system
770 #ifdef HAVE_AMESOS2_TIMERS
771  Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
772 #endif
773  SLUD::int_t ld = as<SLUD::int_t>(local_len_rhs);
774  function_map::permute_Dense_Matrix(as<SLUD::int_t>(first_global_row_b),
775  as<SLUD::int_t>(local_len_rhs),
776  data_.solve_struct.row_to_proc,
777  data_.solve_struct.inv_perm_c,
778  bvals_.getRawPtr(), ld,
779  xvals_.getRawPtr(), ld,
780  as<int>(nrhs),
781  &(data_.grid));
782  }
783 
784  // Apply col-scaling if requested
785  if (data_.options.Equil == SLUD::YES && data_.colequ) {
786  SLUD::int_t ld = as<SLUD::int_t>(local_len_rhs);
787  SLUD::slu_dist_mult<slu_type, magnitude_type> mult_op;
788  for(global_size_type j = 0; j < nrhs; ++j) {
789  for(size_t i = 0; i < local_len_rhs; ++i) {
790  xvals_[i + j*ld] = mult_op(xvals_[i + j*ld], data_.C[first_global_row_b + i]);
791  }
792  }
793  }
794  }
795 
796  /* Update X's global values */
797  {
798 #ifdef HAVE_AMESOS2_TIMERS
799  Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
800 #endif
801  typedef Util::put_1d_data_helper<MultiVecAdapter<Vector>,slu_type> put_helper;
802  put_helper::do_put(X,
803  xvals_(),
804  local_len_rhs,
805  Teuchos::ptrInArg(*superlu_rowmap_));
806  }
807 
808  return EXIT_SUCCESS;
809  }
810 
811 
812  template <class Matrix, class Vector>
813  bool
815  {
816  // SuperLU_DIST requires square matrices
817  return( this->globalNumRows_ == this->globalNumCols_ );
818  }
819 
820 
821  template <class Matrix, class Vector>
822  void
823  Superludist<Matrix,Vector>::setParameters_impl(const Teuchos::RCP<Teuchos::ParameterList> & parameterList )
824  {
825  using Teuchos::as;
826  using Teuchos::RCP;
827  using Teuchos::getIntegralValue;
828  using Teuchos::ParameterEntryValidator;
829 
830  RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
831 
832  if( parameterList->isParameter("npcol") || parameterList->isParameter("nprow") ){
833  TEUCHOS_TEST_FOR_EXCEPTION( !(parameterList->isParameter("nprow") &&
834  parameterList->isParameter("npcol")),
835  std::invalid_argument,
836  "nprow and npcol must be set together" );
837 
838  SLUD::int_t nprow = parameterList->template get<SLUD::int_t>("nprow");
839  SLUD::int_t npcol = parameterList->template get<SLUD::int_t>("npcol");
840 
841  TEUCHOS_TEST_FOR_EXCEPTION( nprow * npcol > this->getComm()->getSize(),
842  std::invalid_argument,
843  "nprow and npcol combination invalid" );
844 
845  if( (npcol != data_.grid.npcol) || (nprow != data_.grid.nprow) ){
846  // De-allocate the default grid that was initialized in the constructor
847  SLUD::superlu_gridexit(&(data_.grid));
848  // Create a new grid
849  SLUD::superlu_gridinit(data_.mat_comm, nprow, npcol, &(data_.grid));
850  } // else our grid has not changed size since the last initialization
851  }
852 
853  TEUCHOS_TEST_FOR_EXCEPTION( this->control_.useTranspose_,
854  std::invalid_argument,
855  "SuperLU_DIST does not support solving the tranpose system" );
856 
857  data_.options.Trans = SLUD::NOTRANS; // should always be set this way;
858 
859  // Equilbration option
860  bool equil = parameterList->get<bool>("Equil", false);
861  data_.options.Equil = equil ? SLUD::YES : SLUD::NO;
862 
863  if( parameterList->isParameter("RowPerm") ){
864  RCP<const ParameterEntryValidator> rowperm_validator = valid_params->getEntry("RowPerm").validator();
865  parameterList->getEntry("RowPerm").setValidator(rowperm_validator);
866 
867  data_.options.RowPerm = getIntegralValue<SLUD::rowperm_t>(*parameterList, "RowPerm");
868  }
869 
870  if( parameterList->isParameter("LargeDiag_MC64-Options") ){
871  data_.largediag_mc64_job = parameterList->template get<int>("LargeDiag_MC64-Options");
872  }
873 
874  if( parameterList->isParameter("ColPerm") ){
875  RCP<const ParameterEntryValidator> colperm_validator = valid_params->getEntry("ColPerm").validator();
876  parameterList->getEntry("ColPerm").setValidator(colperm_validator);
877 
878  data_.options.ColPerm = getIntegralValue<SLUD::colperm_t>(*parameterList, "ColPerm");
879  }
880 
881  // TODO: Uncomment when supported
882  // if( parameterList->isParameter("IterRefine") ){
883  // RCP<const ParameterEntryValidator> iter_refine_validator = valid_params->getEntry("IterRefine").validator();
884  // parameterList->getEntry("IterRefine").setValidator(iter_refine_validator);
885  // data_.options.IterRefine = getIntegralValue<SLUD::IterRefine_t>(*parameterList, "IterRefine");
886  // }
887  data_.options.IterRefine = SLUD::NOREFINE;
888 
889  bool replace_tiny = parameterList->get<bool>("ReplaceTinyPivot", true);
890  data_.options.ReplaceTinyPivot = replace_tiny ? SLUD::YES : SLUD::NO;
891 
892  if( parameterList->isParameter("IsContiguous") ){
893  is_contiguous_ = parameterList->get<bool>("IsContiguous");
894  }
895  }
896 
897 
898  template <class Matrix, class Vector>
899  Teuchos::RCP<const Teuchos::ParameterList>
901  {
902  using std::string;
903  using Teuchos::tuple;
904  using Teuchos::ParameterList;
905  using Teuchos::EnhancedNumberValidator;
906  using Teuchos::setStringToIntegralParameter;
907  using Teuchos::setIntParameter;
908  using Teuchos::stringToIntegralParameterEntryValidator;
909 
910  static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
911 
912  if( is_null(valid_params) ){
913  Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
914 
915  Teuchos::RCP<EnhancedNumberValidator<SLUD::int_t> > col_row_validator
916  = Teuchos::rcp( new EnhancedNumberValidator<SLUD::int_t>() );
917  col_row_validator->setMin(1);
918 
919  pl->set("npcol", data_.grid.npcol,
920  "Number of columns in the processor grid. "
921  "Must be set with nprow", col_row_validator);
922  pl->set("nprow", data_.grid.nprow,
923  "Number of rows in the SuperLU_DIST processor grid. "
924  "Must be set together with npcol", col_row_validator);
925 
926  // validator will catch any value besides NOTRANS
927  setStringToIntegralParameter<SLUD::trans_t>("Trans", "NOTRANS",
928  "Solve for the transpose system or not",
929  tuple<string>("NOTRANS"),
930  tuple<string>("Do not solve with transpose"),
931  tuple<SLUD::trans_t>(SLUD::NOTRANS),
932  pl.getRawPtr());
933 
934  // Equillbration
935  pl->set("Equil", false, "Whether to equilibrate the system before solve");
936 
937  // TODO: uncomment when supported
938  // setStringToIntegralParameter<SLUD::IterRefine_t>("IterRefine", "NOREFINE",
939  // "Type of iterative refinement to use",
940  // tuple<string>("NOREFINE", "DOUBLE"),
941  // tuple<string>("Do not use iterative refinement",
942  // "Do double iterative refinement"),
943  // tuple<SLUD::IterRefine_t>(SLUD::NOREFINE,
944  // SLUD::DOUBLE),
945  // pl.getRawPtr());
946 
947  // Tiny pivot handling
948  pl->set("ReplaceTinyPivot", true,
949  "Specifies whether to replace tiny diagonals during LU factorization");
950 
951  // Row permutation
952  setStringToIntegralParameter<SLUD::rowperm_t>("RowPerm", "NOROWPERM",
953  "Specifies how to permute the rows of the "
954  "matrix for sparsity preservation",
955  tuple<string>("NOROWPERM", "LargeDiag_MC64"),
956  tuple<string>("Natural ordering",
957  "Duff/Koster algorithm"),
958  tuple<SLUD::rowperm_t>(SLUD::NOROWPERM,
959  SLUD::LargeDiag_MC64),
960  pl.getRawPtr());
961 
962  setIntParameter("LargeDiag_MC64-Options", 4, "Options for RowPerm-LargeDiag_MC64", pl.getRawPtr());
963 
964  // Column permutation
965  setStringToIntegralParameter<SLUD::colperm_t>("ColPerm", "PARMETIS",
966  "Specifies how to permute the columns of the "
967  "matrix for sparsity preservation",
968  tuple<string>("NATURAL", "PARMETIS"),
969  tuple<string>("Natural ordering",
970  "ParMETIS ordering on A^T + A"),
971  tuple<SLUD::colperm_t>(SLUD::NATURAL,
972  SLUD::PARMETIS),
973  pl.getRawPtr());
974 
975  pl->set("IsContiguous", true, "Whether GIDs contiguous");
976 
977  valid_params = pl;
978  }
979 
980  return valid_params;
981  }
982 
983 
984  template <class Matrix, class Vector>
985  void
987  SLUD::int_t& nprow,
988  SLUD::int_t& npcol) const {
989  TEUCHOS_TEST_FOR_EXCEPTION( nprocs < 1,
990  std::invalid_argument,
991  "Number of MPI processes must be at least 1" );
992  SLUD::int_t c, r = 1;
993  while( r*r <= nprocs ) r++;
994  nprow = npcol = --r; // fall back to square grid
995  c = nprocs / r;
996  while( (r--)*c != nprocs ){
997  c = nprocs / r; // note integer division
998  }
999  ++r;
1000  // prefer the square grid over a single row (which will only happen
1001  // in the case of a prime nprocs
1002  if( r > 1 || nprocs < 9){ // nprocs < 9 is a heuristic for the small cases
1003  nprow = r;
1004  npcol = c;
1005  }
1006  }
1007 
1008 
1009  template <class Matrix, class Vector>
1010  bool
1012  // Extract the necessary information from mat and call SLU function
1013  using Teuchos::Array;
1014  using Teuchos::ArrayView;
1015  using Teuchos::ptrInArg;
1016  using Teuchos::as;
1017 
1018  using SLUD::int_t;
1019 
1020 #ifdef HAVE_AMESOS2_TIMERS
1021  Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
1022 #endif
1023 
1024  // Cleanup old store memory if it's non-NULL
1025  if( data_.A.Store != NULL ){
1026  SLUD::Destroy_SuperMatrix_Store_dist( &(data_.A) );
1027  data_.A.Store = NULL;
1028  }
1029 
1030  Teuchos::RCP<const MatrixAdapter<Matrix> > redist_mat
1031  = this->matrixA_->get(ptrInArg(*superlu_rowmap_));
1032 
1033  int_t l_nnz, l_rows, g_rows, g_cols, fst_global_row;
1034  l_nnz = as<int_t>(redist_mat->getLocalNNZ());
1035  l_rows = as<int_t>(redist_mat->getLocalNumRows());
1036  g_rows = as<int_t>(redist_mat->getGlobalNumRows());
1037  g_cols = g_rows; // we deal with square matrices
1038  fst_global_row = as<int_t>(superlu_rowmap_->getMinGlobalIndex());
1039 
1040  Kokkos::resize(nzvals_view_, l_nnz);
1041  Kokkos::resize(colind_view_, l_nnz);
1042  Kokkos::resize(rowptr_view_, l_rows + 1);
1043  int_t nnz_ret = 0;
1044  {
1045 #ifdef HAVE_AMESOS2_TIMERS
1046  Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
1047 #endif
1048 
1050  host_value_type_array,host_ordinal_type_array, host_size_type_array >::do_get(
1051  redist_mat.ptr(),
1052  nzvals_view_, colind_view_, rowptr_view_,
1053  nnz_ret,
1054  ptrInArg(*superlu_rowmap_),
1055  ROOTED,
1056  ARBITRARY);
1057  }
1058 
1059  TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != l_nnz,
1060  std::runtime_error,
1061  "Did not get the expected number of non-zero vals");
1062 
1063  // Get the SLU data type for this type of matrix
1064  SLUD::Dtype_t dtype = type_map::dtype;
1065 
1066  if( in_grid_ ){
1067  function_map::create_CompRowLoc_Matrix(&(data_.A),
1068  g_rows, g_cols,
1069  l_nnz, l_rows, fst_global_row,
1070  nzvals_view_.data(),
1071  colind_view_.data(),
1072  rowptr_view_.data(),
1073  SLUD::SLU_NR_loc,
1074  dtype, SLUD::SLU_GE);
1075  }
1076 
1077  return true;
1078 }
1079 
1080 
1081  template<class Matrix, class Vector>
1082  const char* Superludist<Matrix,Vector>::name = "SuperLU_DIST";
1083 
1084 
1085 } // end namespace Amesos2
1086 
1087 #endif // AMESOS2_SUPERLUDIST_DEF_HPP
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const override
Return a const parameter list of all of the valid parameters that this-&gt;setParameterList(...) will accept.
Definition: Amesos2_SolverCore_def.hpp:558
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers...
Definition: Amesos2_SolverCore_decl.hpp:105
void computeRowPermutationLargeDiagMC64(SLUD::SuperMatrix &GA)
Compute the row permutation for option LargeDiag-MC64.
Definition: Amesos2_Superludist_def.hpp:337
Amesos2 interface to the distributed memory version of SuperLU.
Definition: Amesos2_Superludist_decl.hpp:90
EPhase
Used to indicate a phase in the direct solution.
Definition: Amesos2_TypeDecl.hpp:65
global_size_type globalNumCols_
Number of global columns in matrixA_.
Definition: Amesos2_SolverCore_decl.hpp:479
super_type & setParameters(const Teuchos::RCP< Teuchos::ParameterList > &parameterList) override
Set/update internal variables and solver options.
Definition: Amesos2_SolverCore_def.hpp:526
Superludist(Teuchos::RCP< const Matrix > A, Teuchos::RCP< Vector > X, Teuchos::RCP< const Vector > B)
Initialize from Teuchos::RCP.
Definition: Amesos2_Superludist_def.hpp:69
void setParameters_impl(const Teuchos::RCP< Teuchos::ParameterList > &parameterList)
Definition: Amesos2_Superludist_def.hpp:823
global_size_type globalNumRows_
Number of global rows in matrixA_.
Definition: Amesos2_SolverCore_decl.hpp:476
Helper class for getting 1-D copies of multivectors.
Definition: Amesos2_MultiVecAdapter_decl.hpp:267
Utility functions for Amesos2.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
Returns a pointer to the Teuchos::Comm communicator with this operator.
Definition: Amesos2_SolverCore_decl.hpp:363
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition: Amesos2_Superludist_def.hpp:900
Provides definition of SuperLU_DIST types as well as conversions and type traits. ...
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition: Amesos2_Superludist_def.hpp:392
Similar to get_ccs_helper , but used to get a CRS representation of the given matrix.
Definition: Amesos2_Util.hpp:659
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition: Amesos2_Superludist_def.hpp:814
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > superlu_rowmap_
Maps rows of the matrix to processors in the SuperLU_DIST processor grid.
Definition: Amesos2_Superludist_decl.hpp:343
~Superludist()
Destructor.
Definition: Amesos2_Superludist_def.hpp:242
int symbolicFactorization_impl()
Perform symbolic factorization of the matrix using SuperLU_DIST.
Definition: Amesos2_Superludist_def.hpp:448
void get_default_grid_size(int nprocs, SLUD::int_t &nprow, SLUD::int_t &npcol) const
Definition: Amesos2_Superludist_def.hpp:986
bool in_grid_
true if this processor is in SuperLU_DISTS&#39;s 2D process grid
Definition: Amesos2_Superludist_decl.hpp:335
Helper class for putting 1-D data arrays into multivectors.
Definition: Amesos2_MultiVecAdapter_decl.hpp:373
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
SuperLU_DIST specific solve.
Definition: Amesos2_Superludist_def.hpp:648
A templated MultiVector class adapter for Amesos2.
Definition: Amesos2_MultiVecAdapter_decl.hpp:176
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal solver structures.
Definition: Amesos2_Superludist_def.hpp:1011
Teuchos::RCP< const MatrixAdapter< Matrix > > matrixA_
The LHS operator.
Definition: Amesos2_SolverCore_decl.hpp:455
int numericFactorization_impl()
SuperLU_DIST specific numeric factorization.
Definition: Amesos2_Superludist_def.hpp:497