Amesos2 - Direct Sparse Solver Interfaces  Version of the Day
Amesos2_Superludist_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // Amesos2: Templated Direct Sparse Solver Package
6 // Copyright 2011 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39 //
40 // ***********************************************************************
41 //
42 // @HEADER
43 
52 #ifndef AMESOS2_SUPERLUDIST_DEF_HPP
53 #define AMESOS2_SUPERLUDIST_DEF_HPP
54 
55 #include <Teuchos_Tuple.hpp>
56 #include <Teuchos_StandardParameterEntryValidators.hpp>
57 #include <Teuchos_DefaultMpiComm.hpp>
58 
61 #include "Amesos2_Util.hpp"
62 
63 
64 namespace Amesos2 {
65 
66 
67  template <class Matrix, class Vector>
68  Superludist<Matrix,Vector>::Superludist(Teuchos::RCP<const Matrix> A,
69  Teuchos::RCP<Vector> X,
70  Teuchos::RCP<const Vector> B)
71  : SolverCore<Amesos2::Superludist,Matrix,Vector>(A, X, B)
72  , nzvals_() // initialization to empty arrays
73  , colind_()
74  , rowptr_()
75  , bvals_()
76  , xvals_()
77  , in_grid_(false)
78  , is_contiguous_(true)
79  {
80  using Teuchos::Comm;
81  // It's OK to depend on MpiComm explicitly here, because
82  // SuperLU_DIST requires MPI anyway.
83  using Teuchos::MpiComm;
84  using Teuchos::outArg;
85  using Teuchos::ParameterList;
86  using Teuchos::parameterList;
87  using Teuchos::RCP;
88  using Teuchos::rcp;
89  using Teuchos::rcp_dynamic_cast;
90  using Teuchos::REDUCE_SUM;
91  using Teuchos::reduceAll;
92  typedef global_ordinal_type GO;
93  typedef Tpetra::Map<local_ordinal_type, GO, node_type> map_type;
94 
96  // Set up the SuperLU_DIST processor grid //
98 
99  RCP<const Comm<int> > comm = this->getComm ();
100  const int myRank = comm->getRank ();
101  const int numProcs = comm->getSize ();
102 
103  SLUD::int_t nprow, npcol;
104  get_default_grid_size (numProcs, nprow, npcol);
105 
106  {
107  // FIXME (mfh 16 Dec 2014) getComm() just returns
108  // matrixA_->getComm(), so it's not clear why we need to ask for
109  // the matrix's communicator separately here.
110  RCP<const Comm<int> > matComm = this->matrixA_->getComm ();
111  TEUCHOS_TEST_FOR_EXCEPTION(
112  matComm.is_null (), std::logic_error, "Amesos2::Superlustdist "
113  "constructor: The matrix's communicator is null!");
114  RCP<const MpiComm<int> > matMpiComm =
115  rcp_dynamic_cast<const MpiComm<int> > (matComm);
116  // FIXME (mfh 16 Dec 2014) If the matrix's communicator is a
117  // SerialComm, we probably could just use MPI_COMM_SELF here.
118  // I'm not sure if SuperLU_DIST is smart enough to handle that
119  // case, though.
120  TEUCHOS_TEST_FOR_EXCEPTION(
121  matMpiComm.is_null (), std::logic_error, "Amesos2::Superlustdist "
122  "constructor: The matrix's communicator is not an MpiComm!");
123  TEUCHOS_TEST_FOR_EXCEPTION(
124  matMpiComm->getRawMpiComm ().is_null (), std::logic_error, "Amesos2::"
125  "Superlustdist constructor: The matrix's communicator claims to be a "
126  "Teuchos::MpiComm<int>, but its getRawPtrComm() method returns "
127  "Teuchos::null! This means that the underlying MPI_Comm doesn't even "
128  "exist, which likely implies that the Teuchos::MpiComm was constructed "
129  "incorrectly. It means something different than if the MPI_Comm were "
130  "MPI_COMM_NULL.");
131  MPI_Comm rawMpiComm = (* (matMpiComm->getRawMpiComm ())) ();
132  data_.mat_comm = rawMpiComm;
133  // This looks a bit like ScaLAPACK's grid initialization (which
134  // technically takes place in the BLACS, not in ScaLAPACK
135  // proper). See http://netlib.org/scalapack/slug/node34.html.
136  // The main difference is that SuperLU_DIST depends explicitly
137  // on MPI, while the BLACS hides its communication protocol.
138  SLUD::superlu_gridinit(data_.mat_comm, nprow, npcol, &(data_.grid));
139  }
140 
142  // Set some default parameters. //
143  // //
144  // Must do this after grid has been created in //
145  // case user specifies the nprow and npcol parameters //
147  SLUD::set_default_options_dist(&data_.options);
148 
149  RCP<ParameterList> default_params =
150  parameterList (* (this->getValidParameters ()));
151  this->setParameters (default_params);
152 
153  // Set some internal options
154  data_.options.Fact = SLUD::DOFACT;
155  data_.equed = SLUD::NOEQUIL; // No equilibration has yet been performed
156  data_.options.SolveInitialized = SLUD::NO;
157  data_.options.RefineInitialized = SLUD::NO;
158  data_.rowequ = false;
159  data_.colequ = false;
160  data_.perm_r.resize(this->globalNumRows_);
161  data_.perm_c.resize(this->globalNumCols_);
162 
164  // Set up a communicator for the parallel column ordering and //
165  // parallel symbolic factorization. //
167  data_.symb_comm = MPI_COMM_NULL;
168 
169  // domains is the next power of 2 less than nprow*npcol. This
170  // value will be used for creating an MPI communicator for the
171  // pre-ordering and symbolic factorization methods.
172  data_.domains = (int) ( pow(2.0, floor(log10((double)nprow*npcol)/log10(2.0))) );
173 
174  const int color = (myRank < data_.domains) ? 0 : MPI_UNDEFINED;
175  MPI_Comm_split (data_.mat_comm, color, myRank, &(data_.symb_comm));
176 
178  // Set up a row Map that only includes processes that are in the
179  // SuperLU process grid. This will be used for redistributing A.
181 
182  // mfh 16 Dec 2014: We could use createWeightedContigMapWithNode
183  // with myProcParticipates as the weight, but that costs an extra
184  // all-reduce.
185 
186  // Set to 1 if I am in the grid, and I get some of the matrix rows.
187  int myProcParticipates = 0;
188  if (myRank < nprow * npcol) {
189  in_grid_ = true;
190  myProcParticipates = 1;
191  }
192 
193  // Compute how many processes in the communicator belong to the
194  // process grid.
195  int numParticipatingProcs = 0;
196  reduceAll<int, int> (*comm, REDUCE_SUM, myProcParticipates,
197  outArg (numParticipatingProcs));
198  TEUCHOS_TEST_FOR_EXCEPTION(
199  this->globalNumRows_ != 0 && numParticipatingProcs == 0,
200  std::logic_error, "Amesos2::Superludist constructor: The matrix has "
201  << this->globalNumRows_ << " > 0 global row(s), but no processes in the "
202  "communicator want to participate in its factorization! nprow = "
203  << nprow << " and npcol = " << npcol << ".");
204 
205  // Divide up the rows among the participating processes.
206  size_t myNumRows = 0;
207  {
208  const GO GNR = static_cast<GO> (this->globalNumRows_);
209  const GO quotient = (numParticipatingProcs == 0) ? static_cast<GO> (0) :
210  GNR / static_cast<GO> (numParticipatingProcs);
211  const GO remainder =
212  GNR - quotient * static_cast<GO> (numParticipatingProcs);
213  const GO lclNumRows = (static_cast<GO> (myRank) < remainder) ?
214  (quotient + static_cast<GO> (1)) : quotient;
215  myNumRows = static_cast<size_t> (lclNumRows);
216  }
217 
218  // TODO: might only need to initialize if parallel symbolic factorization is requested.
219  const GO indexBase = this->matrixA_->getRowMap ()->getIndexBase ();
221  rcp (new map_type (this->globalNumRows_, myNumRows, indexBase, comm));
222 
224  // Do some other initialization //
226 
227  data_.A.Store = NULL;
228  function_map::LUstructInit(this->globalNumRows_, this->globalNumCols_, &(data_.lu));
229  SLUD::PStatInit(&(data_.stat));
230  // We do not use ScalePermstructInit because we will use our own
231  // arrays for storing perm_r and perm_c
232  data_.scale_perm.perm_r = data_.perm_r.getRawPtr();
233  data_.scale_perm.perm_c = data_.perm_c.getRawPtr();
234  }
235 
236 
237  template <class Matrix, class Vector>
239  {
240  /* Free SuperLU_DIST data_types
241  * - Matrices
242  * - Vectors
243  * - Stat object
244  * - ScalePerm, LUstruct, grid, and solve objects
245  *
246  * Note: the function definitions are the same regardless whether
247  * complex or real, so we arbitrarily use the D namespace
248  */
249  if ( this->status_.getNumPreOrder() > 0 ){
251 #if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
252  SUPERLU_FREE( data_.sizes );
253  SUPERLU_FREE( data_.fstVtxSep );
254 #else
255  free( data_.sizes );
256  free( data_.fstVtxSep );
257 #endif
258  }
259 
260  // Cleanup old matrix store memory if it's non-NULL. Our
261  // Teuchos::Array's will destroy rowind, colptr, and nzval for us
262  if( data_.A.Store != NULL ){
263  SLUD::Destroy_SuperMatrix_Store_dist( &(data_.A) );
264  }
265 
266  // LU data is initialized in numericFactorization_impl()
267  if ( this->status_.getNumNumericFact() > 0 ){
268  function_map::Destroy_LU(this->globalNumRows_, &(data_.grid), &(data_.lu));
269  }
270  function_map::LUstructFree(&(data_.lu));
271 
272  // If a symbolic factorization is ever performed without a
273  // follow-up numericfactorization, there are some arrays in the
274  // Pslu_freeable struct which will never be free'd by
275  // SuperLU_DIST.
276  if ( this->status_.symbolicFactorizationDone() &&
277  !this->status_.numericFactorizationDone() ){
278  if ( data_.pslu_freeable.xlsub != NULL ){
279 #if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
280  SUPERLU_FREE( data_.pslu_freeable.xlsub );
281  SUPERLU_FREE( data_.pslu_freeable.lsub );
282 #else
283  free( data_.pslu_freeable.xlsub );
284  free( data_.pslu_freeable.lsub );
285 #endif
286  }
287  if ( data_.pslu_freeable.xusub != NULL ){
288 #if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
289  SUPERLU_FREE( data_.pslu_freeable.xusub );
290  SUPERLU_FREE( data_.pslu_freeable.usub );
291 #else
292  free( data_.pslu_freeable.xusub );
293  free( data_.pslu_freeable.usub );
294 #endif
295  }
296  if ( data_.pslu_freeable.supno_loc != NULL ){
297 #if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
298  SUPERLU_FREE( data_.pslu_freeable.supno_loc );
299  SUPERLU_FREE( data_.pslu_freeable.xsup_beg_loc );
300  SUPERLU_FREE( data_.pslu_freeable.xsup_end_loc );
301 #else
302  free( data_.pslu_freeable.supno_loc );
303  free( data_.pslu_freeable.xsup_beg_loc );
304  free( data_.pslu_freeable.xsup_end_loc );
305 #endif
306  }
307 #if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
308  SUPERLU_FREE( data_.pslu_freeable.globToLoc );
309 #else
310  free( data_.pslu_freeable.globToLoc );
311 #endif
312  }
313 
314  SLUD::PStatFree( &(data_.stat) ) ;
315 
316  // Teuchos::Arrays will free R, C, perm_r, and perm_c
317  // SLUD::D::ScalePermstructFree(&(data_.scale_perm));
318 
319  if ( data_.options.SolveInitialized == SLUD::YES )
320  function_map::SolveFinalize(&(data_.options), &(data_.solve_struct));
321 
322  SLUD::superlu_gridexit(&(data_.grid)); // TODO: are there any
323  // cases where grid
324  // wouldn't be initialized?
325 
326  if ( data_.symb_comm != MPI_COMM_NULL ) MPI_Comm_free(&(data_.symb_comm));
327  }
328 
329  template<class Matrix, class Vector>
330  int
332  {
333  // We will always use the NATURAL row ordering to avoid the
334  // sequential bottleneck present when doing any other row
335  // ordering scheme from SuperLU_DIST
336  //
337  // Set perm_r to be the natural ordering
338  SLUD::int_t slu_rows_ub = Teuchos::as<SLUD::int_t>(this->globalNumRows_);
339  for( SLUD::int_t i = 0; i < slu_rows_ub; ++i ) data_.perm_r[i] = i;
340 
341  // loadA_impl(); // Refresh matrix values
342 
343  if( in_grid_ ){
344  // If this function has been called at least once, then the
345  // sizes, and fstVtxSep arrays were allocated in
346  // get_perm_c_parmetis. Delete them before calling that
347  // function again. These arrays will also be dealloc'd in the
348  // deconstructor.
349  if( this->status_.getNumPreOrder() > 0 ){
350 #if defined(AMESOS2_ENABLES_SUPERLUDIST_VERSION5_AND_HIGHER)
351  SUPERLU_FREE( data_.sizes );
352  SUPERLU_FREE( data_.fstVtxSep );
353 #else
354  free( data_.sizes );
355  free( data_.fstVtxSep );
356 #endif
357  }
358 #ifdef HAVE_AMESOS2_TIMERS
359  Teuchos::TimeMonitor preOrderTime( this->timers_.preOrderTime_ );
360 #endif
361 
362  float info = 0.0;
363  info = SLUD::get_perm_c_parmetis( &(data_.A),
364  data_.perm_r.getRawPtr(), data_.perm_c.getRawPtr(),
365  data_.grid.nprow * data_.grid.npcol, data_.domains,
366  &(data_.sizes), &(data_.fstVtxSep),
367  &(data_.grid), &(data_.symb_comm) );
368 
369  TEUCHOS_TEST_FOR_EXCEPTION( info > 0.0,
370  std::runtime_error,
371  "SuperLU_DIST pre-ordering ran out of memory after allocating "
372  << info << " bytes of memory" );
373  }
374 
375  // Ordering will be applied directly before numeric factorization,
376  // after we have a chance to get updated coefficients from the
377  // matrix
378 
379  return EXIT_SUCCESS;
380  }
381 
382 
383 
384  template <class Matrix, class Vector>
385  int
387  {
388  // loadA_impl(); // Refresh matrix values
389 
390  if( in_grid_ ){
391 
392 #ifdef HAVE_AMESOS2_TIMERS
393  Teuchos::TimeMonitor symFactTime( this->timers_.symFactTime_ );
394 #endif
395 
396  float info = 0.0;
397  info = SLUD::symbfact_dist((data_.grid.nprow) * (data_.grid.npcol),
398  data_.domains, &(data_.A), data_.perm_c.getRawPtr(),
399  data_.perm_r.getRawPtr(), data_.sizes,
400  data_.fstVtxSep, &(data_.pslu_freeable),
401  &(data_.grid.comm), &(data_.symb_comm),
402  &(data_.mem_usage));
403 
404  TEUCHOS_TEST_FOR_EXCEPTION( info > 0.0,
405  std::runtime_error,
406  "SuperLU_DIST symbolic factorization ran out of memory after"
407  " allocating " << info << " bytes of memory" );
408  }
409  same_symbolic_ = false;
410  same_solve_struct_ = false;
411 
412  return EXIT_SUCCESS;
413  }
414 
415 
416  template <class Matrix, class Vector>
417  int
419  using Teuchos::as;
420 
421  // loadA_impl(); // Refresh the matrix values
422 
423  // if( data_.options.Equil == SLUD::YES ){
424  // // Apply the scalings computed in preOrdering
425  // function_map::laqgs(&(data_.A), data_.R.getRawPtr(),
426  // data_.C.getRawPtr(), data_.rowcnd, data_.colcnd,
427  // data_.amax, &(data_.equed));
428 
429  // data_.rowequ = (data_.equed == SLUD::ROW) || (data_.equed == SLUD::BOTH);
430  // data_.colequ = (data_.equed == SLUD::COL) || (data_.equed == SLUD::BOTH);
431  // }
432 
433  if( in_grid_ ){
434  // Apply the column ordering, so that AC is the column-permuted A, and compute etree
435  size_t nnz_loc = ((SLUD::NRformat_loc*)data_.A.Store)->nnz_loc;
436  for( size_t i = 0; i < nnz_loc; ++i ) colind_[i] = data_.perm_c[colind_[i]];
437 
438  // Distribute data from the symbolic factorization
439  if( same_symbolic_ ){
440  // Note: with the SamePattern_SameRowPerm options, it does not
441  // matter that the glu_freeable member has never been
442  // initialized, because it is never accessed. It is a
443  // placeholder arg. The real work is done in data_.lu
444  function_map::pdistribute(SLUD::SamePattern_SameRowPerm,
445  as<SLUD::int_t>(this->globalNumRows_), // aka "n"
446  &(data_.A), &(data_.scale_perm),
447  &(data_.glu_freeable), &(data_.lu),
448  &(data_.grid));
449  } else {
450  function_map::dist_psymbtonum(SLUD::DOFACT,
451  as<SLUD::int_t>(this->globalNumRows_), // aka "n"
452  &(data_.A), &(data_.scale_perm),
453  &(data_.pslu_freeable), &(data_.lu),
454  &(data_.grid));
455  }
456 
457  // Retrieve the normI of A (required by gstrf).
458  double anorm = function_map::plangs((char *)"I", &(data_.A), &(data_.grid));
459 
460  int info = 0;
461  {
462 #ifdef HAVE_AMESOS2_TIMERS
463  Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
464 #endif
465 
466  function_map::gstrf(&(data_.options), this->globalNumRows_,
467  this->globalNumCols_, anorm, &(data_.lu),
468  &(data_.grid), &(data_.stat), &info);
469  }
470 
471  // Check output
472  TEUCHOS_TEST_FOR_EXCEPTION( info > 0,
473  std::runtime_error,
474  "L and U factors have been computed but U("
475  << info << "," << info << ") is exactly zero "
476  "(i.e. U is singular)");
477  }
478 
479  // The other option, that info_st < 0, denotes invalid parameters
480  // to the function, but we'll assume for now that that won't
481  // happen.
482 
483  data_.options.Fact = SLUD::FACTORED;
484  same_symbolic_ = true;
485 
486  return EXIT_SUCCESS;
487  }
488 
489 
490  template <class Matrix, class Vector>
491  int
493  const Teuchos::Ptr<const MultiVecAdapter<Vector> > B) const
494  {
495  using Teuchos::as;
496 
497  // local_len_rhs is how many of the multivector rows belong to
498  // this processor in the SuperLU_DIST processor grid.
499  const size_t local_len_rhs = superlu_rowmap_->getNodeNumElements();
500  const global_size_type nrhs = X->getGlobalNumVectors();
501  const global_ordinal_type first_global_row_b = superlu_rowmap_->getMinGlobalIndex();
502 
503  // make sure our multivector storage is sized appropriately
504  bvals_.resize(nrhs * local_len_rhs);
505  xvals_.resize(nrhs * local_len_rhs);
506 
507  // We assume the global length of the two vectors have already been
508  // checked for compatibility
509 
510  { // get the values from B
511 #ifdef HAVE_AMESOS2_TIMERS
512  Teuchos::TimeMonitor convTimer(this->timers_.vecConvTime_);
513 #endif
514 
515  {
516  // The input dense matrix for B should be distributed in the
517  // same manner as the superlu_dist matrix. That is, if a
518  // processor has m_loc rows of A, then it should also have
519  // m_loc rows of B (and the same rows). We accomplish this by
520  // distributing the multivector rows with the same Map that
521  // the matrix A's rows are distributed.
522 #ifdef HAVE_AMESOS2_TIMERS
523  Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
524 #endif
525 
526  // get grid-distributed mv data. The multivector data will be
527  // distributed across the processes in the SuperLU_DIST grid.
528  typedef Util::get_1d_copy_helper<MultiVecAdapter<Vector>,slu_type> copy_helper;
529  copy_helper::do_get(B,
530  bvals_(),
531  local_len_rhs,
532  Teuchos::ptrInArg(*superlu_rowmap_));
533  }
534  } // end block for conversion time
535 
536  if( in_grid_ ){
537  // if( data_.options.trans == SLUD::NOTRANS ){
538  // if( data_.rowequ ){ // row equilibration has been done on AC
539  // // scale bxvals_ by diag(R)
540  // Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.R(),
541  // SLUD::slu_mt_mult<slu_type,magnitude_type>());
542  // }
543  // } else if( data_.colequ ){ // column equilibration has been done on AC
544  // // scale bxvals_ by diag(C)
545  // Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.C(),
546  // SLUD::slu_mt_mult<slu_type,magnitude_type>());
547  // }
548 
549  // Initialize the SOLVEstruct_t.
550  //
551  // We are able to reuse the solve struct if we have not changed
552  // the sparsity pattern of L and U since the last solve
553  if( !same_solve_struct_ ){
554  if( data_.options.SolveInitialized == SLUD::YES ){
555  function_map::SolveFinalize(&(data_.options), &(data_.solve_struct));
556  }
557  function_map::SolveInit(&(data_.options), &(data_.A), data_.perm_r.getRawPtr(),
558  data_.perm_c.getRawPtr(), as<SLUD::int_t>(nrhs), &(data_.lu),
559  &(data_.grid), &(data_.solve_struct));
560  // Flag that we can reuse this solve_struct unless another
561  // symbolicFactorization is called between here and the next
562  // solve.
563  same_solve_struct_ = true;
564  }
565 
566  int ierr = 0; // returned error code
567  {
568 #ifdef HAVE_AMESOS2_TIMERS
569  Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
570 #endif
571 
572  function_map::gstrs(as<SLUD::int_t>(this->globalNumRows_), &(data_.lu),
573  &(data_.scale_perm), &(data_.grid), bvals_.getRawPtr(),
574  as<SLUD::int_t>(local_len_rhs), as<SLUD::int_t>(first_global_row_b),
575  as<SLUD::int_t>(local_len_rhs), as<int>(nrhs),
576  &(data_.solve_struct), &(data_.stat), &ierr);
577  } // end block for solve time
578 
579  TEUCHOS_TEST_FOR_EXCEPTION( ierr < 0,
580  std::runtime_error,
581  "Argument " << -ierr << " to gstrs had an illegal value" );
582 
583  // "Un-scale" the solution so that it is a solution of the original system
584  // if( data_.options.trans == SLUD::NOTRANS ){
585  // if( data_.colequ ){ // column equilibration has been done on AC
586  // // scale bxvals_ by diag(C)
587  // Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.C(),
588  // SLUD::slu_mt_mult<slu_type,magnitude_type>());
589  // }
590  // } else if( data_.rowequ ){ // row equilibration has been done on AC
591  // // scale bxvals_ by diag(R)
592  // Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.R(),
593  // SLUD::slu_mt_mult<slu_type,magnitude_type>());
594  // }
595  { // permute B to a solution of the original system
596 #ifdef HAVE_AMESOS2_TIMERS
597  Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
598 #endif
599  SLUD::int_t ld = as<SLUD::int_t>(local_len_rhs);
600  function_map::permute_Dense_Matrix(as<SLUD::int_t>(first_global_row_b),
601  as<SLUD::int_t>(local_len_rhs),
602  data_.solve_struct.row_to_proc,
603  data_.solve_struct.inv_perm_c,
604  bvals_.getRawPtr(), ld,
605  xvals_.getRawPtr(), ld,
606  as<int>(nrhs),
607  &(data_.grid));
608  }
609  }
610 
611  /* Update X's global values */
612  {
613 #ifdef HAVE_AMESOS2_TIMERS
614  Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
615 #endif
616 
617  typedef Util::put_1d_data_helper<MultiVecAdapter<Vector>,slu_type> put_helper;
618  put_helper::do_put(X,
619  xvals_(),
620  local_len_rhs,
621  Teuchos::ptrInArg(*superlu_rowmap_));
622  }
623 
624  return EXIT_SUCCESS;
625  }
626 
627 
628  template <class Matrix, class Vector>
629  bool
631  {
632  // SuperLU_DIST requires square matrices
633  return( this->globalNumRows_ == this->globalNumCols_ );
634  }
635 
636 
637  template <class Matrix, class Vector>
638  void
639  Superludist<Matrix,Vector>::setParameters_impl(const Teuchos::RCP<Teuchos::ParameterList> & parameterList )
640  {
641  using Teuchos::as;
642  using Teuchos::RCP;
643  using Teuchos::getIntegralValue;
644  using Teuchos::ParameterEntryValidator;
645 
646  RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
647 
648  if( parameterList->isParameter("npcol") || parameterList->isParameter("nprow") ){
649  TEUCHOS_TEST_FOR_EXCEPTION( !(parameterList->isParameter("nprow") &&
650  parameterList->isParameter("npcol")),
651  std::invalid_argument,
652  "nprow and npcol must be set together" );
653 
654  SLUD::int_t nprow = parameterList->template get<SLUD::int_t>("nprow");
655  SLUD::int_t npcol = parameterList->template get<SLUD::int_t>("npcol");
656 
657  TEUCHOS_TEST_FOR_EXCEPTION( nprow * npcol > this->getComm()->getSize(),
658  std::invalid_argument,
659  "nprow and npcol combination invalid" );
660 
661  if( (npcol != data_.grid.npcol) || (nprow != data_.grid.nprow) ){
662  // De-allocate the default grid that was initialized in the constructor
663  SLUD::superlu_gridexit(&(data_.grid));
664  // Create a new grid
665  SLUD::superlu_gridinit(data_.mat_comm, nprow, npcol, &(data_.grid));
666  } // else our grid has not changed size since the last initialization
667  }
668 
669  TEUCHOS_TEST_FOR_EXCEPTION( this->control_.useTranspose_,
670  std::invalid_argument,
671  "SuperLU_DIST does not support solving the tranpose system" );
672 
673  data_.options.Trans = SLUD::NOTRANS; // should always be set this way;
674 
675  // TODO: Uncomment when supported
676  // bool equil = parameterList->get<bool>("Equil", true);
677  // data_.options.Equil = equil ? SLUD::YES : SLUD::NO;
678  data_.options.Equil = SLUD::NO;
679 
680  if( parameterList->isParameter("ColPerm") ){
681  RCP<const ParameterEntryValidator> colperm_validator = valid_params->getEntry("ColPerm").validator();
682  parameterList->getEntry("ColPerm").setValidator(colperm_validator);
683 
684  data_.options.ColPerm = getIntegralValue<SLUD::colperm_t>(*parameterList, "ColPerm");
685  }
686 
687  // Always use the "NOROWPERM" option to avoid a serial bottleneck
688  // with the weighted bipartite matching algorithm used for the
689  // "LargeDiag" RowPerm. Note the inconsistency with the SuperLU
690  // User guide (which states that the value should be "NATURAL").
691  data_.options.RowPerm = SLUD::NOROWPERM;
692 
693  // TODO: Uncomment when supported
694  // if( parameterList->isParameter("IterRefine") ){
695  // RCP<const ParameterEntryValidator> iter_refine_validator = valid_params->getEntry("IterRefine").validator();
696  // parameterList->getEntry("IterRefine").setValidator(iter_refine_validator);
697 
698  // data_.options.IterRefine = getIntegralValue<SLUD::IterRefine_t>(*parameterList, "IterRefine");
699  // }
700  data_.options.IterRefine = SLUD::NOREFINE;
701 
702  bool replace_tiny = parameterList->get<bool>("ReplaceTinyPivot", true);
703  data_.options.ReplaceTinyPivot = replace_tiny ? SLUD::YES : SLUD::NO;
704 
705  if( parameterList->isParameter("IsContiguous") ){
706  is_contiguous_ = parameterList->get<bool>("IsContiguous");
707  }
708  }
709 
710 
711  template <class Matrix, class Vector>
712  Teuchos::RCP<const Teuchos::ParameterList>
714  {
715  using std::string;
716  using Teuchos::tuple;
717  using Teuchos::ParameterList;
718  using Teuchos::EnhancedNumberValidator;
719  using Teuchos::setStringToIntegralParameter;
720  using Teuchos::stringToIntegralParameterEntryValidator;
721 
722  static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
723 
724  if( is_null(valid_params) ){
725  Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
726 
727  Teuchos::RCP<EnhancedNumberValidator<SLUD::int_t> > col_row_validator
728  = Teuchos::rcp( new EnhancedNumberValidator<SLUD::int_t>() );
729  col_row_validator->setMin(1);
730 
731  pl->set("npcol", data_.grid.npcol,
732  "Number of columns in the processor grid. "
733  "Must be set with nprow", col_row_validator);
734  pl->set("nprow", data_.grid.nprow,
735  "Number of rows in the SuperLU_DIST processor grid. "
736  "Must be set together with npcol", col_row_validator);
737 
738  // validator will catch any value besides NOTRANS
739  setStringToIntegralParameter<SLUD::trans_t>("Trans", "NOTRANS",
740  "Solve for the transpose system or not",
741  tuple<string>("NOTRANS"),
742  tuple<string>("Do not solve with transpose"),
743  tuple<SLUD::trans_t>(SLUD::NOTRANS),
744  pl.getRawPtr());
745 
746  // TODO: uncomment when supported
747  // pl->set("Equil", false, "Whether to equilibrate the system before solve");
748 
749  // TODO: uncomment when supported
750  // setStringToIntegralParameter<SLUD::IterRefine_t>("IterRefine", "NOREFINE",
751  // "Type of iterative refinement to use",
752  // tuple<string>("NOREFINE", "DOUBLE"),
753  // tuple<string>("Do not use iterative refinement",
754  // "Do double iterative refinement"),
755  // tuple<SLUD::IterRefine_t>(SLUD::NOREFINE,
756  // SLUD::DOUBLE),
757  // pl.getRawPtr());
758 
759  pl->set("ReplaceTinyPivot", true,
760  "Specifies whether to replace tiny diagonals during LU factorization");
761 
762  setStringToIntegralParameter<SLUD::colperm_t>("ColPerm", "PARMETIS",
763  "Specifies how to permute the columns of the "
764  "matrix for sparsity preservation",
765  tuple<string>("NATURAL", "PARMETIS"),
766  tuple<string>("Natural ordering",
767  "ParMETIS ordering on A^T + A"),
768  tuple<SLUD::colperm_t>(SLUD::NATURAL,
769  SLUD::PARMETIS),
770  pl.getRawPtr());
771 
772  pl->set("IsContiguous", true, "Whether GIDs contiguous");
773 
774  valid_params = pl;
775  }
776 
777  return valid_params;
778  }
779 
780 
781  template <class Matrix, class Vector>
782  void
784  SLUD::int_t& nprow,
785  SLUD::int_t& npcol) const {
786  TEUCHOS_TEST_FOR_EXCEPTION( nprocs < 1,
787  std::invalid_argument,
788  "Number of MPI processes must be at least 1" );
789  SLUD::int_t c, r = 1;
790  while( r*r <= nprocs ) r++;
791  nprow = npcol = --r; // fall back to square grid
792  c = nprocs / r;
793  while( (r--)*c != nprocs ){
794  c = nprocs / r; // note integer division
795  }
796  ++r;
797  // prefer the square grid over a single row (which will only happen
798  // in the case of a prime nprocs
799  if( r > 1 || nprocs < 9){ // nprocs < 9 is a heuristic for the small cases
800  nprow = r;
801  npcol = c;
802  }
803  }
804 
805 
806  template <class Matrix, class Vector>
807  bool
809  // Extract the necessary information from mat and call SLU function
810  using Teuchos::Array;
811  using Teuchos::ArrayView;
812  using Teuchos::ptrInArg;
813  using Teuchos::as;
814 
815  using SLUD::int_t;
816 
817 #ifdef HAVE_AMESOS2_TIMERS
818  Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
819 #endif
820 
821  // Cleanup old store memory if it's non-NULL
822  if( data_.A.Store != NULL ){
823  SLUD::Destroy_SuperMatrix_Store_dist( &(data_.A) );
824  data_.A.Store = NULL;
825  }
826 
827  Teuchos::RCP<const MatrixAdapter<Matrix> > redist_mat
828  = this->matrixA_->get(ptrInArg(*superlu_rowmap_));
829 
830  int_t l_nnz, l_rows, g_rows, g_cols, fst_global_row;
831  l_nnz = as<int_t>(redist_mat->getLocalNNZ());
832  l_rows = as<int_t>(redist_mat->getLocalNumRows());
833  g_rows = as<int_t>(redist_mat->getGlobalNumRows());
834  g_cols = g_rows; // we deal with square matrices
835  fst_global_row = as<int_t>(superlu_rowmap_->getMinGlobalIndex());
836 
837  nzvals_.resize(l_nnz);
838  colind_.resize(l_nnz);
839  rowptr_.resize(l_rows + 1);
840 
841  int_t nnz_ret = 0;
842  {
843 #ifdef HAVE_AMESOS2_TIMERS
844  Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
845 #endif
846 
849  slu_type, int_t, int_t >::do_get(redist_mat.ptr(),
850  nzvals_(), colind_(), rowptr_(),
851  nnz_ret,
852  ptrInArg(*superlu_rowmap_),
853  ROOTED,
854  ARBITRARY);
855  }
856 
857  TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != l_nnz,
858  std::runtime_error,
859  "Did not get the expected number of non-zero vals");
860 
861  // Get the SLU data type for this type of matrix
862  SLUD::Dtype_t dtype = type_map::dtype;
863 
864  if( in_grid_ ){
865  function_map::create_CompRowLoc_Matrix(&(data_.A),
866  g_rows, g_cols,
867  l_nnz, l_rows, fst_global_row,
868  nzvals_.getRawPtr(),
869  colind_.getRawPtr(),
870  rowptr_.getRawPtr(),
871  SLUD::SLU_NR_loc,
872  dtype, SLUD::SLU_GE);
873  }
874 
875  return true;
876 }
877 
878 
879  template<class Matrix, class Vector>
880  const char* Superludist<Matrix,Vector>::name = "SuperLU_DIST";
881 
882 
883 } // end namespace Amesos2
884 
885 #endif // AMESOS2_SUPERLUDIST_DEF_HPP
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers...
Definition: Amesos2_SolverCore_decl.hpp:105
Amesos2 interface to the distributed memory version of SuperLU.
Definition: Amesos2_Superludist_decl.hpp:90
EPhase
Used to indicate a phase in the direct solution.
Definition: Amesos2_TypeDecl.hpp:65
Similar to get_ccs_helper , but used to get a CRS representation of the given matrix.
Definition: Amesos2_Util.hpp:780
global_size_type globalNumCols_
Number of global columns in matrixA_.
Definition: Amesos2_SolverCore_decl.hpp:478
Superludist(Teuchos::RCP< const Matrix > A, Teuchos::RCP< Vector > X, Teuchos::RCP< const Vector > B)
Initialize from Teuchos::RCP.
Definition: Amesos2_Superludist_def.hpp:68
void setParameters_impl(const Teuchos::RCP< Teuchos::ParameterList > &parameterList)
Definition: Amesos2_Superludist_def.hpp:639
global_size_type globalNumRows_
Number of global rows in matrixA_.
Definition: Amesos2_SolverCore_decl.hpp:475
Helper class for getting 1-D copies of multivectors.
Definition: Amesos2_MultiVecAdapter_decl.hpp:266
Utility functions for Amesos2.
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition: Amesos2_Superludist_def.hpp:713
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const
Return a const parameter list of all of the valid parameters that this-&gt;setParameterList(...) will accept.
Definition: Amesos2_SolverCore_def.hpp:314
Provides definition of SuperLU_DIST types as well as conversions and type traits. ...
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition: Amesos2_Superludist_def.hpp:331
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Returns a pointer to the Teuchos::Comm communicator with this operator.
Definition: Amesos2_SolverCore_decl.hpp:362
A Matrix adapter interface for Amesos2.
Definition: Amesos2_MatrixAdapter_decl.hpp:76
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition: Amesos2_Superludist_def.hpp:630
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > superlu_rowmap_
Maps rows of the matrix to processors in the SuperLU_DIST processor grid.
Definition: Amesos2_Superludist_decl.hpp:329
~Superludist()
Destructor.
Definition: Amesos2_Superludist_def.hpp:238
super_type & setParameters(const Teuchos::RCP< Teuchos::ParameterList > &parameterList)
Set/update internal variables and solver options.
Definition: Amesos2_SolverCore_def.hpp:282
int symbolicFactorization_impl()
Perform symbolic factorization of the matrix using SuperLU_DIST.
Definition: Amesos2_Superludist_def.hpp:386
void get_default_grid_size(int nprocs, SLUD::int_t &nprow, SLUD::int_t &npcol) const
Definition: Amesos2_Superludist_def.hpp:783
bool in_grid_
true if this processor is in SuperLU_DISTS&#39;s 2D process grid
Definition: Amesos2_Superludist_decl.hpp:322
Helper class for putting 1-D data arrays into multivectors.
Definition: Amesos2_MultiVecAdapter_decl.hpp:344
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
SuperLU_DIST specific solve.
Definition: Amesos2_Superludist_def.hpp:492
A templated MultiVector class adapter for Amesos2.
Definition: Amesos2_MultiVecAdapter_decl.hpp:176
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal solver structures.
Definition: Amesos2_Superludist_def.hpp:808
Teuchos::RCP< const MatrixAdapter< Matrix > > matrixA_
The LHS operator.
Definition: Amesos2_SolverCore_decl.hpp:454
int numericFactorization_impl()
SuperLU_DIST specific numeric factorization.
Definition: Amesos2_Superludist_def.hpp:418