Amesos2 - Direct Sparse Solver Interfaces  Version of the Day
Amesos2_KLU2_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Amesos2: Templated Direct Sparse Solver Package
4 //
5 // Copyright 2011 NTESS and the Amesos2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
18 #ifndef AMESOS2_KLU2_DEF_HPP
19 #define AMESOS2_KLU2_DEF_HPP
20 
21 #include <Teuchos_Tuple.hpp>
22 #include <Teuchos_ParameterList.hpp>
23 #include <Teuchos_StandardParameterEntryValidators.hpp>
24 
26 #include "Amesos2_KLU2_decl.hpp"
27 
28 namespace Amesos2 {
29 
30 
31 template <class Matrix, class Vector>
33  Teuchos::RCP<const Matrix> A,
34  Teuchos::RCP<Vector> X,
35  Teuchos::RCP<const Vector> B )
36  : SolverCore<Amesos2::KLU2,Matrix,Vector>(A, X, B)
37  , transFlag_(0)
38  , is_contiguous_(true)
39 {
40  ::KLU2::klu_defaults<klu2_dtype, local_ordinal_type> (&(data_.common_)) ;
41  data_.symbolic_ = NULL;
42  data_.numeric_ = NULL;
43 
44  // Override some default options
45  // TODO: use data_ here to init
46 }
47 
48 
49 template <class Matrix, class Vector>
51 {
52  /* Free KLU2 data_types
53  * - Matrices
54  * - Vectors
55  * - Other data
56  */
57  if (data_.symbolic_ != NULL)
58  ::KLU2::klu_free_symbolic<klu2_dtype, local_ordinal_type>
59  (&(data_.symbolic_), &(data_.common_)) ;
60  if (data_.numeric_ != NULL)
61  ::KLU2::klu_free_numeric<klu2_dtype, local_ordinal_type>
62  (&(data_.numeric_), &(data_.common_)) ;
63 
64  // Storage is initialized in numericFactorization_impl()
65  //if ( data_.A.Store != NULL ){
66  // destoy
67  //}
68 
69  // only root allocated these SuperMatrices.
70  //if ( data_.L.Store != NULL ){ // will only be true for this->root_
71  // destroy ..
72  //}
73 }
74 
75 template <class Matrix, class Vector>
76 bool
78  return (this->root_ && (this->matrixA_->getComm()->getSize() == 1) && is_contiguous_);
79 }
80 
81 template<class Matrix, class Vector>
82 int
84 {
85  /* TODO: Define what it means for KLU2
86  */
87 #ifdef HAVE_AMESOS2_TIMERS
88  Teuchos::TimeMonitor preOrderTimer(this->timers_.preOrderTime_);
89 #endif
90 
91  return(0);
92 }
93 
94 
95 template <class Matrix, class Vector>
96 int
98 {
99  if (data_.symbolic_ != NULL) {
100  ::KLU2::klu_free_symbolic<klu2_dtype, local_ordinal_type>
101  (&(data_.symbolic_), &(data_.common_)) ;
102  }
103 
104  if ( single_proc_optimization() ) {
105  host_ordinal_type_array host_row_ptr_view;
106  host_ordinal_type_array host_cols_view;
107  this->matrixA_->returnRowPtr_kokkos_view(host_row_ptr_view);
108  this->matrixA_->returnColInd_kokkos_view(host_cols_view);
109  data_.symbolic_ = ::KLU2::klu_analyze<klu2_dtype, local_ordinal_type>
110  ((local_ordinal_type)this->globalNumCols_, host_row_ptr_view.data(),
111  host_cols_view.data(), &(data_.common_)) ;
112  }
113  else
114  {
115  data_.symbolic_ = ::KLU2::klu_analyze<klu2_dtype, local_ordinal_type>
116  ((local_ordinal_type)this->globalNumCols_, host_col_ptr_view_.data(),
117  host_rows_view_.data(), &(data_.common_)) ;
118 
119  } //end single_process_optim_check = false
120 
121  return(0);
122 }
123 
124 
125 template <class Matrix, class Vector>
126 int
128 {
129  using Teuchos::as;
130 
131  // Cleanup old L and U matrices if we are not reusing a symbolic
132  // factorization. Stores and other data will be allocated in gstrf.
133  // Only rank 0 has valid pointers, TODO: for KLU2
134 
135  int info = 0;
136  if ( this->root_ ) {
137 
138  { // Do factorization
139 #ifdef HAVE_AMESOS2_TIMERS
140  Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
141 #endif
142 
143  if (data_.numeric_ != NULL) {
144  ::KLU2::klu_free_numeric<klu2_dtype, local_ordinal_type>
145  (&(data_.numeric_), &(data_.common_));
146  }
147 
148  if ( single_proc_optimization() ) {
149  host_ordinal_type_array host_row_ptr_view;
150  host_ordinal_type_array host_cols_view;
151  this->matrixA_->returnRowPtr_kokkos_view(host_row_ptr_view);
152  this->matrixA_->returnColInd_kokkos_view(host_cols_view);
153  this->matrixA_->returnValues_kokkos_view(host_nzvals_view_);
154  klu2_dtype * pValues = function_map::convert_scalar(host_nzvals_view_.data());
155  data_.numeric_ = ::KLU2::klu_factor<klu2_dtype, local_ordinal_type>
156  (host_row_ptr_view.data(), host_cols_view.data(), pValues,
157  data_.symbolic_, &(data_.common_));
158  }
159  else {
160  klu2_dtype * pValues = function_map::convert_scalar(host_nzvals_view_.data());
161  data_.numeric_ = ::KLU2::klu_factor<klu2_dtype, local_ordinal_type>
162  (host_col_ptr_view_.data(), host_rows_view_.data(), pValues,
163  data_.symbolic_, &(data_.common_));
164  } //end single_process_optim_check = false
165 
166  // To have a test which confirms a throw, we need MPI to throw on all the
167  // ranks. So we delay and broadcast first. Others throws in Amesos2 which
168  // happen on just the root rank would also have the same problem if we
169  // tested them but we decided to fix just this one for the present. This
170  // is the only error/throw we currently have a unit test for.
171  if(data_.numeric_ == nullptr) {
172  info = 1;
173  }
174 
175  // This is set after numeric factorization complete as pivoting can be used;
176  // In this case, a discrepancy between symbolic and numeric nnz total can occur.
177  if(info == 0) { // skip if error code so we don't segfault - will throw
178  this->setNnzLU( as<size_t>((data_.numeric_)->lnz) + as<size_t>((data_.numeric_)->unz) );
179  }
180  } // end scope
181 
182  } // end this->root_
183 
184  /* All processes should have the same error code */
185  Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
186 
187  TEUCHOS_TEST_FOR_EXCEPTION(info > 0, std::runtime_error,
188  "KLU2 numeric factorization failed");
189 
190  return(info);
191 }
192 
193 template <class Matrix, class Vector>
194 int
196  const Teuchos::Ptr<MultiVecAdapter<Vector> > X,
197  const Teuchos::Ptr<const MultiVecAdapter<Vector> > B) const
198 {
199  using Teuchos::as;
200  int ierr = 0; // returned error code
201 
202  const global_size_type ld_rhs = this->root_ ? X->getGlobalLength() : 0;
203  const size_t nrhs = X->getGlobalNumVectors();
204 
205  bool bDidAssignX;
206  bool bDidAssignB;
207  {
208 #ifdef HAVE_AMESOS2_TIMERS
209  Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
210  Teuchos::TimeMonitor redistTimer( this->timers_.vecRedistTime_ );
211 #endif
212  const bool initialize_data = true;
213  const bool do_not_initialize_data = false;
214  if ( single_proc_optimization() && nrhs == 1 ) {
215  // no msp creation
216  bDidAssignB = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
217  host_solve_array_t>::do_get(initialize_data, B, bValues_, as<size_t>(ld_rhs));
218 
219  bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
220  host_solve_array_t>::do_get(do_not_initialize_data, X, xValues_, as<size_t>(ld_rhs));
221  }
222  else {
223 
224  bDidAssignB = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
225  host_solve_array_t>::do_get(initialize_data, B, bValues_,
226  as<size_t>(ld_rhs),
227  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
228  this->rowIndexBase_);
229 
230  // see Amesos2_Tacho_def.hpp for an explanation of why we 'get' X
231  bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
232  host_solve_array_t>::do_get(do_not_initialize_data, X, xValues_,
233  as<size_t>(ld_rhs),
234  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
235  this->rowIndexBase_);
236 
237  // klu_tsolve is going to put the solution x into the input b.
238  // Copy b to x then solve in x.
239  // We do not want to solve in b, then copy to x, because if b was assigned
240  // then the solve will change b permanently and mess up the next test cycle.
241  // However if b was actually a copy (bDidAssignB = false) then we can avoid
242  // this deep_copy and just assign xValues_ = bValues_.
243  if(bDidAssignB) {
244  Kokkos::deep_copy(xValues_, bValues_); // need deep_copy or solve will change adapter's b memory which should never happen
245  }
246  else {
247  xValues_ = bValues_; // safe because bValues_ does not point straight to adapter's memory space
248  }
249  }
250  }
251 
252  klu2_dtype * pxValues = function_map::convert_scalar(xValues_.data());
253  klu2_dtype * pbValues = function_map::convert_scalar(bValues_.data());
254 
255  // can be null for non root
256  if( this->root_) {
257  TEUCHOS_TEST_FOR_EXCEPTION(pbValues == nullptr,
258  std::runtime_error, "Amesos2 Runtime Error: b_vector returned null ");
259 
260  TEUCHOS_TEST_FOR_EXCEPTION(pxValues == nullptr,
261  std::runtime_error, "Amesos2 Runtime Error: x_vector returned null ");
262  }
263 
264  if ( single_proc_optimization() && nrhs == 1 ) {
265 #ifdef HAVE_AMESOS2_TIMERS
266  Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
267 #endif
268 
269  // For this case, Crs matrix raw pointers were used, so the non-transpose default solve
270  // is actually the transpose solve as klu_solve expects Ccs matrix pointers
271  // Thus, if the transFlag_ is true, the non-transpose solve should be used
272  if (transFlag_ == 0)
273  {
274  ::KLU2::klu_tsolve2<klu2_dtype, local_ordinal_type>
275  (data_.symbolic_, data_.numeric_,
276  (local_ordinal_type)this->globalNumCols_,
277  (local_ordinal_type)nrhs,
278  pbValues, pxValues, &(data_.common_)) ;
279  }
280  else {
281  ::KLU2::klu_solve2<klu2_dtype, local_ordinal_type>
282  (data_.symbolic_, data_.numeric_,
283  (local_ordinal_type)this->globalNumCols_,
284  (local_ordinal_type)nrhs,
285  pbValues, pxValues, &(data_.common_)) ;
286  }
287 
288  /* All processes should have the same error code */
289  // Teuchos::broadcast(*(this->getComm()), 0, &ierr);
290 
291  } // end single_process_optim_check && nrhs == 1
292  else // single proc optimizations but nrhs > 1,
293  // or distributed over processes case
294  {
295  if ( this->root_ ) {
296 #ifdef HAVE_AMESOS2_TIMERS
297  Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
298 #endif
299  if (transFlag_ == 0)
300  {
301  // For this case, Crs matrix raw pointers were used, so the non-transpose default solve
302  // is actually the transpose solve as klu_solve expects Ccs matrix pointers
303  // Thus, if the transFlag_ is true, the non-transpose solve should be used
304  if ( single_proc_optimization() ) {
305  ::KLU2::klu_tsolve<klu2_dtype, local_ordinal_type>
306  (data_.symbolic_, data_.numeric_,
307  (local_ordinal_type)this->globalNumCols_,
308  (local_ordinal_type)nrhs,
309  pxValues, &(data_.common_)) ;
310  }
311  else {
312  ::KLU2::klu_solve<klu2_dtype, local_ordinal_type>
313  (data_.symbolic_, data_.numeric_,
314  (local_ordinal_type)this->globalNumCols_,
315  (local_ordinal_type)nrhs,
316  pxValues, &(data_.common_)) ;
317  }
318  }
319  else
320  {
321  // For this case, Crs matrix raw pointers were used, so the non-transpose default solve
322  // is actually the transpose solve as klu_solve expects Ccs matrix pointers
323  // Thus, if the transFlag_ is true, the non- transpose solve should be used
324  if ( single_proc_optimization() ) {
325  ::KLU2::klu_solve<klu2_dtype, local_ordinal_type>
326  (data_.symbolic_, data_.numeric_,
327  (local_ordinal_type)this->globalNumCols_,
328  (local_ordinal_type)nrhs,
329  pxValues, &(data_.common_)) ;
330  }
331  else {
332  ::KLU2::klu_tsolve<klu2_dtype, local_ordinal_type>
333  (data_.symbolic_, data_.numeric_,
334  (local_ordinal_type)this->globalNumCols_,
335  (local_ordinal_type)nrhs,
336  pxValues, &(data_.common_)) ;
337  }
338  }
339  } // end root_
340  } //end else
341 
342  // if bDidAssignX, then we solved straight to the adapter's X memory space without
343  // requiring additional memory allocation, so the x data is already in place.
344  if(!bDidAssignX) {
345 #ifdef HAVE_AMESOS2_TIMERS
346  Teuchos::TimeMonitor redistTimer( this->timers_.vecRedistTime_ );
347 #endif
348 
349  Util::put_1d_data_helper_kokkos_view<
350  MultiVecAdapter<Vector>,host_solve_array_t>::do_put(X, xValues_,
351  as<size_t>(ld_rhs),
352  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
353  this->rowIndexBase_);
354 
355  }
356 
357  return(ierr);
358 }
359 
360 
361 template <class Matrix, class Vector>
362 bool
364 {
365  // The KLU2 factorization routines can handle square as well as
366  // rectangular matrices, but KLU2 can only apply the solve routines to
367  // square matrices, so we check the matrix for squareness.
368  return( this->matrixA_->getGlobalNumRows() == this->matrixA_->getGlobalNumCols() );
369 }
370 
371 
372 template <class Matrix, class Vector>
373 void
374 KLU2<Matrix,Vector>::setParameters_impl(const Teuchos::RCP<Teuchos::ParameterList> & parameterList )
375 {
376  using Teuchos::RCP;
377  using Teuchos::getIntegralValue;
378  using Teuchos::ParameterEntryValidator;
379 
380  RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
381 
382  transFlag_ = this->control_.useTranspose_ ? 1: 0;
383  // The KLU2 transpose option can override the Amesos2 option
384  if( parameterList->isParameter("Trans") ){
385  RCP<const ParameterEntryValidator> trans_validator = valid_params->getEntry("Trans").validator();
386  parameterList->getEntry("Trans").setValidator(trans_validator);
387 
388  transFlag_ = getIntegralValue<int>(*parameterList, "Trans");
389  }
390 
391  if( parameterList->isParameter("IsContiguous") ){
392  is_contiguous_ = parameterList->get<bool>("IsContiguous");
393  }
394 }
395 
396 
397 template <class Matrix, class Vector>
398 Teuchos::RCP<const Teuchos::ParameterList>
400 {
401  using std::string;
402  using Teuchos::tuple;
403  using Teuchos::ParameterList;
404  using Teuchos::setStringToIntegralParameter;
405 
406  static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
407 
408  if( is_null(valid_params) )
409  {
410  Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
411 
412  pl->set("Equil", true, "Whether to equilibrate the system before solve, does nothing now");
413  pl->set("IsContiguous", true, "Whether GIDs contiguous");
414 
415  setStringToIntegralParameter<int>("Trans", "NOTRANS",
416  "Solve for the transpose system or not",
417  tuple<string>("NOTRANS","TRANS","CONJ"),
418  tuple<string>("Solve with transpose",
419  "Do not solve with transpose",
420  "Solve with the conjugate transpose"),
421  tuple<int>(0, 1, 2),
422  pl.getRawPtr());
423  valid_params = pl;
424  }
425 
426  return valid_params;
427 }
428 
429 
430 template <class Matrix, class Vector>
431 bool
433 {
434  using Teuchos::as;
435 
436  if(current_phase == SOLVE)return(false);
437 
438  if ( single_proc_optimization() ) {
439  // Do nothing in this case - Crs raw pointers will be used
440  }
441  else
442  {
443 
444 #ifdef HAVE_AMESOS2_TIMERS
445  Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
446 #endif
447 
448  // Only the root image needs storage allocated
449  if( this->root_ ) {
450  host_nzvals_view_ = host_value_type_array(
451  Kokkos::ViewAllocateWithoutInitializing("host_nzvals_view_"), this->globalNumNonZeros_);
452  host_rows_view_ = host_ordinal_type_array(
453  Kokkos::ViewAllocateWithoutInitializing("host_rows_view_"), this->globalNumNonZeros_);
454  host_col_ptr_view_ = host_ordinal_type_array(
455  Kokkos::ViewAllocateWithoutInitializing("host_col_ptr_view_"), this->globalNumRows_ + 1);
456  }
457 
458  local_ordinal_type nnz_ret = 0;
459  {
460 #ifdef HAVE_AMESOS2_TIMERS
461  Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
462 #endif
463 
465  MatrixAdapter<Matrix>,host_value_type_array,host_ordinal_type_array,host_ordinal_type_array>
466  ::do_get(this->matrixA_.ptr(), host_nzvals_view_, host_rows_view_, host_col_ptr_view_, nnz_ret,
467  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
468  ARBITRARY,
469  this->rowIndexBase_);
470  }
471 
472  if( this->root_ ) {
473  TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != as<local_ordinal_type>(this->globalNumNonZeros_),
474  std::runtime_error,
475  "Did not get the expected number of non-zero vals");
476  }
477 
478  } //end else single_process_optim_check = false
479 
480  return true;
481 }
482 
483 
484 template<class Matrix, class Vector>
485 const char* KLU2<Matrix,Vector>::name = "KLU2";
486 
487 
488 } // end namespace Amesos2
489 
490 #endif // AMESOS2_KLU2_DEF_HPP
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers...
Definition: Amesos2_SolverCore_decl.hpp:71
KLU2(Teuchos::RCP< const Matrix > A, Teuchos::RCP< Vector > X, Teuchos::RCP< const Vector > B)
Initialize from Teuchos::RCP.
Definition: Amesos2_KLU2_def.hpp:32
A generic helper class for getting a CCS representation of a Matrix.
Definition: Amesos2_Util.hpp:614
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
KLU2 specific solve.
Definition: Amesos2_KLU2_def.hpp:195
EPhase
Used to indicate a phase in the direct solution.
Definition: Amesos2_TypeDecl.hpp:31
Amesos2 KLU2 declarations.
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal structures.
Definition: Amesos2_KLU2_def.hpp:432
~KLU2()
Destructor.
Definition: Amesos2_KLU2_def.hpp:50
void setParameters_impl(const Teuchos::RCP< Teuchos::ParameterList > &parameterList)
Definition: Amesos2_KLU2_def.hpp:374
int symbolicFactorization_impl()
Perform symbolic factorization of the matrix using KLU2.
Definition: Amesos2_KLU2_def.hpp:97
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition: Amesos2_KLU2_def.hpp:83
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition: Amesos2_KLU2_def.hpp:363
A Matrix adapter interface for Amesos2.
Definition: Amesos2_MatrixAdapter_decl.hpp:42
int numericFactorization_impl()
KLU2 specific numeric factorization.
Definition: Amesos2_KLU2_def.hpp:127
Amesos2 interface to the KLU2 package.
Definition: Amesos2_KLU2_decl.hpp:38
bool single_proc_optimization() const
can we optimize size_type and ordinal_type for straight pass through, also check that is_contiguous_ ...
Definition: Amesos2_KLU2_def.hpp:77
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition: Amesos2_KLU2_def.hpp:399
A templated MultiVector class adapter for Amesos2.
Definition: Amesos2_MultiVecAdapter_decl.hpp:142