Amesos2 - Direct Sparse Solver Interfaces  Version of the Day
Amesos2_KLU2_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // Amesos2: Templated Direct Sparse Solver Package
6 // Copyright 2011 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39 //
40 // ***********************************************************************
41 //
42 // @HEADER
43 
52 #ifndef AMESOS2_KLU2_DEF_HPP
53 #define AMESOS2_KLU2_DEF_HPP
54 
55 #include <Teuchos_Tuple.hpp>
56 #include <Teuchos_ParameterList.hpp>
57 #include <Teuchos_StandardParameterEntryValidators.hpp>
58 
60 #include "Amesos2_KLU2_decl.hpp"
61 
62 namespace Amesos2 {
63 
64 
65 template <class Matrix, class Vector>
67  Teuchos::RCP<const Matrix> A,
68  Teuchos::RCP<Vector> X,
69  Teuchos::RCP<const Vector> B )
70  : SolverCore<Amesos2::KLU2,Matrix,Vector>(A, X, B)
71  , transFlag_(0)
72  , is_contiguous_(true)
73 {
74  ::KLU2::klu_defaults<klu2_dtype, local_ordinal_type> (&(data_.common_)) ;
75  data_.symbolic_ = NULL;
76  data_.numeric_ = NULL;
77 
78  // Override some default options
79  // TODO: use data_ here to init
80 }
81 
82 
83 template <class Matrix, class Vector>
85 {
86  /* Free KLU2 data_types
87  * - Matrices
88  * - Vectors
89  * - Other data
90  */
91  if (data_.symbolic_ != NULL)
92  ::KLU2::klu_free_symbolic<klu2_dtype, local_ordinal_type>
93  (&(data_.symbolic_), &(data_.common_)) ;
94  if (data_.numeric_ != NULL)
95  ::KLU2::klu_free_numeric<klu2_dtype, local_ordinal_type>
96  (&(data_.numeric_), &(data_.common_)) ;
97 
98  // Storage is initialized in numericFactorization_impl()
99  //if ( data_.A.Store != NULL ){
100  // destoy
101  //}
102 
103  // only root allocated these SuperMatrices.
104  //if ( data_.L.Store != NULL ){ // will only be true for this->root_
105  // destroy ..
106  //}
107 }
108 
109 template <class Matrix, class Vector>
110 bool
112  return (this->root_ && (this->matrixA_->getComm()->getSize() == 1) && is_contiguous_);
113 }
114 
115 template<class Matrix, class Vector>
116 int
118 {
119  /* TODO: Define what it means for KLU2
120  */
121 #ifdef HAVE_AMESOS2_TIMERS
122  Teuchos::TimeMonitor preOrderTimer(this->timers_.preOrderTime_);
123 #endif
124 
125  return(0);
126 }
127 
128 
129 template <class Matrix, class Vector>
130 int
132 {
133  if (data_.symbolic_ != NULL) {
134  ::KLU2::klu_free_symbolic<klu2_dtype, local_ordinal_type>
135  (&(data_.symbolic_), &(data_.common_)) ;
136  }
137 
138  if ( single_proc_optimization() ) {
139  host_ordinal_type_array host_row_ptr_view;
140  host_ordinal_type_array host_cols_view;
141  this->matrixA_->returnRowPtr_kokkos_view(host_row_ptr_view);
142  this->matrixA_->returnColInd_kokkos_view(host_cols_view);
143  data_.symbolic_ = ::KLU2::klu_analyze<klu2_dtype, local_ordinal_type>
144  ((local_ordinal_type)this->globalNumCols_, host_row_ptr_view.data(),
145  host_cols_view.data(), &(data_.common_)) ;
146  }
147  else
148  {
149  data_.symbolic_ = ::KLU2::klu_analyze<klu2_dtype, local_ordinal_type>
150  ((local_ordinal_type)this->globalNumCols_, host_col_ptr_view_.data(),
151  host_rows_view_.data(), &(data_.common_)) ;
152 
153  } //end single_process_optim_check = false
154 
155  return(0);
156 }
157 
158 
// KLU2 numeric factorization.
//
// On the root process: frees any previous numeric object, then calls
// klu_factor with data_.symbolic_ and stores the result in data_.numeric_.
// With single_proc_optimization() the row-pointer / column-index / value
// views come from matrixA_; otherwise the host_col_ptr_view_,
// host_rows_view_ and host_nzvals_view_ members are used.
//
// A null result from klu_factor sets info = 1. info is then broadcast from
// rank 0 so that every process sees the same code, and info > 0 is reported
// through TEUCHOS_TEST_FOR_EXCEPTION as std::runtime_error.
//
// Returns info (0 when the exception test did not fire).
//
// NOTE(review): listing line 161 (the member declarator; the footer index
// records it as `int numericFactorization_impl()`) is absent from this
// extraction.
159 template <class Matrix, class Vector>
160 int
162 {
163  using Teuchos::as;
164 
165  // Cleanup old L and U matrices if we are not reusing a symbolic
166  // factorization. Stores and other data will be allocated in gstrf.
167  // Only rank 0 has valid pointers, TODO: for KLU2
168 
169  int info = 0;
170  if ( this->root_ ) {
171 
172  { // Do factorization
173 #ifdef HAVE_AMESOS2_TIMERS
174  Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
175 #endif
176 
  // Release the previous numeric factorization before computing a new one.
177  if (data_.numeric_ != NULL) {
178  ::KLU2::klu_free_numeric<klu2_dtype, local_ordinal_type>
179  (&(data_.numeric_), &(data_.common_));
180  }
181 
182  if ( single_proc_optimization() ) {
  // Factor straight from the views returned by matrixA_.
183  host_ordinal_type_array host_row_ptr_view;
184  host_ordinal_type_array host_cols_view;
185  this->matrixA_->returnRowPtr_kokkos_view(host_row_ptr_view);
186  this->matrixA_->returnColInd_kokkos_view(host_cols_view);
187  this->matrixA_->returnValues_kokkos_view(host_nzvals_view_);
188  klu2_dtype * pValues = function_map::convert_scalar(host_nzvals_view_.data());
189  data_.numeric_ = ::KLU2::klu_factor<klu2_dtype, local_ordinal_type>
190  (host_row_ptr_view.data(), host_cols_view.data(), pValues,
191  data_.symbolic_, &(data_.common_));
192  }
193  else {
  // Factor from the member views (filled by loadA_impl on the non-optimized path).
194  klu2_dtype * pValues = function_map::convert_scalar(host_nzvals_view_.data());
195  data_.numeric_ = ::KLU2::klu_factor<klu2_dtype, local_ordinal_type>
196  (host_col_ptr_view_.data(), host_rows_view_.data(), pValues,
197  data_.symbolic_, &(data_.common_));
198  } //end single_process_optim_check = false
199 
200  // To have a test which confirms a throw, we need MPI to throw on all the
201  // ranks. So we delay and broadcast first. Other throws in Amesos2 which
202  // happen on just the root rank would also have the same problem if we
203  // tested them but we decided to fix just this one for the present. This
204  // is the only error/throw we currently have a unit test for.
205  if(data_.numeric_ == nullptr) {
206  info = 1;
207  }
208 
209  // This is set after numeric factorization complete as pivoting can be used;
210  // In this case, a discrepancy between symbolic and numeric nnz total can occur.
211  if(info == 0) { // skip if error code so we don't segfault - will throw
212  this->setNnzLU( as<size_t>((data_.numeric_)->lnz) + as<size_t>((data_.numeric_)->unz) );
213  }
214  } // end scope
215 
216  } // end this->root_
217 
218  /* All processes should have the same error code */
219  Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
220 
  // Raised only after the broadcast, so the failure is seen on every rank.
221  TEUCHOS_TEST_FOR_EXCEPTION(info > 0, std::runtime_error,
222  "KLU2 numeric factorization failed");
223 
224  return(info);
225 }
226 
// KLU2 solve: computes X from B using the stored symbolic and numeric
// factorizations.
//
// Parameters:
//   X - multivector adapter that receives the solution.
//   B - multivector adapter holding the right-hand side(s).
//
// Steps:
//   1. Copy B and X into the host views bValues_ / xValues_. On the
//      non-optimized path bValues_ is then deep-copied into xValues_,
//      because the one-array solve routines overwrite their input.
//   2. Solve. With single_proc_optimization() and one right-hand side the
//      two-array klu_tsolve2 / klu_solve2 routines are used on every
//      caller; otherwise the in-place klu_tsolve / klu_solve routines run
//      on the root only.
//   3. Write xValues_ back into X.
//
// The transpose selection is intentionally inverted on the single-process
// path: the factorization there was built from Crs arrays while KLU2
// expects Ccs, so transFlag_ == 0 maps to the "t" (transpose) routine.
//
// Returns ierr, which is initialized to 0 and never modified in this body.
//
// NOTE(review): listing line 229 (the member declarator; the footer index
// records it as `int solve_impl(...) const`) is absent from this extraction.
227 template <class Matrix, class Vector>
228 int
230  const Teuchos::Ptr<MultiVecAdapter<Vector> > X,
231  const Teuchos::Ptr<const MultiVecAdapter<Vector> > B) const
232 {
233  using Teuchos::as;
234  int ierr = 0; // returned error code
235 
  // The leading dimension is only queried on the root; other processes use 0.
236  const global_size_type ld_rhs = this->root_ ? X->getGlobalLength() : 0;
237  const size_t nrhs = X->getGlobalNumVectors();
238 
239  {
240 #ifdef HAVE_AMESOS2_TIMERS
241  Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
242  Teuchos::TimeMonitor redistTimer( this->timers_.vecRedistTime_ );
243 #endif
244  if ( single_proc_optimization() && nrhs == 1 ) {
245  // no msp creation
246  Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
247  host_solve_array_t>::do_get(B, bValues_, as<size_t>(ld_rhs));
248 
249  Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
250  host_solve_array_t>::do_get(X, xValues_, as<size_t>(ld_rhs));
251  }
252  else {
  // Fetch B; the distribution argument depends on is_contiguous_.
253  if ( is_contiguous_ == true ) {
254  Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
255  host_solve_array_t>::do_get(B, bValues_,
256  as<size_t>(ld_rhs),
257  ROOTED, this->rowIndexBase_);
258  }
259  else {
260  Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
261  host_solve_array_t>::do_get(B, bValues_,
262  as<size_t>(ld_rhs),
263  CONTIGUOUS_AND_ROOTED, this->rowIndexBase_);
264  }
265 
266  // see Amesos2_Tacho_def.hpp for an explanation of why we 'get' X
267  if ( is_contiguous_ == true ) {
268  Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
269  host_solve_array_t>::do_get(X, xValues_,
270  as<size_t>(ld_rhs),
271  ROOTED, this->rowIndexBase_);
272  }
273  else {
274  Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
275  host_solve_array_t>::do_get(X, xValues_,
276  as<size_t>(ld_rhs),
277  CONTIGUOUS_AND_ROOTED, this->rowIndexBase_);
278  }
279 
280  // TODO: klu_tsolve is going to put the solution x into the input b.
281  // Copy b to x then solve in x.
282  // We do not want to solve in b, then copy to x, because if b was assigned
283  // then the solve will change b permanently and mess up the next test cycle.
284  // However if b was actually a copy (not assigned) then we can avoid this
285  // deep_copy and just assign xValues_ = bValues_.
286  // This comes up in a few places, see #7158, so planning to fix them all
287  // at the same time with some system to track what get_1d_copy_helper_kokkos_view
288  // actually did.
289  Kokkos::deep_copy(xValues_, bValues_);
290  }
291  }
292 
  // Raw pointers in the scalar type that the KLU2 routines take.
293  klu2_dtype * pxValues = function_map::convert_scalar(xValues_.data());
294  klu2_dtype * pbValues = function_map::convert_scalar(bValues_.data());
295 
296  // can be null for non root
297  if( this->root_) {
298  TEUCHOS_TEST_FOR_EXCEPTION(pbValues == nullptr,
299  std::runtime_error, "Amesos2 Runtime Error: b_vector returned null ");
300 
301  TEUCHOS_TEST_FOR_EXCEPTION(pxValues == nullptr,
302  std::runtime_error, "Amesos2 Runtime Error: x_vector returned null ");
303  }
304 
305  if ( single_proc_optimization() && nrhs == 1 ) {
306 #ifdef HAVE_AMESOS2_TIMERS
307  Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
308 #endif
309 
310  // For this case, Crs matrix raw pointers were used, so the non-transpose default solve
311  // is actually the transpose solve as klu_solve expects Ccs matrix pointers
312  // Thus, if the transFlag_ is true, the non-transpose solve should be used
313  if (transFlag_ == 0)
314  {
315  ::KLU2::klu_tsolve2<klu2_dtype, local_ordinal_type>
316  (data_.symbolic_, data_.numeric_,
317  (local_ordinal_type)this->globalNumCols_,
318  (local_ordinal_type)nrhs,
319  pbValues, pxValues, &(data_.common_)) ;
320  }
321  else {
322  ::KLU2::klu_solve2<klu2_dtype, local_ordinal_type>
323  (data_.symbolic_, data_.numeric_,
324  (local_ordinal_type)this->globalNumCols_,
325  (local_ordinal_type)nrhs,
326  pbValues, pxValues, &(data_.common_)) ;
327  }
328 
329  /* All processes should have the same error code */
330  // Teuchos::broadcast(*(this->getComm()), 0, &ierr);
331 
332  } // end single_process_optim_check && nrhs == 1
333  else // single proc optimizations but nrhs > 1,
334  // or distributed over processes case
335  {
  // Only the root performs the in-place solve on xValues_.
336  if ( this->root_ ) {
337 #ifdef HAVE_AMESOS2_TIMERS
338  Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
339 #endif
340  if (transFlag_ == 0)
341  {
342  // For this case, Crs matrix raw pointers were used, so the non-transpose default solve
343  // is actually the transpose solve as klu_solve expects Ccs matrix pointers
344  // Thus, if the transFlag_ is true, the non-transpose solve should be used
345  if ( single_proc_optimization() ) {
346  ::KLU2::klu_tsolve<klu2_dtype, local_ordinal_type>
347  (data_.symbolic_, data_.numeric_,
348  (local_ordinal_type)this->globalNumCols_,
349  (local_ordinal_type)nrhs,
350  pxValues, &(data_.common_)) ;
351  }
352  else {
353  ::KLU2::klu_solve<klu2_dtype, local_ordinal_type>
354  (data_.symbolic_, data_.numeric_,
355  (local_ordinal_type)this->globalNumCols_,
356  (local_ordinal_type)nrhs,
357  pxValues, &(data_.common_)) ;
358  }
359  }
360  else
361  {
362  // For this case, Crs matrix raw pointers were used, so the non-transpose default solve
363  // is actually the transpose solve as klu_solve expects Ccs matrix pointers
364  // Thus, if the transFlag_ is true, the non-transpose solve should be used
365  if ( single_proc_optimization() ) {
366  ::KLU2::klu_solve<klu2_dtype, local_ordinal_type>
367  (data_.symbolic_, data_.numeric_,
368  (local_ordinal_type)this->globalNumCols_,
369  (local_ordinal_type)nrhs,
370  pxValues, &(data_.common_)) ;
371  }
372  else {
373  ::KLU2::klu_tsolve<klu2_dtype, local_ordinal_type>
374  (data_.symbolic_, data_.numeric_,
375  (local_ordinal_type)this->globalNumCols_,
376  (local_ordinal_type)nrhs,
377  pxValues, &(data_.common_)) ;
378  }
379  }
380  } // end root_
381  } //end else
382 
383  {
384 #ifdef HAVE_AMESOS2_TIMERS
385  Teuchos::TimeMonitor redistTimer( this->timers_.vecRedistTime_ );
386 #endif
387 
  // Write the solution held in xValues_ back into X.
388  if ( is_contiguous_ == true ) {
389  Util::put_1d_data_helper_kokkos_view<
390  MultiVecAdapter<Vector>,host_solve_array_t>::do_put(X, xValues_,
391  as<size_t>(ld_rhs),
392  ROOTED, this->rowIndexBase_);
393  }
394  else {
395  Util::put_1d_data_helper_kokkos_view<
396  MultiVecAdapter<Vector>,host_solve_array_t>::do_put(X, xValues_,
397  as<size_t>(ld_rhs),
398  CONTIGUOUS_AND_ROOTED, this->rowIndexBase_);
399  }
400  }
401 
402  return(ierr);
403 }
404 
405 
406 template <class Matrix, class Vector>
407 bool
409 {
410  // The KLU2 factorization routines can handle square as well as
411  // rectangular matrices, but KLU2 can only apply the solve routines to
412  // square matrices, so we check the matrix for squareness.
413  return( this->matrixA_->getGlobalNumRows() == this->matrixA_->getGlobalNumCols() );
414 }
415 
416 
417 template <class Matrix, class Vector>
418 void
419 KLU2<Matrix,Vector>::setParameters_impl(const Teuchos::RCP<Teuchos::ParameterList> & parameterList )
420 {
421  using Teuchos::RCP;
422  using Teuchos::getIntegralValue;
423  using Teuchos::ParameterEntryValidator;
424 
425  RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
426 
427  transFlag_ = this->control_.useTranspose_ ? 1: 0;
428  // The KLU2 transpose option can override the Amesos2 option
429  if( parameterList->isParameter("Trans") ){
430  RCP<const ParameterEntryValidator> trans_validator = valid_params->getEntry("Trans").validator();
431  parameterList->getEntry("Trans").setValidator(trans_validator);
432 
433  transFlag_ = getIntegralValue<int>(*parameterList, "Trans");
434  }
435 
436  if( parameterList->isParameter("IsContiguous") ){
437  is_contiguous_ = parameterList->get<bool>("IsContiguous");
438  }
439 }
440 
441 
442 template <class Matrix, class Vector>
443 Teuchos::RCP<const Teuchos::ParameterList>
445 {
446  using std::string;
447  using Teuchos::tuple;
448  using Teuchos::ParameterList;
449  using Teuchos::setStringToIntegralParameter;
450 
451  static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
452 
453  if( is_null(valid_params) )
454  {
455  Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
456 
457  pl->set("Equil", true, "Whether to equilibrate the system before solve, does nothing now");
458  pl->set("IsContiguous", true, "Whether GIDs contiguous");
459 
460  setStringToIntegralParameter<int>("Trans", "NOTRANS",
461  "Solve for the transpose system or not",
462  tuple<string>("NOTRANS","TRANS","CONJ"),
463  tuple<string>("Solve with transpose",
464  "Do not solve with transpose",
465  "Solve with the conjugate transpose"),
466  tuple<int>(0, 1, 2),
467  pl.getRawPtr());
468  valid_params = pl;
469  }
470 
471  return valid_params;
472 }
473 
474 
475 template <class Matrix, class Vector>
476 bool
478 {
479  using Teuchos::as;
480 
481  if(current_phase == SOLVE)return(false);
482 
483  if ( single_proc_optimization() ) {
484  // Do nothing in this case - Crs raw pointers will be used
485  }
486  else
487  {
488 
489 #ifdef HAVE_AMESOS2_TIMERS
490  Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
491 #endif
492 
493  // Only the root image needs storage allocated
494  if( this->root_ ){
495  host_nzvals_view_ = host_value_type_array(
496  Kokkos::ViewAllocateWithoutInitializing("host_nzvals_view_"), this->globalNumNonZeros_);
497  host_rows_view_ = host_ordinal_type_array(
498  Kokkos::ViewAllocateWithoutInitializing("host_rows_view_"), this->globalNumNonZeros_);
499  host_col_ptr_view_ = host_ordinal_type_array(
500  Kokkos::ViewAllocateWithoutInitializing("host_col_ptr_view_"), this->globalNumRows_ + 1);
501  }
502 
503  local_ordinal_type nnz_ret = 0;
504  {
505 #ifdef HAVE_AMESOS2_TIMERS
506  Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
507 #endif
508 
509  if ( is_contiguous_ == true ) {
510  Util::get_ccs_helper_kokkos_view<
511  MatrixAdapter<Matrix>,host_value_type_array,host_ordinal_type_array,host_ordinal_type_array>
512  ::do_get(this->matrixA_.ptr(), host_nzvals_view_, host_rows_view_, host_col_ptr_view_,
513  nnz_ret, ROOTED, ARBITRARY, this->rowIndexBase_);
514  }
515  else {
516  Util::get_ccs_helper_kokkos_view<
517  MatrixAdapter<Matrix>,host_value_type_array,host_ordinal_type_array,host_ordinal_type_array>
518  ::do_get(this->matrixA_.ptr(), host_nzvals_view_, host_rows_view_, host_col_ptr_view_,
519  nnz_ret, CONTIGUOUS_AND_ROOTED, ARBITRARY, this->rowIndexBase_);
520  }
521  }
522 
523 
524  if( this->root_ ){
525  TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != as<local_ordinal_type>(this->globalNumNonZeros_),
526  std::runtime_error,
527  "Did not get the expected number of non-zero vals");
528  }
529 
530  } //end else single_process_optim_check = false
531 
532  return true;
533 }
534 
535 
// Definition of the static `name` member of the KLU2 adapter: the string "KLU2".
536 template<class Matrix, class Vector>
537 const char* KLU2<Matrix,Vector>::name = "KLU2";
538 
539 
540 } // end namespace Amesos2
541 
542 #endif // AMESOS2_KLU2_DEF_HPP
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers...
Definition: Amesos2_SolverCore_decl.hpp:105
KLU2(Teuchos::RCP< const Matrix > A, Teuchos::RCP< Vector > X, Teuchos::RCP< const Vector > B)
Initialize from Teuchos::RCP.
Definition: Amesos2_KLU2_def.hpp:66
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
KLU2 specific solve.
Definition: Amesos2_KLU2_def.hpp:229
EPhase
Used to indicate a phase in the direct solution.
Definition: Amesos2_TypeDecl.hpp:65
Amesos2 KLU2 declarations.
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal structures.
Definition: Amesos2_KLU2_def.hpp:477
~KLU2()
Destructor.
Definition: Amesos2_KLU2_def.hpp:84
void setParameters_impl(const Teuchos::RCP< Teuchos::ParameterList > &parameterList)
Definition: Amesos2_KLU2_def.hpp:419
int symbolicFactorization_impl()
Perform symbolic factorization of the matrix using KLU2.
Definition: Amesos2_KLU2_def.hpp:131
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition: Amesos2_KLU2_def.hpp:117
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition: Amesos2_KLU2_def.hpp:408
A Matrix adapter interface for Amesos2.
Definition: Amesos2_MatrixAdapter_decl.hpp:76
int numericFactorization_impl()
KLU2 specific numeric factorization.
Definition: Amesos2_KLU2_def.hpp:161
Amesos2 interface to the KLU2 package.
Definition: Amesos2_KLU2_decl.hpp:72
bool single_proc_optimization() const
can we optimize size_type and ordinal_type for straight pass through, also check that is_contiguous_ ...
Definition: Amesos2_KLU2_def.hpp:111
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition: Amesos2_KLU2_def.hpp:444
A templated MultiVector class adapter for Amesos2.
Definition: Amesos2_MultiVecAdapter_decl.hpp:176