Amesos2 - Direct Sparse Solver Interfaces  Version of the Day
Amesos2_cuSOLVER_def.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Amesos2: Templated Direct Sparse Solver Package
4 //
5 // Copyright 2011 NTESS and the Amesos2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef AMESOS2_CUSOLVER_DEF_HPP
11 #define AMESOS2_CUSOLVER_DEF_HPP
12 
13 #include <Teuchos_Tuple.hpp>
14 #include <Teuchos_ParameterList.hpp>
15 #include <Teuchos_StandardParameterEntryValidators.hpp>
16 
18 #include "Amesos2_cuSOLVER_decl.hpp"
19 
20 namespace Amesos2 {
21 
22 template <class Matrix, class Vector>
24  Teuchos::RCP<const Matrix> A,
25  Teuchos::RCP<Vector> X,
26  Teuchos::RCP<const Vector> B )
27  : SolverCore<Amesos2::cuSOLVER,Matrix,Vector>(A, X, B)
28 {
29  auto status = cusolverSpCreate(&data_.handle);
30  TEUCHOS_TEST_FOR_EXCEPTION( status != CUSOLVER_STATUS_SUCCESS,
31  std::runtime_error, "cusolverSpCreate failed");
32 
33  status = cusolverSpCreateCsrcholInfo(&data_.chol_info);
34  TEUCHOS_TEST_FOR_EXCEPTION( status != CUSOLVER_STATUS_SUCCESS,
35  std::runtime_error, "cusolverSpCreateCsrcholInfo failed");
36 
37  auto sparse_status = cusparseCreateMatDescr(&data_.desc);
38  TEUCHOS_TEST_FOR_EXCEPTION( sparse_status != CUSPARSE_STATUS_SUCCESS,
39  std::runtime_error, "cusparseCreateMatDescr failed");
40 }
41 
42 template <class Matrix, class Vector>
44 {
45  cusparseDestroyMatDescr(data_.desc);
46  cusolverSpDestroyCsrcholInfo(data_.chol_info);
47  cusolverSpDestroy(data_.handle);
48 }
49 
50 template<class Matrix, class Vector>
51 int
53 {
54 #ifdef HAVE_AMESOS2_TIMERS
55  Teuchos::TimeMonitor preOrderTimer(this->timers_.preOrderTime_);
56 #endif
57  if(do_optimization()) {
58  this->matrixA_->returnRowPtr_kokkos_view(device_row_ptr_view_);
59  this->matrixA_->returnColInd_kokkos_view(device_cols_view_);
60 
61  // reorder to optimize cuSolver
62  if(data_.bReorder) {
63  Amesos2::Util::reorder(
64  device_row_ptr_view_, device_cols_view_,
65  device_perm_, device_peri_, sorted_nnz,
66  true);
67  }
68  }
69 
70  return(0);
71 }
72 
73 template <class Matrix, class Vector>
74 int
76 {
77 #ifdef HAVE_AMESOS2_TIMERS
78  Teuchos::TimeMonitor symFactTimer(this->timers_.symFactTime_);
79 #endif
80 
81  int err = 0;
82  if ( this->root_ ) {
83  const int size = this->globalNumRows_;
84  const int nnz = device_cols_view_.size(); // reorder may have changed this
85  const int * colIdx = device_cols_view_.data();
86  const int * rowPtr = device_row_ptr_view_.data();
87  auto status = cusolverSpXcsrcholAnalysis(
88  data_.handle, size, nnz, data_.desc, rowPtr, colIdx, data_.chol_info);
89  err = (status != CUSOLVER_STATUS_SUCCESS) ? 1 : 0;
90  }
91 
92  Teuchos::broadcast(*(this->getComm()), 0, &err);
93  TEUCHOS_TEST_FOR_EXCEPTION(err != 0,
94  std::runtime_error, "Amesos2 cuSolver symbolic failed.");
95 
96  return err;
97 }
98 
99 template <class Matrix, class Vector>
100 int
102 {
103 #ifdef HAVE_AMESOS2_TIMERS
104  Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
105 #endif
106 
107  int err = 0;
108  if(do_optimization()) { // just supporting one rank right now
109  this->matrixA_->returnValues_kokkos_view(device_nzvals_view_);
110 
111  // reorder to optimize cuSolver
112  if(data_.bReorder) {
113  // must have original row and cols - maybe cache this from 1st symbiolic setup
114  // this setup exists to support the refactor option
115  device_size_type_array orig_device_row_ptr_view;
116  device_ordinal_type_array orig_device_cols_view;
117  this->matrixA_->returnRowPtr_kokkos_view(orig_device_row_ptr_view);
118  this->matrixA_->returnColInd_kokkos_view(orig_device_cols_view);
119  Amesos2::Util::reorder_values(
120  device_nzvals_view_, orig_device_row_ptr_view, device_row_ptr_view_, orig_device_cols_view,
121  device_perm_, device_peri_, sorted_nnz);
122  }
123 
124  const int size = this->globalNumRows_;
125  const int nnz = device_cols_view_.size(); // reorder may have changed this
126  const cusolver_type * values = device_nzvals_view_.data();
127  const int * colIdx = device_cols_view_.data();
128  const int * rowPtr = device_row_ptr_view_.data();
129 
130  size_t internalDataInBytes, workspaceInBytes;
131  auto status = function_map::bufferInfo(data_.handle, size, nnz, data_.desc,
132  values, rowPtr, colIdx, data_.chol_info,
133  &internalDataInBytes, &workspaceInBytes);
134 
135  if(status == CUSOLVER_STATUS_SUCCESS) {
136  const size_t buffer_size = workspaceInBytes / sizeof(cusolver_type);
137  if(buffer_size > buffer_.extent(0)) {
138  buffer_ = device_value_type_array(
139  Kokkos::ViewAllocateWithoutInitializing("cusolver buf"), buffer_size);
140  }
141  status = function_map::numeric(data_.handle, size, nnz, data_.desc,
142  values, rowPtr, colIdx, data_.chol_info, buffer_.data());
143  }
144  err = (status != CUSOLVER_STATUS_SUCCESS) ? 1 : 0;
145  }
146 
147  Teuchos::broadcast(*(this->getComm()), 0, &err);
148  TEUCHOS_TEST_FOR_EXCEPTION(err != 0,
149  std::runtime_error, "Amesos2 cuSolver numeric failed.");
150 
151  return err;
152 }
153 
154 template <class Matrix, class Vector>
155 int
157  const Teuchos::Ptr<MultiVecAdapter<Vector> > X,
158  const Teuchos::Ptr<const MultiVecAdapter<Vector> > B) const
159 {
160  const global_size_type ld_rhs = this->root_ ? X->getGlobalLength() : 0;
161  const ordinal_type nrhs = X->getGlobalNumVectors();
162 
163  bool bAssignedX;
164  { // Get values from RHS B
165 #ifdef HAVE_AMESOS2_TIMERS
166  Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
167 #endif
168 
169  const bool initialize_data = true;
170  const bool do_not_initialize_data = false;
171  Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
172  device_solve_array_t>::do_get(initialize_data, B, this->bValues_, Teuchos::as<size_t>(ld_rhs),
173  ROOTED, this->rowIndexBase_);
174 
175  // In general we may want to write directly to the x space without a copy.
176  // So we 'get' x which may be a direct view assignment to the MV.
177  bAssignedX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
178  device_solve_array_t>::do_get(do_not_initialize_data, X, this->xValues_, Teuchos::as<size_t>(ld_rhs),
179  ROOTED, this->rowIndexBase_);
180  }
181 
182  int err = 0;
183 
184  if ( this->root_ ) { // Do solve!
185 #ifdef HAVE_AMESOS2_TIMERS
186  Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
187 #endif
188 
189  const int size = this->globalNumRows_;
190 
191  if(data_.bReorder) {
192  Amesos2::Util::apply_reorder_permutation(
193  this->bValues_, this->permute_result_, this->device_perm_);
194  }
195  else {
196  this->permute_result_ = this->bValues_; // no permutation
197  }
198 
199  for(ordinal_type n = 0; n < nrhs; ++n) {
200  const cusolver_type * b = this->permute_result_.data() + n * size;
201  cusolver_type * x = this->xValues_.data() + n * size;
202  auto status = function_map::solve(
203  data_.handle, size, b, x, data_.chol_info, buffer_.data());
204  err = (status != CUSOLVER_STATUS_SUCCESS) ? 1 : 0;
205  if(err != 0) {
206  break;
207  }
208  }
209 
210  if(data_.bReorder && err == 0) {
211  Amesos2::Util::apply_reorder_permutation(
212  this->xValues_, this->permute_result_, this->device_peri_);
213  Kokkos::deep_copy(this->xValues_, this->permute_result_); // full copy since permute_result_ is reused
214  }
215  }
216 
217  /* Update X's global values */
218 
219  // if bDidAssignX, then we solved straight to the adapter's X memory space without
220  // requiring additional memory allocation, so the x data is already in place.
221  if(!bAssignedX) {
222 #ifdef HAVE_AMESOS2_TIMERS
223  Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
224 #endif
225 
226  Util::template put_1d_data_helper_kokkos_view<
227  MultiVecAdapter<Vector>,device_solve_array_t>::do_put(X, xValues_,
228  Teuchos::as<size_t>(ld_rhs), ROOTED, this->rowIndexBase_);
229  }
230 
231  Teuchos::broadcast(*(this->getComm()), 0, &err);
232  TEUCHOS_TEST_FOR_EXCEPTION(err != 0,
233  std::runtime_error, "Amesos2 cuSolver solve failed.");
234 
235  return err;
236 }
237 
238 template <class Matrix, class Vector>
239 bool
241 {
242  return( this->matrixA_->getGlobalNumRows() == this->matrixA_->getGlobalNumCols() );
243 }
244 
245 template <class Matrix, class Vector>
246 void
247 cuSOLVER<Matrix,Vector>::setParameters_impl(const Teuchos::RCP<Teuchos::ParameterList> & parameterList )
248 {
249  using Teuchos::RCP;
250  using Teuchos::ParameterEntryValidator;
251 
252  RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
253 
254  if( parameterList->isParameter("Reorder") ){
255  RCP<const ParameterEntryValidator> reorder_validator = valid_params->getEntry("Reorder").validator();
256  parameterList->getEntry("Reorder").setValidator(reorder_validator);
257  }
258  data_.bReorder = parameterList->get<bool>("Reorder", true);
259 }
260 
261 template <class Matrix, class Vector>
262 Teuchos::RCP<const Teuchos::ParameterList>
264 {
265  static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
266 
267  if( is_null(valid_params) ){
268  Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
269 
270  pl->set("Reorder", true, "Whether GIDs contiguous");
271 
272  valid_params = pl;
273  }
274 
275  return valid_params;
276 }
277 
278 template <class Matrix, class Vector>
279 bool
281  return (this->root_ && (this->matrixA_->getComm()->getSize() == 1));
282 }
283 
284 template <class Matrix, class Vector>
285 bool
287 {
288  if(current_phase == SOLVE) {
289  return(false);
290  }
291 
292  if(!do_optimization()) { // we're only doing serial right now for cuSolver
293  TEUCHOS_TEST_FOR_EXCEPTION( true, std::runtime_error,
294  "cuSolver is only implemented for serial.");
295  }
296 
297  return true;
298 }
299 
300 template<class Matrix, class Vector>
301 const char* cuSOLVER<Matrix,Vector>::name = "cuSOLVER";
302 
303 } // end namespace Amesos2
304 
305 #endif // AMESOS2_CUSOLVER_DEF_HPP
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers...
Definition: Amesos2_SolverCore_decl.hpp:71
const int size
Definition: klu2_simple.cpp:50
EPhase
Used to indicate a phase in the direct solution.
Definition: Amesos2_TypeDecl.hpp:31
void setParameters_impl(const Teuchos::RCP< Teuchos::ParameterList > &parameterList)
Definition: Amesos2_cuSOLVER_def.hpp:247
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition: Amesos2_cuSOLVER_def.hpp:240
cuSOLVER(Teuchos::RCP< const Matrix > A, Teuchos::RCP< Vector > X, Teuchos::RCP< const Vector > B)
Initialize from Teuchos::RCP.
Definition: Amesos2_cuSOLVER_def.hpp:23
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition: Amesos2_cuSOLVER_def.hpp:263
Amesos2 interface to Cholesky from cuSOLVER.
Definition: Amesos2_cuSOLVER_decl.hpp:25
int symbolicFactorization_impl()
Perform symbolic factorization of the matrix using cuSOLVER.
Definition: Amesos2_cuSOLVER_def.hpp:75
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal structures.
Definition: Amesos2_cuSOLVER_def.hpp:286
bool do_optimization() const
can we optimize size_type and ordinal_type for straight pass through
Definition: Amesos2_cuSOLVER_def.hpp:280
int numericFactorization_impl()
cuSOLVER specific numeric factorization
Definition: Amesos2_cuSOLVER_def.hpp:101
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition: Amesos2_cuSOLVER_def.hpp:52
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
cuSOLVER specific solve.
Definition: Amesos2_cuSOLVER_def.hpp:156
A templated MultiVector class adapter for Amesos2.
Definition: Amesos2_MultiVecAdapter_decl.hpp:142
~cuSOLVER()
Destructor.
Definition: Amesos2_cuSOLVER_def.hpp:43