Amesos2 - Direct Sparse Solver Interfaces  Version of the Day
Amesos2_Tacho_def.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Amesos2: Templated Direct Sparse Solver Package
4 //
5 // Copyright 2011 NTESS and the Amesos2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef AMESOS2_TACHO_DEF_HPP
11 #define AMESOS2_TACHO_DEF_HPP
12 
13 #include <Teuchos_Tuple.hpp>
14 #include <Teuchos_ParameterList.hpp>
15 #include <Teuchos_StandardParameterEntryValidators.hpp>
16 
18 #include "Amesos2_Tacho_decl.hpp"
19 #include "Amesos2_Util.hpp"
20 
21 namespace Amesos2 {
22 
23 template <class Matrix, class Vector>
25  Teuchos::RCP<const Matrix> A,
26  Teuchos::RCP<Vector> X,
27  Teuchos::RCP<const Vector> B )
28  : SolverCore<Amesos2::TachoSolver,Matrix,Vector>(A, X, B)
29 {
30  data_.method = 1; // Cholesky
31  data_.variant = 2; // solver variant
32  data_.streams = 1; // # of streams
33  data_.verbose = false; // verbose
34 }
35 
36 
37 template <class Matrix, class Vector>
39 {
40  if ( this->root_ ) {
41  data_.solver.release();
42  }
43 }
44 
45 template <class Matrix, class Vector>
46 std::string
48 {
49  std::ostringstream oss;
50  oss << "Tacho solver interface";
51  return oss.str();
52 }
53 
54 template<class Matrix, class Vector>
55 int
57 {
58  return(0);
59 }
60 
61 template <class Matrix, class Vector>
62 int
64 {
65 #ifdef HAVE_AMESOS2_TIMERS
66  Teuchos::TimeMonitor symFactTime( this->timers_.symFactTime_ );
67 #endif
68 
69  int status = 0;
70  if ( this->root_ ) {
71  if(do_optimization()) {
72  this->matrixA_->returnRowPtr_kokkos_view(host_row_ptr_view_);
73  this->matrixA_->returnColInd_kokkos_view(host_cols_view_);
74  }
75 
76  data_.solver.setSolutionMethod(data_.method);
77  data_.solver.setLevelSetOptionAlgorithmVariant(data_.variant);
78  data_.solver.setSmallProblemThresholdsize(data_.small_problem_threshold_size);
79  data_.solver.setVerbose(data_.verbose);
80  data_.solver.setLevelSetOptionNumStreams(data_.streams);
81  // TODO: Confirm param options
82  // data_.solver.setMaxNumberOfSuperblocks(data_.max_num_superblocks);
83 
84  // Symbolic factorization currently must be done on host
85  data_.solver.analyze(this->globalNumCols_, host_row_ptr_view_, host_cols_view_);
86  data_.solver.initialize();
87  }
88  return status;
89 }
90 
91 
92 template <class Matrix, class Vector>
93 int
95 {
96 #ifdef HAVE_AMESOS2_TIMERS
97  Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
98 #endif
99 
100  int status = 0;
101  if ( this->root_ ) {
102  if(do_optimization()) {
103  this->matrixA_->returnValues_kokkos_view(device_nzvals_view_);
104  }
105  data_.solver.factorize(device_nzvals_view_);
106  }
107  return status;
108 }
109 
110 template <class Matrix, class Vector>
111 int
113  const Teuchos::Ptr<const MultiVecAdapter<Vector> > B) const
114 {
115  using Teuchos::as;
116 
117  const global_size_type ld_rhs = this->root_ ? X->getGlobalLength() : 0;
118  const size_t nrhs = X->getGlobalNumVectors();
119 
120  // don't allocate b since it's handled by the copy manager and might just be
121  // be assigned, not copied anyways.
122  // also don't allocate x since we will also use do_get to allocate this if
123  // necessary. When a copy is not necessary we'll solve directly to the x
124  // values in the MV.
125  bool bDidAssignX;
126  { // Get values from RHS B
127 #ifdef HAVE_AMESOS2_TIMERS
128  Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
129  Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
130 #endif
131  const bool initialize_data = true;
132  const bool do_not_initialize_data = false;
133  Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
134  device_solve_array_t>::do_get(initialize_data, B, this->bValues_,
135  as<size_t>(ld_rhs),
136  ROOTED, this->rowIndexBase_);
137  bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
138  device_solve_array_t>::do_get(do_not_initialize_data, X, this->xValues_,
139  as<size_t>(ld_rhs),
140  ROOTED, this->rowIndexBase_);
141  }
142 
143  int ierr = 0; // returned error code
144 
145  if ( this->root_ ) { // Do solve!
146 #ifdef HAVE_AMESOS2_TIMER
147  Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
148 #endif
149  // Bump up the workspace size if needed
150  if (workspace_.extent(0) < this->globalNumRows_ || workspace_.extent(1) < nrhs) {
151  workspace_ = device_solve_array_t(
152  Kokkos::ViewAllocateWithoutInitializing("t"), this->globalNumRows_, nrhs);
153  }
154 
155  data_.solver.solve(xValues_, bValues_, workspace_);
156 
157  int status = 0; // TODO: determine what error handling will be
158  if(status != 0) {
159  ierr = status;
160  }
161  }
162 
163  /* All processes should have the same error code */
164  Teuchos::broadcast(*(this->getComm()), 0, &ierr);
165 
166  TEUCHOS_TEST_FOR_EXCEPTION( ierr != 0, std::runtime_error,
167  "tacho_solve has error code: " << ierr );
168 
169  /* Update X's global values */
170 
171  // if bDidAssignX, then we solved straight to the adapter's X memory space without
172  // requiring additional memory allocation, so the x data is already in place.
173  if(!bDidAssignX) {
174 #ifdef HAVE_AMESOS2_TIMERS
175  Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
176 #endif
177 
178  // This will do nothing is if the target view matches the src view, which
179  // can be the case if the memory spaces match. See comments above for do_get.
180  Util::template put_1d_data_helper_kokkos_view<
181  MultiVecAdapter<Vector>,device_solve_array_t>::do_put(X, xValues_,
182  as<size_t>(ld_rhs),
183  ROOTED, this->rowIndexBase_);
184  }
185 
186  return(ierr);
187 }
188 
189 
190 template <class Matrix, class Vector>
191 bool
193 {
194  // Tacho can only apply the solve routines to square matrices
195  return( this->matrixA_->getGlobalNumRows() == this->matrixA_->getGlobalNumCols() );
196 }
197 
198 
199 template <class Matrix, class Vector>
200 void
201 TachoSolver<Matrix,Vector>::setParameters_impl(const Teuchos::RCP<Teuchos::ParameterList> & parameterList )
202 {
203  RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
204 
205  // TODO: Confirm param options
206 
207  // factorization type
208  auto method_name = parameterList->get<std::string> ("method", "chol");
209  if (method_name == "chol")
210  data_.method = 1;
211  else if (method_name == "ldl")
212  data_.method = 2;
213  else if (method_name == "lu")
214  data_.method = 3;
215  else {
216  std::cout << "Error: not supported solution method\n";
217  }
218  // solver type
219  data_.variant = parameterList->get<int> ("variant", 2);
220  // small problem threshold
221  data_.small_problem_threshold_size = parameterList->get<int> ("small problem threshold size", 1024);
222  // verbosity
223  data_.verbose = parameterList->get<bool> ("verbose", false);
224  // # of streams
225  data_.streams = parameterList->get<int> ("num-streams", 1);
226  // TODO: Confirm param options
227  // data_.num_kokkos_threads = parameterList->get<int>("kokkos-threads", 1);
228  // data_.max_num_superblocks = parameterList->get<int>("max-num-superblocks", 4);
229 }
230 
231 
232 template <class Matrix, class Vector>
233 Teuchos::RCP<const Teuchos::ParameterList>
235 {
236  static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
237 
238  if( is_null(valid_params) ){
239  Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
240 
241  pl->set("method", "chol", "Type of factorization, chol, ldl, or lu");
242  pl->set("variant", 2, "Type of solver variant, 0, 1, or 2");
243  pl->set("small problem threshold size", 1024, "Problem size threshold below with Tacho uses LAPACK.");
244  pl->set("verbose", false, "Verbosity");
245  pl->set("num-streams", 1, "Number of GPU streams");
246 
247  // TODO: Confirm param options
248  // pl->set("kokkos-threads", 1, "Number of threads");
249  // pl->set("max-num-superblocks", 4, "Max number of superblocks");
250 
251  valid_params = pl;
252  }
253 
254  return valid_params;
255 }
256 
257 template <class Matrix, class Vector>
258 bool
260  return (this->root_ && (this->matrixA_->getComm()->getSize() == 1));
261 }
262 
263 template <class Matrix, class Vector>
264 bool
266 {
267 
268  if(current_phase == SOLVE) {
269  return(false);
270  }
271 
272  if(!do_optimization()) {
273 #ifdef HAVE_AMESOS2_TIMERS
274  Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
275 #endif
276 
277  // Note views are allocated but eventually we should remove this.
278  // The internal copy manager will decide if we can assign or deep_copy
279  // and then allocate if necessary. However the GPU solvers are serial right
280  // now so I didn't complete refactoring the matrix code for the parallel
281  // case. If we added that later, we should have it hooked up to the copy
282  // manager and then these allocations can go away.
283  if( this->root_ ) {
284  device_nzvals_view_ = device_value_type_array(
285  Kokkos::ViewAllocateWithoutInitializing("nzvals"), this->globalNumNonZeros_);
286  host_cols_view_ = host_ordinal_type_array(
287  Kokkos::ViewAllocateWithoutInitializing("colind"), this->globalNumNonZeros_);
288  host_row_ptr_view_ = host_size_type_array(
289  Kokkos::ViewAllocateWithoutInitializing("rowptr"), this->globalNumRows_ + 1);
290  }
291 
292  typename host_size_type_array::value_type nnz_ret = 0;
293  {
294  #ifdef HAVE_AMESOS2_TIMERS
295  Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
296  #endif
297 
298  TEUCHOS_TEST_FOR_EXCEPTION( this->rowIndexBase_ != this->columnIndexBase_,
299  std::runtime_error,
300  "Row and column maps have different indexbase ");
301 
303  device_value_type_array, host_ordinal_type_array, host_size_type_array>::do_get(
304  this->matrixA_.ptr(),
305  device_nzvals_view_,
306  host_cols_view_,
307  host_row_ptr_view_,
308  nnz_ret,
309  ROOTED, ARBITRARY,
310  this->columnIndexBase_);
311  }
312  }
313 
314  return true;
315 }
316 
317 
318 template<class Matrix, class Vector>
319 const char* TachoSolver<Matrix,Vector>::name = "Tacho";
320 
321 
322 } // end namespace Amesos2
323 
324 #endif // AMESOS2_TACHO_DEF_HPP
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers...
Definition: Amesos2_SolverCore_decl.hpp:71
EPhase
Used to indicate a phase in the direct solution.
Definition: Amesos2_TypeDecl.hpp:31
Amesos2 interface to the Tacho package.
Definition: Amesos2_Tacho_decl.hpp:33
int symbolicFactorization_impl()
Perform symbolic factorization of the matrix using Tacho.
Definition: Amesos2_Tacho_def.hpp:63
Utility functions for Amesos2.
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition: Amesos2_Tacho_def.hpp:192
int numericFactorization_impl()
Tacho specific numeric factorization.
Definition: Amesos2_Tacho_def.hpp:94
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition: Amesos2_Tacho_def.hpp:56
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition: Amesos2_Tacho_def.hpp:234
Similar to get_ccs_helper , but used to get a CRS representation of the given matrix.
Definition: Amesos2_Util.hpp:625
std::string description() const override
Returns a short description of this Solver.
Definition: Amesos2_Tacho_def.hpp:47
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
Tacho specific solve.
Definition: Amesos2_Tacho_def.hpp:112
TachoSolver(Teuchos::RCP< const Matrix > A, Teuchos::RCP< Vector > X, Teuchos::RCP< const Vector > B)
Initialize from Teuchos::RCP.
Definition: Amesos2_Tacho_def.hpp:24
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal structures.
Definition: Amesos2_Tacho_def.hpp:265
bool do_optimization() const
can we optimize size_type and ordinal_type for straight pass through
Definition: Amesos2_Tacho_def.hpp:259
A templated MultiVector class adapter for Amesos2.
Definition: Amesos2_MultiVecAdapter_decl.hpp:142
~TachoSolver()
Destructor.
Definition: Amesos2_Tacho_def.hpp:38