Amesos2 - Direct Sparse Solver Interfaces  Version of the Day
Amesos2_Cholmod_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Amesos2: Templated Direct Sparse Solver Package
4 //
5 // Copyright 2011 NTESS and the Amesos2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
19 #ifndef AMESOS2_CHOLMOD_DEF_HPP
20 #define AMESOS2_CHOLMOD_DEF_HPP
21 
22 #include <Teuchos_Tuple.hpp>
23 #include <Teuchos_ParameterList.hpp>
24 #include <Teuchos_StandardParameterEntryValidators.hpp>
25 
27 #include "Amesos2_Cholmod_decl.hpp"
28 
29 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
30 #include "KokkosSparse_sptrsv_cholmod.hpp"
31 #endif
32 
33 namespace Amesos2 {
34 
35 
36 template <class Matrix, class Vector>
38  Teuchos::RCP<const Matrix> A,
39  Teuchos::RCP<Vector> X,
40  Teuchos::RCP<const Vector> B )
41  : SolverCore<Amesos2::Cholmod,Matrix,Vector>(A, X, B)
42  , firstsolve(true)
43  , skip_symfact(false)
44  , map()
45  , is_contiguous_(true) // default is set by params
46  , use_triangular_solves_(false) // default is set by params
47  , use_cholmod_int_type_(false) // if true we use CHOLMOD_INT (no GPU support)
48 {
49  data_.L = NULL;
50  data_.Y = NULL;
51  data_.E = NULL;
52 
53  data_.c.supernodal = CHOLMOD_AUTO;
54  data_.c.quick_return_if_not_posdef = 1;
55 }
56 
57 
58 template <class Matrix, class Vector>
60 {
61  if(use_cholmod_int_type_) {
62  cholmod_free_factor(&(data_.L), &(data_.c));
63  cholmod_free_dense(&(data_.Y), &data_.c);
64  cholmod_free_dense(&(data_.E), &data_.c);
65  cholmod_finish(&(data_.c));
66  }
67  else {
68  // useful to check if GPU is being used, but very verbose
69  // cholmod_l_gpu_stats(&(data_.c));
70  cholmod_l_free_factor(&(data_.L), &(data_.c));
71  cholmod_l_free_dense(&(data_.Y), &data_.c);
72  cholmod_l_free_dense(&(data_.E), &data_.c);
73  cholmod_l_finish(&(data_.c));
74  }
75 }
76 
77 template<class Matrix, class Vector>
78 int
80 {
81 #ifdef HAVE_AMESOS2_TIMERS
82  Teuchos::TimeMonitor preOrderTimer(this->timers_.preOrderTime_);
83 #endif
84 
85  int info = 0;
86 
87  if(use_cholmod_int_type_) {
88  data_.L = cholmod_analyze(&data_.A, &(data_.c));
89  }
90  else {
91  data_.L = cholmod_l_analyze(&data_.A, &(data_.c));
92  }
93 
94  info = data_.c.status;
95  skip_symfact = true;
96 
97  /* All processes should have the same error code */
98  Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
99 
100  TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
101  std::runtime_error,
102  "Amesos2 cholmod_l_analyze failure in Cholmod preOrdering_impl");
103 
104  return(0);
105 }
106 
107 
108 template <class Matrix, class Vector>
109 int
111 {
112  int info = 0;
113 
114  if(!skip_symfact) {
115 #ifdef HAVE_AMESOS2_TIMERS
116  Teuchos::TimeMonitor symFactTimer(this->timers_.symFactTime_);
117 #endif
118 
119  if(use_cholmod_int_type_) {
120  cholmod_resymbol (&data_.A, NULL, 0, true, data_.L, &(data_.c));
121  }
122  else {
123  cholmod_l_resymbol (&data_.A, NULL, 0, true, data_.L, &(data_.c));
124  }
125 
126  info = data_.c.status;
127  } else {
128  /*
129  * Symbolic factorization has already occured in preOrdering_impl,
130  * but if the user calls this routine directly later, we need to
131  * redo the symbolic factorization.
132  */
133  skip_symfact = false;
134  }
135 
136  /* All processes should have the same error code */
137  Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
138 
139  TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
140  std::runtime_error,
141  "Amesos2 cholmod_l_resymbol failure in Cholmod symbolicFactorization_impl");
142 
143  if(use_triangular_solves_) {
144  triangular_solve_symbolic();
145  }
146 
147  return(0);
148 }
149 
150 
151 template <class Matrix, class Vector>
152 int
154 {
155  int info = 0;
156 
157 #ifdef HAVE_AMESOS2_DEBUG
158  TEUCHOS_TEST_FOR_EXCEPTION(data_.A.ncol != Teuchos::as<size_t>(this->globalNumCols_),
159  std::runtime_error,
160  "Error in converting to cholmod_sparse: wrong number of global columns." );
161  TEUCHOS_TEST_FOR_EXCEPTION(data_.A.nrow != Teuchos::as<size_t>(this->globalNumRows_),
162  std::runtime_error,
163  "Error in converting to cholmod_sparse: wrong number of global rows." );
164 #endif
165 
166 #ifdef HAVE_AMESOS2_TIMERS
167  Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
168 #endif
169 
170 #ifdef HAVE_AMESOS2_VERBOSE_DEBUG
171  // Add print out of views here - need host conversion first
172 #endif
173 
174  if(use_cholmod_int_type_) {
175  cholmod_factorize(&data_.A, data_.L, &(data_.c));
176  }
177  else {
178  cholmod_l_factorize(&data_.A, data_.L, &(data_.c));
179  }
180 
181  info = data_.c.status;
182 
183  /* All processes should have the same error code */
184  Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
185 
186  TEUCHOS_TEST_FOR_EXCEPTION(info == CHOLMOD_OUT_OF_MEMORY,
187  std::runtime_error,
188  "Amesos2 cholmod_l_factorize error code: CHOLMOD_OUT_OF_MEMORY");
189 
190  TEUCHOS_TEST_FOR_EXCEPTION(info == CHOLMOD_NOT_POSDEF,
191  std::runtime_error,
192  "Amesos2 cholmod_l_factorize error code: CHOLMOD_NOT_POSDEF.");
193 
194  TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
195  std::runtime_error,
196  "Amesos2 cholmod_l_factorize error code:" << info);
197 
198  if(use_triangular_solves_) {
199  triangular_solve_numeric();
200  }
201 
202  return(info);
203 }
204 
205 
206 template <class Matrix, class Vector>
207 int
209  const Teuchos::Ptr<const MultiVecAdapter<Vector> > B) const
210 {
211  const global_size_type ld_rhs = X->getGlobalLength();
212  const size_t nrhs = X->getGlobalNumVectors();
213 
214  bool bDidAssignX;
215  { // Get values from RHS B
216 #ifdef HAVE_AMESOS2_TIMERS
217  Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
218  Teuchos::TimeMonitor redistTimer( this->timers_.vecRedistTime_ );
219 #endif
220 
221  // In general we may want to write directly to the x space without a copy.
222  // So we 'get' x which may be a direct view assignment to the MV.
223  const bool initialize_data = true;
224  const bool do_not_initialize_data = false;
225  if(use_triangular_solves_) { // to device
226 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
227  Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
228  device_solve_array_t>::do_get(initialize_data, B, device_bValues_,
229  Teuchos::as<size_t>(ld_rhs),
230  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
231  this->rowIndexBase_);
232  bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
233  device_solve_array_t>::do_get(do_not_initialize_data, X, device_xValues_,
234  Teuchos::as<size_t>(ld_rhs),
235  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
236  this->rowIndexBase_);
237 #endif
238  }
239  else { // to host
240  Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
241  host_solve_array_t>::do_get(initialize_data, B, host_bValues_,
242  Teuchos::as<size_t>(ld_rhs),
243  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
244  this->rowIndexBase_);
245  bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
246  host_solve_array_t>::do_get(do_not_initialize_data, X, host_xValues_,
247  Teuchos::as<size_t>(ld_rhs),
248  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
249  this->rowIndexBase_);
250  }
251  }
252 
253  int ierr = 0; // returned error code
254 
255 #ifdef HAVE_AMESOS2_TIMERS
256  Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
257 #endif
258 
259  if(use_triangular_solves_) {
260  triangular_solve();
261  }
262  else {
263  function_map::cholmod_init_dense(Teuchos::as<long>(this->globalNumRows_),
264  Teuchos::as<int>(nrhs), Teuchos::as<int>(ld_rhs), host_bValues_.data(), &data_.b);
265  function_map::cholmod_init_dense(Teuchos::as<long>(this->globalNumRows_),
266  Teuchos::as<int>(nrhs), Teuchos::as<int>(ld_rhs), host_xValues_.data(), &data_.x);
267 
268  cholmod_dense *xtemp = &(data_.x);
269  if(use_cholmod_int_type_) {
270  cholmod_solve2(CHOLMOD_A, data_.L, &data_.b, NULL,
271  &(xtemp), NULL, &data_.Y, &data_.E, &data_.c);
272  }
273  else {
274  cholmod_l_solve2(CHOLMOD_A, data_.L, &data_.b, NULL,
275  &(xtemp), NULL, &data_.Y, &data_.E, &data_.c);
276  }
277  }
278 
279  ierr = data_.c.status;
280 
281  /* All processes should have the same error code */
282  Teuchos::broadcast(*(this->getComm()), 0, &ierr);
283 
284  TEUCHOS_TEST_FOR_EXCEPTION(ierr == -2, std::runtime_error, "Ran out of memory" );
285 
286  /* Update X's global values */
287 
288  // if bDidAssignX, then we solved straight to the adapter's X memory space without
289  // requiring additional memory allocation, so the x data is already in place.
290  if(!bDidAssignX) {
291 #ifdef HAVE_AMESOS2_TIMERS
292  Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
293 #endif
294 
295  if(use_triangular_solves_) { // to device
296 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
297  Util::put_1d_data_helper_kokkos_view<
298  MultiVecAdapter<Vector>,device_solve_array_t>::do_put(X, device_xValues_,
299  Teuchos::as<size_t>(ld_rhs),
300  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
301  this->rowIndexBase_);
302 #endif
303  }
304  else { // to host
305  Util::put_1d_data_helper_kokkos_view<
306  MultiVecAdapter<Vector>,host_solve_array_t>::do_put(X, host_xValues_,
307  Teuchos::as<size_t>(ld_rhs),
308  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
309  this->rowIndexBase_);
310  }
311  }
312 
313  return(ierr);
314 }
315 
316 
317 template <class Matrix, class Vector>
318 bool
320 {
321  return( this->matrixA_->getGlobalNumRows() == this->matrixA_->getGlobalNumCols() );
322 }
323 
324 
325 template <class Matrix, class Vector>
326 void
327 Cholmod<Matrix,Vector>::setParameters_impl(const Teuchos::RCP<Teuchos::ParameterList> & parameterList )
328 {
329  // Note that the ordering is very important here. We must do this sequence:
330  // (1) Read parameter for use_cholmod_int_type
331  // (2) Call cholmod_start or cholmod_l_start
332  // (3) Set additional data_.c values
333  use_cholmod_int_type_ = parameterList->get<bool>("CholmodInt", false);
334 
335  if(use_cholmod_int_type_) {
336  data_.c.itype = CHOLMOD_INT;
337  cholmod_start(&data_.c);
338  }
339  else {
340  data_.c.itype = CHOLMOD_LONG;
341  cholmod_l_start(&data_.c); // long form required for CUDA
342  }
343 
344  using Teuchos::RCP;
345  using Teuchos::getIntegralValue;
346  using Teuchos::ParameterEntryValidator;
347 
348  RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
349 
350  is_contiguous_ = parameterList->get<bool>("IsContiguous", true);
351  use_triangular_solves_ = parameterList->get<bool>("Enable_KokkosKernels_TriangularSolves", false);
352 
353  if(use_triangular_solves_) {
354 #if not defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) || not defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
355  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error,
356  "Calling for triangular solves but KokkosKernels_ENABLE_SUPERNODAL_SPTRSV and KokkosKernels_ENABLE_TPL_CHOLMOD not configured." );
357 #endif
358  }
359 
360  data_.c.dbound = parameterList->get<double>("dbound", 0.0);
361  data_.c.prefer_upper = (parameterList->get<bool>("PreferUpper", true)) ? 1 : 0;
362  data_.c.print = parameterList->get<int>("print", 3);
363  data_.c.nmethods = parameterList->get<int>("nmethods", 0); // tries AMD and may try METIS if necessary and available
364 
365  // useGPU = 1 means use GPU if possible - note this must be set after cholmod_l_start
366  // since cholmod_l_start sets it to the default value of -1.
367  // Setting to -1 means GPU is only used if env variable CHOLMOD_USE_GPU is set to 1.
368 #ifdef KOKKOS_ENABLE_CUDA
369  const int default_gpu_setting = 1; // use gpu by default
370 #else
371  // If building Trilinos with Cuda off, Cholmod can stil use GPU and the solver will still work.
372  // But that is likely not what was expected/wanted. If you do still want it, set the param to 1.
373  const int default_gpu_setting = 0; // use gpu by default
374 #endif
375 
376  data_.c.useGPU = parameterList->get<int>("useGPU", default_gpu_setting);
377 
378 #ifdef KOKKOS_ENABLE_CUDA
379  TEUCHOS_TEST_FOR_EXCEPTION(data_.c.useGPU != 0 && use_cholmod_int_type_, std::runtime_error,
380  "Amesos2 Cholmod solver must not use GPU (parameter useGPU = 0) if CholmodInt is turned on because Cholmod only supports GPU with long." );
381 #endif
382 
383  bool bSuperNodal = parameterList->get<bool>("SuperNodal", false);
384  data_.c.supernodal = bSuperNodal ? CHOLMOD_SUPERNODAL : CHOLMOD_AUTO;
385 }
386 
387 
388 template <class Matrix, class Vector>
389 Teuchos::RCP<const Teuchos::ParameterList>
391 {
392  using std::string;
393  using Teuchos::tuple;
394  using Teuchos::ParameterList;
395  using Teuchos::EnhancedNumberValidator;
396  using Teuchos::setStringToIntegralParameter;
397  using Teuchos::stringToIntegralParameterEntryValidator;
398 
399  static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
400 
401  if( is_null(valid_params) ){
402  Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
403 
404 
405  Teuchos::RCP<EnhancedNumberValidator<int> > print_validator
406  = Teuchos::rcp( new EnhancedNumberValidator<int>(0,5));
407 
408  Teuchos::RCP<EnhancedNumberValidator<int> > nmethods_validator
409  = Teuchos::rcp( new EnhancedNumberValidator<int>(0,9));
410 
411  pl->set("nmethods", 0, "Specifies the number of different ordering methods to try", nmethods_validator);
412 
413  pl->set("print", 3, "Specifies the verbosity of the print statements", print_validator);
414 
415  pl->set("dbound", 0.0,
416  "Specifies the smallest absolute value on the diagonal D for the LDL factorization");
417 
418  pl->set("PreferUpper", true,
419  "Specifies whether the matrix will be "
420  "stored in upper triangular form.");
421 
422  pl->set("useGPU", -1, "1: Use GPU is 1, 0: Do not use GPU, -1: ENV CHOLMOD_USE_GPU set GPU usage.");
423 
424  pl->set("Enable_KokkosKernels_TriangularSolves", false, "Whether to use triangular solves.");
425 
426  pl->set("CholmodInt", false, "Whether to use cholmod in int form");
427 
428  pl->set("IsContiguous", true, "Whether GIDs contiguous");
429 
430  pl->set("SuperNodal", false, "Whether to use super nodal");
431 
432  valid_params = pl;
433  }
434 
435  return valid_params;
436 }
437 
438 
439 template <class Matrix, class Vector>
440 bool
442 {
443 #ifdef HAVE_AMESOS2_TIMERS
444  Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
445 #endif
446 
447  // Only the root image needs storage allocated
448  Kokkos::resize(host_nzvals_view_, this->globalNumNonZeros_);
449  if(use_cholmod_int_type_) {
450  Kokkos::resize(host_rows_int_view_, this->globalNumNonZeros_);
451  Kokkos::resize(host_col_ptr_int_view_, this->globalNumRows_ + 1);
452  }
453  else {
454  Kokkos::resize(host_rows_long_view_, this->globalNumNonZeros_);
455  Kokkos::resize(host_col_ptr_long_view_, this->globalNumRows_ + 1);
456  }
457 
458  {
459 #ifdef HAVE_AMESOS2_TIMERS
460  Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
461 #endif
462 
463  TEUCHOS_TEST_FOR_EXCEPTION(this->rowIndexBase_ != this->columnIndexBase_,
464  std::runtime_error,
465  "Row and column maps have different indexbase ");
466 
467  if(use_cholmod_int_type_) {
468  int nnz_ret = 0;
470  MatrixAdapter<Matrix>,host_value_type_array,host_ordinal_int_type_array,
471  host_size_int_type_array>::do_get(this->matrixA_.ptr(),
472  host_nzvals_view_, host_rows_int_view_,
473  host_col_ptr_int_view_, nnz_ret,
474  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
475  ARBITRARY,
476  this->rowIndexBase_);
477 
478  TEUCHOS_TEST_FOR_EXCEPTION(nnz_ret != Teuchos::as<long>(this->globalNumNonZeros_),
479  std::runtime_error,
480  "Did not get the expected number of non-zero vals");
481  }
482  else {
483  long nnz_ret = 0;
484 
486  MatrixAdapter<Matrix>,host_value_type_array,host_ordinal_long_type_array,
487  host_size_long_type_array>::do_get(this->matrixA_.ptr(),
488  host_nzvals_view_, host_rows_long_view_,
489  host_col_ptr_long_view_, nnz_ret,
490  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
491  ARBITRARY,
492  this->rowIndexBase_);
493 
494  TEUCHOS_TEST_FOR_EXCEPTION(nnz_ret != Teuchos::as<long>(this->globalNumNonZeros_),
495  std::runtime_error,
496  "Did not get the expected number of non-zero vals");
497  }
498  }
499 
500  if(use_cholmod_int_type_) {
501  function_map::cholmod_init_sparse(Teuchos::as<size_t>(this->globalNumRows_),
502  Teuchos::as<size_t>(this->globalNumCols_),
503  Teuchos::as<size_t>(this->globalNumNonZeros_),
504  0,
505  host_col_ptr_int_view_.data(),
506  host_nzvals_view_.data(),
507  host_rows_int_view_.data(),
508  &(data_.A),
509  CHOLMOD_INT);
510  }
511  else {
512  function_map::cholmod_init_sparse(Teuchos::as<size_t>(this->globalNumRows_),
513  Teuchos::as<size_t>(this->globalNumCols_),
514  Teuchos::as<size_t>(this->globalNumNonZeros_),
515  0,
516  host_col_ptr_long_view_.data(),
517  host_nzvals_view_.data(),
518  host_rows_long_view_.data(),
519  &(data_.A),
520  CHOLMOD_LONG);
521  }
522 
523  TEUCHOS_TEST_FOR_EXCEPTION(data_.A.stype == 0, std::runtime_error,
524  "CHOLMOD loadA_impl loaded matrix but it is not symmetric.");
525 
526  return true;
527 }
528 
529 
530 template <class Matrix, class Vector>
531 void
533 {
534 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
535  // Create handles for U and U^T solves
536  if(use_cholmod_int_type_) {
537  device_int_khL_.create_sptrsv_handle(
538  KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_ETREE, data_.L->n, true);
539  device_int_khU_.create_sptrsv_handle(
540  KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_ETREE, data_.L->n, false);
541  }
542  else {
543  device_long_khL_.create_sptrsv_handle(
544  KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_ETREE, data_.L->n, true);
545  device_long_khU_.create_sptrsv_handle(
546  KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_ETREE, data_.L->n, false);
547  }
548 
549  // extract etree and iperm from CHOLMOD
550  Kokkos::resize(host_trsv_etree_, data_.L->nsuper);
551  if(use_cholmod_int_type_) {
552  int *int_etree = static_cast<int*>(data_.c.Iwork) + 2 * data_.L->n;
553  for (size_t i = 0 ; i < data_.L->nsuper; ++i) {
554  host_trsv_etree_(i) = int_etree[i];
555  }
556  }
557  else {
558  long *long_etree = static_cast<long*>(data_.c.Iwork) + 2 * data_.L->n;
559  for (size_t i = 0 ; i < data_.L->nsuper; ++i) { // convert long to int array for trsv API
560  host_trsv_etree_(i) = static_cast<int>(long_etree[i]);
561  }
562  }
563 
564  // set etree
565  if(use_cholmod_int_type_) {
566  device_int_khL_.set_sptrsv_etree(host_trsv_etree_.data());
567  device_int_khU_.set_sptrsv_etree(host_trsv_etree_.data());
568  }
569  else {
570  device_long_khL_.set_sptrsv_etree(host_trsv_etree_.data());
571  device_long_khU_.set_sptrsv_etree(host_trsv_etree_.data());
572  }
573 
574  size_t ld_rhs = this->matrixA_->getGlobalNumRows();
575  Kokkos::resize(host_trsv_perm_, ld_rhs);
576  if(use_cholmod_int_type_) {
577  int *int_iperm = static_cast<int*>(data_.L->Perm);
578  for (size_t i = 0; i < ld_rhs; i++) {
579  host_trsv_perm_(int_iperm[i]) = i;
580  }
581  }
582  else {
583  long *long_iperm = static_cast<long*>(data_.L->Perm);
584  for (size_t i = 0; i < ld_rhs; i++) {
585  host_trsv_perm_(long_iperm[i]) = i;
586  }
587  }
588  deep_copy_or_assign_view(device_trsv_perm_, host_trsv_perm_); // will use device to permute
589 
590  // set permutation
591  if(use_cholmod_int_type_) {
592  device_int_khL_.set_sptrsv_perm(host_trsv_perm_.data());
593  device_int_khU_.set_sptrsv_perm(host_trsv_perm_.data());
594  }
595  else {
596  device_long_khL_.set_sptrsv_perm(host_trsv_perm_.data());
597  device_long_khU_.set_sptrsv_perm(host_trsv_perm_.data());
598  }
599 
600  // Do symbolic analysis
601  if(use_cholmod_int_type_) {
602  KokkosSparse::Experimental::sptrsv_symbolic<int, kernel_handle_int_type>
603  (&device_int_khL_, &device_int_khU_, data_.L, &data_.c);
604  }
605  else {
606  KokkosSparse::Experimental::sptrsv_symbolic<long, kernel_handle_long_type>
607  (&device_long_khL_, &device_long_khU_, data_.L, &data_.c);
608  }
609 #endif
610 }
611 
612 
613 template <class Matrix, class Vector>
614 void
615 Cholmod<Matrix,Vector>::triangular_solve_numeric()
616 {
617 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
618  // Do numerical compute
619  if(use_cholmod_int_type_) {
620  KokkosSparse::Experimental::sptrsv_compute<int, kernel_handle_int_type>
621  (&device_int_khL_, &device_int_khU_, data_.L, &data_.c);
622  }
623  else {
624  KokkosSparse::Experimental::sptrsv_compute<long, kernel_handle_long_type>
625  (&device_long_khL_, &device_long_khU_, data_.L, &data_.c);
626  }
627 #endif // HAVE_AMESOS2_TRIANGULAR_SOLVE
628 }
629 
630 
631 template <class Matrix, class Vector>
632 void
633 Cholmod<Matrix,Vector>::triangular_solve() const
634 {
635 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
636  size_t ld_rhs = device_xValues_.extent(0);
637  size_t nrhs = device_xValues_.extent(1);
638 
639  Kokkos::resize(device_trsv_rhs_, ld_rhs, nrhs);
640  Kokkos::resize(device_trsv_sol_, ld_rhs, nrhs);
641 
642  // forward pivot
643  auto local_device_bValues = device_bValues_;
644  auto local_device_trsv_perm = device_trsv_perm_;
645  auto local_device_trsv_rhs = device_trsv_rhs_;
646  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
647  KOKKOS_LAMBDA(size_t j) {
648  for(size_t k = 0; k < nrhs; ++k) {
649  local_device_trsv_rhs(local_device_trsv_perm(j),k) = local_device_bValues(j,k);
650  }
651  });
652 
653  for(size_t k = 0; k < nrhs; ++k) { // sptrsv_solve does not batch
654  auto sub_sol = Kokkos::subview(device_trsv_sol_, Kokkos::ALL, k);
655  auto sub_rhs = Kokkos::subview(device_trsv_rhs_, Kokkos::ALL, k);
656 
657  // do L solve= - numeric (only rhs is modified) on the default device/host space
658  if(use_cholmod_int_type_) {
659  KokkosSparse::Experimental::sptrsv_solve(&device_int_khL_, sub_sol, sub_rhs);
660  }
661  else {
662  KokkosSparse::Experimental::sptrsv_solve(&device_long_khL_, sub_sol, sub_rhs);
663  }
664 
665  // do L^T solve - numeric (only rhs is modified) on the default device/host space
666  if(use_cholmod_int_type_) {
667  KokkosSparse::Experimental::sptrsv_solve(&device_int_khU_, sub_rhs, sub_sol);
668  }
669  else {
670  KokkosSparse::Experimental::sptrsv_solve(&device_long_khU_, sub_rhs, sub_sol);
671  }
672 
673  } // end loop over rhs vectors
674 
675  // backward pivot
676  auto local_device_xValues = device_xValues_;
677  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
678  KOKKOS_LAMBDA(size_t j) {
679  for(size_t k = 0; k < nrhs; ++k) {
680  local_device_xValues(j,k) = local_device_trsv_rhs(local_device_trsv_perm(j),k);
681  }
682  });
683 #endif
684 }
685 
686 
687 template<class Matrix, class Vector>
688 const char* Cholmod<Matrix,Vector>::name = "Cholmod";
689 
690 
691 } // end namespace Amesos2
692 
693 #endif // AMESOS2_CHOLMOD_DEF_HPP
~Cholmod()
Destructor.
Definition: Amesos2_Cholmod_def.hpp:59
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition: Amesos2_Cholmod_def.hpp:79
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers...
Definition: Amesos2_SolverCore_decl.hpp:71
A generic helper class for getting a CCS representation of a Matrix.
Definition: Amesos2_Util.hpp:614
EPhase
Used to indicate a phase in the direct solution.
Definition: Amesos2_TypeDecl.hpp:31
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition: Amesos2_Cholmod_def.hpp:390
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition: Amesos2_Cholmod_def.hpp:319
Cholmod(Teuchos::RCP< const Matrix > A, Teuchos::RCP< Vector > X, Teuchos::RCP< const Vector > B)
Initialize from Teuchos::RCP.
Definition: Amesos2_Cholmod_def.hpp:37
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
CHOLMOD specific solve.
Definition: Amesos2_Cholmod_def.hpp:208
void setParameters_impl(const Teuchos::RCP< Teuchos::ParameterList > &parameterList)
Definition: Amesos2_Cholmod_def.hpp:327
Amesos2 CHOLMOD declarations.
A Matrix adapter interface for Amesos2.
Definition: Amesos2_MatrixAdapter_decl.hpp:42
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal structures.
Definition: Amesos2_Cholmod_def.hpp:441
int symbolicFactorization_impl()
Perform symbolic factorization of the matrix using CHOLMOD.
Definition: Amesos2_Cholmod_def.hpp:110
Amesos2 interface to the CHOLMOD package.
Definition: Amesos2_Cholmod_decl.hpp:42
A templated MultiVector class adapter for Amesos2.
Definition: Amesos2_MultiVecAdapter_decl.hpp:142
int numericFactorization_impl()
CHOLMOD specific numeric factorization.
Definition: Amesos2_Cholmod_def.hpp:153