Amesos2 - Direct Sparse Solver Interfaces  Version of the Day
Amesos2_Cholmod_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // Amesos2: Templated Direct Sparse Solver Package
6 // Copyright 2011 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39 //
40 // ***********************************************************************
41 //
42 // @HEADER
43 
53 #ifndef AMESOS2_CHOLMOD_DEF_HPP
54 #define AMESOS2_CHOLMOD_DEF_HPP
55 
56 #include <Teuchos_Tuple.hpp>
57 #include <Teuchos_ParameterList.hpp>
58 #include <Teuchos_StandardParameterEntryValidators.hpp>
59 
61 #include "Amesos2_Cholmod_decl.hpp"
62 
63 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
64 #include "KokkosSparse_sptrsv_cholmod.hpp"
65 #endif
66 
67 namespace Amesos2 {
68 
69 
70 template <class Matrix, class Vector>
72  Teuchos::RCP<const Matrix> A,
73  Teuchos::RCP<Vector> X,
74  Teuchos::RCP<const Vector> B )
75  : SolverCore<Amesos2::Cholmod,Matrix,Vector>(A, X, B)
76  , firstsolve(true)
77  , skip_symfact(false)
78  , map()
79  , is_contiguous_(true) // default is set by params
80  , use_triangular_solves_(false) // default is set by params
81 {
82  data_.L = NULL;
83  data_.Y = NULL;
84  data_.E = NULL;
85 
86  cholmod_l_start(&data_.c); // long form required for CUDA
87 
88  // TODO: Add an option so we can support CHOLMOD_INT (if no GPU is required)
89  // This will require some kind of templating design in decl.hpp which uses the long type
90  data_.c.itype = CHOLMOD_LONG; // required for long support
91 
92  data_.c.supernodal = CHOLMOD_AUTO;
93  data_.c.quick_return_if_not_posdef = 1;
94 }
95 
96 
97 template <class Matrix, class Vector>
99 {
100  // useful to check if GPU is being used, but very verbose
101  // cholmod_l_gpu_stats(&(data_.c));
102 
103  cholmod_l_free_factor(&(data_.L), &(data_.c));
104  cholmod_l_free_dense(&(data_.Y), &data_.c);
105  cholmod_l_free_dense(&(data_.E), &data_.c);
106 
107  cholmod_l_finish(&(data_.c));
108 }
109 
110 template<class Matrix, class Vector>
111 int
113 {
114 #ifdef HAVE_AMESOS2_TIMERS
115  Teuchos::TimeMonitor preOrderTimer(this->timers_.preOrderTime_);
116 #endif
117 
118  int info = 0;
119 
120  data_.L = cholmod_l_analyze(&data_.A, &(data_.c));
121  info = data_.c.status;
122  skip_symfact = true;
123 
124  /* All processes should have the same error code */
125  Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
126 
127  TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
128  std::runtime_error,
129  "Amesos2 cholmod_l_analyze failure in Cholmod preOrdering_impl");
130 
131  return(0);
132 }
133 
134 
135 template <class Matrix, class Vector>
136 int
138 {
139  int info = 0;
140 
141  if(!skip_symfact) {
142 #ifdef HAVE_AMESOS2_TIMERS
143  Teuchos::TimeMonitor symFactTimer(this->timers_.symFactTime_);
144 #endif
145 
146  cholmod_l_resymbol (&data_.A, NULL, 0, true, data_.L, &(data_.c));
147 
148  info = data_.c.status;
149  } else {
150  /*
151  * Symbolic factorization has already occured in preOrdering_impl,
152  * but if the user calls this routine directly later, we need to
153  * redo the symbolic factorization.
154  */
155  skip_symfact = false;
156  }
157 
158  /* All processes should have the same error code */
159  Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
160 
161  TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
162  std::runtime_error,
163  "Amesos2 cholmod_l_resymbol failure in Cholmod symbolicFactorization_impl");
164 
165  if(use_triangular_solves_) {
166  triangular_solve_symbolic();
167  }
168 
169  return(0);
170 }
171 
172 
173 template <class Matrix, class Vector>
174 int
176 {
177  int info = 0;
178 
179 #ifdef HAVE_AMESOS2_DEBUG
180  TEUCHOS_TEST_FOR_EXCEPTION(data_.A.ncol != Teuchos::as<size_t>(this->globalNumCols_),
181  std::runtime_error,
182  "Error in converting to cholmod_sparse: wrong number of global columns." );
183  TEUCHOS_TEST_FOR_EXCEPTION(data_.A.nrow != Teuchos::as<size_t>(this->globalNumRows_),
184  std::runtime_error,
185  "Error in converting to cholmod_sparse: wrong number of global rows." );
186 #endif
187 
188 #ifdef HAVE_AMESOS2_TIMERS
189  Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
190 #endif
191 
192 #ifdef HAVE_AMESOS2_VERBOSE_DEBUG
193  // Add print out of views here - need host conversion first
194 #endif
195 
196  cholmod_l_factorize(&data_.A, data_.L, &(data_.c));
197  info = data_.c.status;
198 
199  /* All processes should have the same error code */
200  Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
201 
202  TEUCHOS_TEST_FOR_EXCEPTION(info == CHOLMOD_OUT_OF_MEMORY,
203  std::runtime_error,
204  "Amesos2 cholmod_l_factorize error code: CHOLMOD_OUT_OF_MEMORY");
205 
206  TEUCHOS_TEST_FOR_EXCEPTION(info == CHOLMOD_NOT_POSDEF,
207  std::runtime_error,
208  "Amesos2 cholmod_l_factorize error code: CHOLMOD_NOT_POSDEF.");
209 
210  TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
211  std::runtime_error,
212  "Amesos2 cholmod_l_factorize error code:" << info);
213 
214  if(use_triangular_solves_) {
215  triangular_solve_numeric();
216  }
217 
218  return(info);
219 }
220 
221 
222 template <class Matrix, class Vector>
223 int
225  const Teuchos::Ptr<const MultiVecAdapter<Vector> > B) const
226 {
227  const global_size_type ld_rhs = X->getGlobalLength();
228  const size_t nrhs = X->getGlobalNumVectors();
229 
230  { // Get values from RHS B
231 #ifdef HAVE_AMESOS2_TIMERS
232  Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
233  Teuchos::TimeMonitor redistTimer( this->timers_.vecRedistTime_ );
234 #endif
235 
236  // In general we may want to write directly to the x space without a copy.
237  // So we 'get' x which may be a direct view assignment to the MV.
238  if(use_triangular_solves_) { // to device
239 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
240  Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
241  device_solve_array_t>::do_get(B, device_bValues_,
242  Teuchos::as<size_t>(ld_rhs),
243  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
244  this->rowIndexBase_);
245  Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
246  device_solve_array_t>::do_get(X, device_xValues_,
247  Teuchos::as<size_t>(ld_rhs),
248  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
249  this->rowIndexBase_);
250 #endif
251  }
252  else { // to host
253  Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
254  host_solve_array_t>::do_get(B, host_bValues_,
255  Teuchos::as<size_t>(ld_rhs),
256  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
257  this->rowIndexBase_);
258  Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
259  host_solve_array_t>::do_get(X, host_xValues_,
260  Teuchos::as<size_t>(ld_rhs),
261  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
262  this->rowIndexBase_);
263  }
264 
265  }
266 
267  int ierr = 0; // returned error code
268 
269 #ifdef HAVE_AMESOS2_TIMERS
270  Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
271 #endif
272 
273  if(use_triangular_solves_) {
274  triangular_solve();
275  }
276  else {
277  function_map::cholmod_init_dense(Teuchos::as<long>(this->globalNumRows_),
278  Teuchos::as<int>(nrhs), Teuchos::as<int>(ld_rhs), host_bValues_.data(), &data_.b);
279  function_map::cholmod_init_dense(Teuchos::as<long>(this->globalNumRows_),
280  Teuchos::as<int>(nrhs), Teuchos::as<int>(ld_rhs), host_xValues_.data(), &data_.x);
281 
282  cholmod_dense *xtemp = &(data_.x);
283  cholmod_l_solve2(CHOLMOD_A, data_.L, &data_.b, NULL,
284  &(xtemp), NULL, &data_.Y, &data_.E, &data_.c);
285  }
286 
287  ierr = data_.c.status;
288 
289  /* All processes should have the same error code */
290  Teuchos::broadcast(*(this->getComm()), 0, &ierr);
291 
292  TEUCHOS_TEST_FOR_EXCEPTION(ierr == -2, std::runtime_error, "Ran out of memory" );
293 
294  /* Update X's global values */
295  {
296 #ifdef HAVE_AMESOS2_TIMERS
297  Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
298 #endif
299 
300  if(use_triangular_solves_) { // to device
301 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
302  Util::put_1d_data_helper_kokkos_view<
303  MultiVecAdapter<Vector>,device_solve_array_t>::do_put(X, device_xValues_,
304  Teuchos::as<size_t>(ld_rhs),
305  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
306  this->rowIndexBase_);
307 #endif
308  }
309  else { // to host
310  Util::put_1d_data_helper_kokkos_view<
311  MultiVecAdapter<Vector>,host_solve_array_t>::do_put(X, host_xValues_,
312  Teuchos::as<size_t>(ld_rhs),
313  (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
314  this->rowIndexBase_);
315  }
316  }
317 
318  return(ierr);
319 }
320 
321 
322 template <class Matrix, class Vector>
323 bool
325 {
326  return( this->matrixA_->getGlobalNumRows() == this->matrixA_->getGlobalNumCols() );
327 }
328 
329 
330 template <class Matrix, class Vector>
331 void
332 Cholmod<Matrix,Vector>::setParameters_impl(const Teuchos::RCP<Teuchos::ParameterList> & parameterList )
333 {
334  using Teuchos::RCP;
335  using Teuchos::getIntegralValue;
336  using Teuchos::ParameterEntryValidator;
337 
338  RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
339 
340  is_contiguous_ = parameterList->get<bool>("IsContiguous", true);
341  use_triangular_solves_ = parameterList->get<bool>("Enable_KokkosKernels_TriangularSolves", false);
342 
343  if(use_triangular_solves_) {
344 #if not defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) || not defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
345  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error,
346  "Calling for triangular solves but KokkosKernels_ENABLE_SUPERNODAL_SPTRSV and KokkosKernels_ENABLE_TPL_CHOLMOD not configured." );
347 #endif
348  }
349 
350  data_.c.dbound = parameterList->get<double>("dbound", 0.0);
351  data_.c.prefer_upper = (parameterList->get<bool>("PreferUpper", true)) ? 1 : 0;
352  data_.c.print = parameterList->get<int>("print", 3);
353  data_.c.nmethods = parameterList->get<int>("nmethods", 0); // tries AMD and may try METIS if necessary and available
354 
355  // useGPU = 1 means use GPU if possible - note this must be set after cholmod_l_start
356  // since cholmod_l_start sets it to the default value of -1.
357  // Setting to -1 means GPU is only used if env variable CHOLMOD_USE_GPU is set to 1.
358 #ifdef KOKKOS_ENABLE_CUDA
359  const int default_gpu_setting = 1; // use gpu by default
360 #else
361  // If building Trilinos with Cuda off, Cholmod can stil use GPU and the solver will still work.
362  // But that is likely not what was expected/wanted. If you do still want it, set the param to 1.
363  const int default_gpu_setting = 0; // use gpu by default
364 #endif
365 
366  data_.c.useGPU = parameterList->get<int>("useGPU", default_gpu_setting);
367 
368  bool bSuperNodal = parameterList->get<bool>("SuperNodal", false);
369  data_.c.supernodal = bSuperNodal ? CHOLMOD_SUPERNODAL : CHOLMOD_AUTO;
370 }
371 
372 
373 template <class Matrix, class Vector>
374 Teuchos::RCP<const Teuchos::ParameterList>
376 {
377  using std::string;
378  using Teuchos::tuple;
379  using Teuchos::ParameterList;
380  using Teuchos::EnhancedNumberValidator;
381  using Teuchos::setStringToIntegralParameter;
382  using Teuchos::stringToIntegralParameterEntryValidator;
383 
384  static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
385 
386  if( is_null(valid_params) ){
387  Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
388 
389 
390  Teuchos::RCP<EnhancedNumberValidator<int> > print_validator
391  = Teuchos::rcp( new EnhancedNumberValidator<int>(0,5));
392 
393  Teuchos::RCP<EnhancedNumberValidator<int> > nmethods_validator
394  = Teuchos::rcp( new EnhancedNumberValidator<int>(0,9));
395 
396  pl->set("nmethods", 0, "Specifies the number of different ordering methods to try", nmethods_validator);
397 
398  pl->set("print", 3, "Specifies the verbosity of the print statements", print_validator);
399 
400  pl->set("dbound", 0.0,
401  "Specifies the smallest absolute value on the diagonal D for the LDL' factorization");
402 
403 
404  pl->set("Equil", true, "Whether to equilibrate the system before solve");
405 
406  pl->set("PreferUpper", true,
407  "Specifies whether the matrix will be "
408  "stored in upper triangular form.");
409 
410  pl->set("useGPU", -1, "1: Use GPU is 1, 0: Do not use GPU, -1: ENV CHOLMOD_USE_GPU set GPU usage.");
411 
412  pl->set("Enable_KokkosKernels_TriangularSolves", false, "Whether to use triangular solves.");
413 
414  pl->set("IsContiguous", true, "Whether GIDs contiguous");
415 
416  pl->set("SuperNodal", false, "Whether to use super nodal");
417 
418  valid_params = pl;
419  }
420 
421  return valid_params;
422 }
423 
424 
425 template <class Matrix, class Vector>
426 bool
428 {
429 #ifdef HAVE_AMESOS2_TIMERS
430  Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
431 #endif
432 
433  // Only the root image needs storage allocated
434 
435  Kokkos::resize(host_nzvals_view_, this->globalNumNonZeros_);
436  Kokkos::resize(host_rows_view_, this->globalNumNonZeros_);
437  Kokkos::resize(host_col_ptr_view_, this->globalNumRows_ + 1);
438 
439  long nnz_ret = 0;
440  {
441 #ifdef HAVE_AMESOS2_TIMERS
442  Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
443 #endif
444 
445  TEUCHOS_TEST_FOR_EXCEPTION(this->rowIndexBase_ != this->columnIndexBase_,
446  std::runtime_error,
447  "Row and column maps have different indexbase ");
448 
449  if ( is_contiguous_ == true ) {
450  Util::get_ccs_helper_kokkos_view<
451  MatrixAdapter<Matrix>,host_value_type_array,host_ordinal_type_array,
452  host_size_type_array>::do_get(this->matrixA_.ptr(),
453  host_nzvals_view_, host_rows_view_,
454  host_col_ptr_view_, nnz_ret, ROOTED,
455  ARBITRARY,
456  this->rowIndexBase_);
457  }
458  else {
459  Util::get_ccs_helper_kokkos_view<
460  MatrixAdapter<Matrix>,host_value_type_array,host_ordinal_type_array,
461  host_size_type_array>::do_get(this->matrixA_.ptr(),
462  host_nzvals_view_, host_rows_view_,
463  host_col_ptr_view_, nnz_ret, CONTIGUOUS_AND_ROOTED,
464  ARBITRARY,
465  this->rowIndexBase_);
466  }
467  }
468 
469  TEUCHOS_TEST_FOR_EXCEPTION(nnz_ret != Teuchos::as<long>(this->globalNumNonZeros_),
470  std::runtime_error,
471  "Did not get the expected number of non-zero vals");
472 
473  function_map::cholmod_init_sparse(Teuchos::as<size_t>(this->globalNumRows_),
474  Teuchos::as<size_t>(this->globalNumCols_),
475  Teuchos::as<size_t>(this->globalNumNonZeros_),
476  0,
477  host_col_ptr_view_.data(),
478  host_nzvals_view_.data(),
479  host_rows_view_.data(),
480  &(data_.A));
481 
482  TEUCHOS_TEST_FOR_EXCEPTION(data_.A.stype == 0, std::runtime_error,
483  "CHOLMOD loadA_impl loaded matrix but it is not symmetric.");
484 
485  return true;
486 }
487 
488 template <class Matrix, class Vector>
489 void
491 {
492 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
493  // Create handles for U and U^T solves
494  device_khL_.create_sptrsv_handle(
495  KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_ETREE, data_.L->n, true);
496  device_khU_.create_sptrsv_handle(
497  KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_ETREE, data_.L->n, false);
498 
499  // extract etree and iperm from CHOLMOD
500  long *long_etree = static_cast<long*>(data_.c.Iwork) + 2 * data_.L->n;
501  Kokkos::resize(host_trsv_etree_, data_.L->nsuper);
502  for (size_t i = 0 ; i < data_.L->nsuper; ++i) { // convert long to int array for trsv API
503  host_trsv_etree_(i) = long_etree[i];
504  }
505 
506  // set etree
507  device_khL_.set_sptrsv_etree(host_trsv_etree_.data());
508  device_khU_.set_sptrsv_etree(host_trsv_etree_.data());
509 
510  size_t ld_rhs = this->matrixA_->getGlobalNumRows();
511  Kokkos::resize(host_trsv_perm_, ld_rhs);
512  long *iperm = static_cast<long*>(data_.L->Perm);
513  for (size_t i = 0; i < ld_rhs; i++) { // convert long to int array for trsv API
514  host_trsv_perm_(iperm[i]) = i;
515  }
516  deep_copy_or_assign_view(device_trsv_perm_, host_trsv_perm_); // will use device to permute
517 
518  // set permutation
519  device_khL_.set_sptrsv_perm(host_trsv_perm_.data());
520  device_khU_.set_sptrsv_perm(host_trsv_perm_.data());
521 
522  // Do symbolic analysis
523  KokkosSparse::Experimental::sptrsv_symbolic<long, kernel_handle_type>
524  (&device_khL_, &device_khU_, data_.L, &data_.c);
525 #endif
526 }
527 
528 template <class Matrix, class Vector>
529 void
530 Cholmod<Matrix,Vector>::triangular_solve_numeric()
531 {
532 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
533  // Do numerical compute
534  KokkosSparse::Experimental::sptrsv_compute<long, kernel_handle_type>
535  (&device_khL_, &device_khU_, data_.L, &data_.c);
536 #endif // HAVE_AMESOS2_TRIANGULAR_SOLVE
537 }
538 
539 template <class Matrix, class Vector>
540 void
541 Cholmod<Matrix,Vector>::triangular_solve() const
542 {
543 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
544  size_t ld_rhs = device_xValues_.extent(0);
545  size_t nrhs = device_xValues_.extent(1);
546 
547  Kokkos::resize(device_trsv_rhs_, ld_rhs, nrhs);
548  Kokkos::resize(device_trsv_sol_, ld_rhs, nrhs);
549 
550  // forward pivot
551  auto local_device_bValues = device_bValues_;
552  auto local_device_trsv_perm = device_trsv_perm_;
553  auto local_device_trsv_rhs = device_trsv_rhs_;
554  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
555  KOKKOS_LAMBDA(size_t j) {
556  for(size_t k = 0; k < nrhs; ++k) {
557  local_device_trsv_rhs(local_device_trsv_perm(j),k) = local_device_bValues(j,k);
558  }
559  });
560 
561  for(size_t k = 0; k < nrhs; ++k) { // sptrsv_solve does not batch
562  auto sub_sol = Kokkos::subview(device_trsv_sol_, Kokkos::ALL, k);
563  auto sub_rhs = Kokkos::subview(device_trsv_rhs_, Kokkos::ALL, k);
564 
565  // do L solve= - numeric (only rhs is modified) on the default device/host space
566  KokkosSparse::Experimental::sptrsv_solve(&device_khL_, sub_sol, sub_rhs);
567 
568  // do L^T solve - numeric (only rhs is modified) on the default device/host space
569  KokkosSparse::Experimental::sptrsv_solve(&device_khU_, sub_rhs, sub_sol);
570  } // end loop over rhs vectors
571 
572  // backward pivot
573  auto local_device_xValues = device_xValues_;
574  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
575  KOKKOS_LAMBDA(size_t j) {
576  for(size_t k = 0; k < nrhs; ++k) {
577  local_device_xValues(j,k) = local_device_trsv_rhs(local_device_trsv_perm(j),k);
578  }
579  });
580 #endif
581 }
582 
583 template<class Matrix, class Vector>
584 const char* Cholmod<Matrix,Vector>::name = "Cholmod";
585 
586 
587 } // end namespace Amesos2
588 
589 #endif // AMESOS2_CHOLMOD_DEF_HPP
~Cholmod()
Destructor.
Definition: Amesos2_Cholmod_def.hpp:98
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition: Amesos2_Cholmod_def.hpp:112
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers...
Definition: Amesos2_SolverCore_decl.hpp:105
EPhase
Used to indicate a phase in the direct solution.
Definition: Amesos2_TypeDecl.hpp:65
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition: Amesos2_Cholmod_def.hpp:375
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition: Amesos2_Cholmod_def.hpp:324
Cholmod(Teuchos::RCP< const Matrix > A, Teuchos::RCP< Vector > X, Teuchos::RCP< const Vector > B)
Initialize from Teuchos::RCP.
Definition: Amesos2_Cholmod_def.hpp:71
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
CHOLMOD specific solve.
Definition: Amesos2_Cholmod_def.hpp:224
void setParameters_impl(const Teuchos::RCP< Teuchos::ParameterList > &parameterList)
Definition: Amesos2_Cholmod_def.hpp:332
Amesos2 CHOLMOD declarations.
A Matrix adapter interface for Amesos2.
Definition: Amesos2_MatrixAdapter_decl.hpp:76
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal structures.
Definition: Amesos2_Cholmod_def.hpp:427
int symbolicFactorization_impl()
Perform symbolic factorization of the matrix using CHOLMOD.
Definition: Amesos2_Cholmod_def.hpp:137
Amesos2 interface to the CHOLMOD package.
Definition: Amesos2_Cholmod_decl.hpp:76
A templated MultiVector class adapter for Amesos2.
Definition: Amesos2_MultiVecAdapter_decl.hpp:176
int numericFactorization_impl()
CHOLMOD specific numeric factorization.
Definition: Amesos2_Cholmod_def.hpp:175