Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
TpetraExt_MatrixMatrix_HIP.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_MATRIXMATRIX_HIP_DEF_HPP
11 #define TPETRA_MATRIXMATRIX_HIP_DEF_HPP
12 
13 #include "Tpetra_Details_IntRowPtrHelper.hpp"
14 
15 #ifdef HAVE_TPETRA_INST_HIP
16 namespace Tpetra {
17 namespace MMdetails {
18 
19 /*********************************************************************************************************/
20 // MMM KernelWrappers for Partial Specialization to HIP
21 template <class Scalar,
22  class LocalOrdinal,
23  class GlobalOrdinal,
24  class LocalOrdinalViewType>
25 struct KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode, LocalOrdinalViewType> {
26  static inline void mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
27  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
28  const LocalOrdinalViewType& Acol2Brow,
29  const LocalOrdinalViewType& Acol2Irow,
30  const LocalOrdinalViewType& Bcol2Ccol,
31  const LocalOrdinalViewType& Icol2Ccol,
32  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
33  Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
34  const std::string& label = std::string(),
35  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
36 
37  static inline void mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
38  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
39  const LocalOrdinalViewType& Acol2Brow,
40  const LocalOrdinalViewType& Acol2Irow,
41  const LocalOrdinalViewType& Bcol2Ccol,
42  const LocalOrdinalViewType& Icol2Ccol,
43  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
44  Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
45  const std::string& label = std::string(),
46  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
47 };
48 
49 // Jacobi KernelWrappers for Partial Specialization to HIP
50 template <class Scalar,
51  class LocalOrdinal,
52  class GlobalOrdinal, class LocalOrdinalViewType>
53 struct KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode, LocalOrdinalViewType> {
54  static inline void jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
55  const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Dinv,
56  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
57  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
58  const LocalOrdinalViewType& Acol2Brow,
59  const LocalOrdinalViewType& Acol2Irow,
60  const LocalOrdinalViewType& Bcol2Ccol,
61  const LocalOrdinalViewType& Icol2Ccol,
62  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
63  Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
64  const std::string& label = std::string(),
65  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
66 
67  static inline void jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
68  const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Dinv,
69  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
70  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
71  const LocalOrdinalViewType& Acol2Brow,
72  const LocalOrdinalViewType& Acol2Irow,
73  const LocalOrdinalViewType& Bcol2Ccol,
74  const LocalOrdinalViewType& Icol2Ccol,
75  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
76  Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
77  const std::string& label = std::string(),
78  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
79 
80  static inline void jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
81  const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Dinv,
82  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
83  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
84  const LocalOrdinalViewType& Acol2Brow,
85  const LocalOrdinalViewType& Acol2Irow,
86  const LocalOrdinalViewType& Bcol2Ccol,
87  const LocalOrdinalViewType& Icol2Ccol,
88  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
89  Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
90  const std::string& label = std::string(),
91  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
92 };
93 
94 /*********************************************************************************************************/
95 // AB NewMatrix Kernel wrappers (KokkosKernels/HIP Version)
96 template <class Scalar,
97  class LocalOrdinal,
98  class GlobalOrdinal,
99  class LocalOrdinalViewType>
100 void KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode, LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
101  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
102  const LocalOrdinalViewType& Acol2Brow,
103  const LocalOrdinalViewType& Acol2Irow,
104  const LocalOrdinalViewType& Bcol2Ccol,
105  const LocalOrdinalViewType& Icol2Ccol,
106  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
107  Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
108  const std::string& label,
109  const Teuchos::RCP<Teuchos::ParameterList>& params) {
110  // Node-specific code
111  typedef Tpetra::KokkosCompat::KokkosHIPWrapperNode Node;
112  std::string nodename("HIP");
113 
114  // Lots and lots of typedefs
115  using Teuchos::RCP;
116  using Teuchos::rcp;
118  typedef typename KCRS::device_type device_t;
119  typedef typename KCRS::StaticCrsGraphType graph_t;
120  typedef typename graph_t::row_map_type::non_const_type lno_view_t;
121  using int_view_t = Kokkos::View<int*, typename lno_view_t::array_layout, typename lno_view_t::memory_space, typename lno_view_t::memory_traits>;
122  typedef typename graph_t::row_map_type::const_type c_lno_view_t;
123  typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
124  typedef typename KCRS::values_type::non_const_type scalar_view_t;
125  // typedef typename graph_t::row_map_type::const_type lno_view_t_const;
126 
127  RCP<Tpetra::Details::ProfilingRegion> MM = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: MMM: Newmatrix HIPWrapper"));
128 
129  // Options
130  int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
131  std::string myalg("SPGEMM_KK_MEMORY");
132  if (!params.is_null()) {
133  if (params->isParameter("hip: algorithm"))
134  myalg = params->get("hip: algorithm", myalg);
135  if (params->isParameter("hip: team work size"))
136  team_work_size = params->get("hip: team work size", team_work_size);
137  }
138 
139  // KokkosKernelsHandle
140  typedef KokkosKernels::Experimental::KokkosKernelsHandle<
141  typename lno_view_t::const_value_type, typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
142  typename device_t::execution_space, typename device_t::memory_space, typename device_t::memory_space>
143  KernelHandle;
144  using IntKernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle<
145  typename int_view_t::const_value_type, typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
146  typename device_t::execution_space, typename device_t::memory_space, typename device_t::memory_space>;
147 
148  // Grab the Kokkos::SparseCrsMatrices
149  const KCRS& Amat = Aview.origMatrix->getLocalMatrixDevice();
150  const KCRS& Bmat = Bview.origMatrix->getLocalMatrixDevice();
151 
152  c_lno_view_t Arowptr = Amat.graph.row_map,
153  Browptr = Bmat.graph.row_map;
154  const lno_nnz_view_t Acolind = Amat.graph.entries,
155  Bcolind = Bmat.graph.entries;
156  const scalar_view_t Avals = Amat.values,
157  Bvals = Bmat.values;
158 
159  // Get the algorithm mode
160  std::string alg = nodename + std::string(" algorithm");
161  // printf("DEBUG: Using kernel: %s\n",myalg.c_str());
162  if (!params.is_null() && params->isParameter(alg)) myalg = params->get(alg, myalg);
163  KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
164 
165  // Merge the B and Bimport matrices
166  const KCRS Bmerged = Tpetra::MMdetails::merge_matrices(Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C.getColMap()->getLocalNumElements());
167 
168  MM = Teuchos::null;
169  MM = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: MMM: Newmatrix HIP Core"));
170 
171  // Do the multiply on whatever we've got
172  typename KernelHandle::nnz_lno_t AnumRows = Amat.numRows();
173  typename KernelHandle::nnz_lno_t BnumRows = Bmerged.numRows();
174  typename KernelHandle::nnz_lno_t BnumCols = Bmerged.numCols();
175 
176  // regardless of whether integer row ptrs are used, need to ultimately produce the row pointer, entries, and values of the expected types
177  lno_view_t row_mapC(Kokkos::ViewAllocateWithoutInitializing("non_const_lno_row"), AnumRows + 1);
178  lno_nnz_view_t entriesC;
179  scalar_view_t valuesC;
180 
181  Tpetra::Details::IntRowPtrHelper<decltype(Bmerged)> irph(Bmerged.nnz(), Bmerged.graph.row_map);
182  const bool useIntRowptrs =
183  irph.shouldUseIntRowptrs() &&
184  Aview.origMatrix->getApplyHelper()->shouldUseIntRowptrs();
185 
186  if (useIntRowptrs) {
187  IntKernelHandle kh;
188  kh.create_spgemm_handle(alg_enum);
189  kh.set_team_work_size(team_work_size);
190 
191  int_view_t int_row_mapC(Kokkos::ViewAllocateWithoutInitializing("non_const_int_row"), AnumRows + 1);
192 
193  auto Aint = Aview.origMatrix->getApplyHelper()->getIntRowptrMatrix(Amat);
194  auto Bint = irph.getIntRowptrMatrix(Bmerged);
195 
196  {
197  Tpetra::Details::ProfilingRegion MM2("TpetraExt: MMM: Newmatrix KokkosKernels symbolic int");
198  KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols, Aint.graph.row_map, Aint.graph.entries, false, Bint.graph.row_map, Bint.graph.entries, false, int_row_mapC);
199  }
200  Tpetra::Details::ProfilingRegion MM2("TpetraExt: MMM: Newmatrix KokkosKernels numeric int");
201 
202  size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
203  if (c_nnz_size) {
204  entriesC = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
205  valuesC = scalar_view_t(Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
206  }
207  KokkosSparse::spgemm_numeric(&kh, AnumRows, BnumRows, BnumCols, Aint.graph.row_map, Aint.graph.entries, Aint.values, false, Bint.graph.row_map, Bint.graph.entries, Bint.values, false, int_row_mapC, entriesC, valuesC);
208  // transfer the integer rowptrs back to the correct rowptr type
209  Kokkos::parallel_for(
210  int_row_mapC.size(), KOKKOS_LAMBDA(int i) { row_mapC(i) = int_row_mapC(i); });
211  kh.destroy_spgemm_handle();
212  } else {
213  KernelHandle kh;
214  kh.create_spgemm_handle(alg_enum);
215  kh.set_team_work_size(team_work_size);
216 
217  {
218  Tpetra::Details::ProfilingRegion MM2("TpetraExt: MMM: Newmatrix KokkosKernels symbolic non-int");
219  KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols, Amat.graph.row_map, Amat.graph.entries, false, Bmerged.graph.row_map, Bmerged.graph.entries, false, row_mapC);
220  }
221 
222  size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
223  if (c_nnz_size) {
224  entriesC = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
225  valuesC = scalar_view_t(Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
226  }
227 
228  Tpetra::Details::ProfilingRegion MM2("TpetraExt: MMM: Newmatrix KokkosKernels numeric non-int");
229  KokkosSparse::spgemm_numeric(&kh, AnumRows, BnumRows, BnumCols, Amat.graph.row_map, Amat.graph.entries, Amat.values, false, Bmerged.graph.row_map, Bmerged.graph.entries, Bmerged.values, false, row_mapC, entriesC, valuesC);
230  kh.destroy_spgemm_handle();
231  }
232 
233  MM = Teuchos::null;
234  MM = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: MMM: Newmatrix HIP Sort"));
235 
236  // Sort & set values
237  if (params.is_null() || params->get("sort entries", true))
238  Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
239  C.setAllValues(row_mapC, entriesC, valuesC);
240 
241  MM = Teuchos::null;
242  MM = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: MMM: Newmatrix HIP ESFC"));
243 
244  // Final Fillcomplete
245  RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
246  labelList->set("Timer Label", label);
247  if (!params.is_null()) labelList->set("compute global constants", params->get("compute global constants", true));
248  RCP<const Export<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode> > dummyExport;
249  C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
250 }
251 
252 /*********************************************************************************************************/
253 template <class Scalar,
254  class LocalOrdinal,
255  class GlobalOrdinal,
256  class LocalOrdinalViewType>
257 void KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode, LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
258  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
259  const LocalOrdinalViewType& targetMapToOrigRow_dev,
260  const LocalOrdinalViewType& targetMapToImportRow_dev,
261  const LocalOrdinalViewType& Bcol2Ccol_dev,
262  const LocalOrdinalViewType& Icol2Ccol_dev,
263  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
264  Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
265  const std::string& label,
266  const Teuchos::RCP<Teuchos::ParameterList>& params) {
267  // FIXME: Right now, this is a cut-and-paste of the serial kernel
268  typedef Tpetra::KokkosCompat::KokkosHIPWrapperNode Node;
269  using Teuchos::RCP;
270  using Teuchos::rcp;
271 
272  RCP<Tpetra::Details::ProfilingRegion> MM = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: MMM: Reuse SerialCore"));
273 
274  // Lots and lots of typedefs
275  typedef typename Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_host_type KCRS;
276  typedef typename KCRS::StaticCrsGraphType graph_t;
277  typedef typename graph_t::row_map_type::const_type c_lno_view_t;
278  typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
279  typedef typename KCRS::values_type::non_const_type scalar_view_t;
280 
281  typedef Scalar SC;
282  typedef LocalOrdinal LO;
283  typedef GlobalOrdinal GO;
284  typedef Node NO;
285  typedef Map<LO, GO, NO> map_type;
286  const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
287  const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
288  const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
289 
290  // KDDKDD UVM Without UVM, need to copy targetMap arrays to host.
291  // KDDKDD UVM Ideally, this function would run on device and use
292  // KDDKDD UVM KokkosKernels instead of this host implementation.
293  auto targetMapToOrigRow =
294  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
295  targetMapToOrigRow_dev);
296  auto targetMapToImportRow =
297  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
298  targetMapToImportRow_dev);
299  auto Bcol2Ccol =
300  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
301  Bcol2Ccol_dev);
302  auto Icol2Ccol =
303  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
304  Icol2Ccol_dev);
305 
306  // Sizes
307  RCP<const map_type> Ccolmap = C.getColMap();
308  size_t m = Aview.origMatrix->getLocalNumRows();
309  size_t n = Ccolmap->getLocalNumElements();
310 
311  // Grab the Kokkos::SparseCrsMatrices & inner stuff
312  const KCRS& Amat = Aview.origMatrix->getLocalMatrixHost();
313  const KCRS& Bmat = Bview.origMatrix->getLocalMatrixHost();
314  const KCRS& Cmat = C.getLocalMatrixHost();
315 
316  c_lno_view_t Arowptr = Amat.graph.row_map,
317  Browptr = Bmat.graph.row_map,
318  Crowptr = Cmat.graph.row_map;
319  const lno_nnz_view_t Acolind = Amat.graph.entries,
320  Bcolind = Bmat.graph.entries,
321  Ccolind = Cmat.graph.entries;
322  const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
323  scalar_view_t Cvals = Cmat.values;
324 
325  c_lno_view_t Irowptr;
326  lno_nnz_view_t Icolind;
327  scalar_view_t Ivals;
328  if (!Bview.importMatrix.is_null()) {
329  auto lclB = Bview.importMatrix->getLocalMatrixHost();
330  Irowptr = lclB.graph.row_map;
331  Icolind = lclB.graph.entries;
332  Ivals = lclB.values;
333  }
334 
335  auto MM2 = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: MMM: Newmatrix SerialCore - Compare"));
336 
337  // Classic csr assembly (low memory edition)
338  // mfh 27 Sep 2016: The c_status array is an implementation detail
339  // of the local sparse matrix-matrix multiply routine.
340 
341  // The status array will contain the index into colind where this entry was last deposited.
342  // c_status[i] < CSR_ip - not in the row yet
343  // c_status[i] >= CSR_ip - this is the entry where you can find the data
344  // We start with this filled with INVALID's indicating that there are no entries yet.
345  // Sadly, this complicates the code due to the fact that size_t's are unsigned.
346  std::vector<size_t> c_status(n, ST_INVALID);
347 
348  // For each row of A/C
349  size_t CSR_ip = 0, OLD_ip = 0;
350  for (size_t i = 0; i < m; i++) {
351  // First fill the c_status array w/ locations where we're allowed to
352  // generate nonzeros for this row
353  OLD_ip = Crowptr[i];
354  CSR_ip = Crowptr[i + 1];
355  for (size_t k = OLD_ip; k < CSR_ip; k++) {
356  c_status[Ccolind[k]] = k;
357 
358  // Reset values in the row of C
359  Cvals[k] = SC_ZERO;
360  }
361 
362  for (size_t k = Arowptr[i]; k < Arowptr[i + 1]; k++) {
363  LO Aik = Acolind[k];
364  const SC Aval = Avals[k];
365  if (Aval == SC_ZERO)
366  continue;
367 
368  if (targetMapToOrigRow[Aik] != LO_INVALID) {
369  // Local matrix
370  size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
371 
372  for (size_t j = Browptr[Bk]; j < Browptr[Bk + 1]; ++j) {
373  LO Bkj = Bcolind[j];
374  LO Cij = Bcol2Ccol[Bkj];
375 
376  TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
377  std::runtime_error, "Trying to insert a new entry (" << i << "," << Cij << ") into a static graph "
378  << "(c_status = " << c_status[Cij] << " of [" << OLD_ip << "," << CSR_ip << "))");
379 
380  Cvals[c_status[Cij]] += Aval * Bvals[j];
381  }
382 
383  } else {
384  // Remote matrix
385  size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
386  for (size_t j = Irowptr[Ik]; j < Irowptr[Ik + 1]; ++j) {
387  LO Ikj = Icolind[j];
388  LO Cij = Icol2Ccol[Ikj];
389 
390  TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
391  std::runtime_error, "Trying to insert a new entry (" << i << "," << Cij << ") into a static graph "
392  << "(c_status = " << c_status[Cij] << " of [" << OLD_ip << "," << CSR_ip << "))");
393 
394  Cvals[c_status[Cij]] += Aval * Ivals[j];
395  }
396  }
397  }
398  }
399 
400  C.fillComplete(C.getDomainMap(), C.getRangeMap());
401 }
402 
403 /*********************************************************************************************************/
404 template <class Scalar,
405  class LocalOrdinal,
406  class GlobalOrdinal,
407  class LocalOrdinalViewType>
408 void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode, LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
409  const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Dinv,
410  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
411  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
412  const LocalOrdinalViewType& Acol2Brow,
413  const LocalOrdinalViewType& Acol2Irow,
414  const LocalOrdinalViewType& Bcol2Ccol,
415  const LocalOrdinalViewType& Icol2Ccol,
416  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
417  Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
418  const std::string& label,
419  const Teuchos::RCP<Teuchos::ParameterList>& params) {
420  // Node-specific code
421  using Teuchos::RCP;
422 
423  // Options
424  // int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer // unreferenced
425  std::string myalg("KK");
426  if (!params.is_null()) {
427  if (params->isParameter("hip: jacobi algorithm"))
428  myalg = params->get("hip: jacobi algorithm", myalg);
429  }
430 
431  if (myalg == "MSAK") {
432  ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_MultiplyScaleAddKernel(omega, Dinv, Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
433  } else if (myalg == "KK") {
434  jacobi_A_B_newmatrix_KokkosKernels(omega, Dinv, Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
435  } else {
436  throw std::runtime_error("Tpetra::MatrixMatrix::Jacobi newmatrix unknown kernel");
437  }
438 
439  Tpetra::Details::ProfilingRegion("TpetraExt: Jacobi: Newmatrix HIPESFC");
440 
441  // Final Fillcomplete
442  RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
443  labelList->set("Timer Label", label);
444  if (!params.is_null()) labelList->set("compute global constants", params->get("compute global constants", true));
445 
446  // NOTE: MSAK already fillCompletes, so we have to check here
447  if (!C.isFillComplete()) {
448  RCP<const Export<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode> > dummyExport;
449  C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
450  }
451 }
452 
453 /*********************************************************************************************************/
454 template <class Scalar,
455  class LocalOrdinal,
456  class GlobalOrdinal,
457  class LocalOrdinalViewType>
458 void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode, LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
459  const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Dinv,
460  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
461  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
462  const LocalOrdinalViewType& targetMapToOrigRow_dev,
463  const LocalOrdinalViewType& targetMapToImportRow_dev,
464  const LocalOrdinalViewType& Bcol2Ccol_dev,
465  const LocalOrdinalViewType& Icol2Ccol_dev,
466  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
467  Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
468  const std::string& label,
469  const Teuchos::RCP<Teuchos::ParameterList>& params) {
470  // FIXME: Right now, this is a cut-and-paste of the serial kernel
471  typedef Tpetra::KokkosCompat::KokkosHIPWrapperNode Node;
472 
473  using Teuchos::RCP;
474  using Teuchos::rcp;
475  RCP<Tpetra::Details::ProfilingRegion> MM = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: Jacobi: Reuse HIPCore"));
476 
477  // Lots and lots of typedefs
478  typedef typename Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_host_type KCRS;
479  typedef typename KCRS::StaticCrsGraphType graph_t;
480  typedef typename graph_t::row_map_type::const_type c_lno_view_t;
481  typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
482  typedef typename KCRS::values_type::non_const_type scalar_view_t;
483  typedef typename scalar_view_t::memory_space scalar_memory_space;
484 
485  typedef Scalar SC;
486  typedef LocalOrdinal LO;
487  typedef GlobalOrdinal GO;
488  typedef Node NO;
489  typedef Map<LO, GO, NO> map_type;
490  const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
491  const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
492  const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
493 
494  // KDDKDD UVM Without UVM, need to copy targetMap arrays to host.
495  // KDDKDD UVM Ideally, this function would run on device and use
496  // KDDKDD UVM KokkosKernels instead of this host implementation.
497  auto targetMapToOrigRow =
498  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
499  targetMapToOrigRow_dev);
500  auto targetMapToImportRow =
501  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
502  targetMapToImportRow_dev);
503  auto Bcol2Ccol =
504  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
505  Bcol2Ccol_dev);
506  auto Icol2Ccol =
507  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
508  Icol2Ccol_dev);
509 
510  // Sizes
511  RCP<const map_type> Ccolmap = C.getColMap();
512  size_t m = Aview.origMatrix->getLocalNumRows();
513  size_t n = Ccolmap->getLocalNumElements();
514 
515  // Grab the Kokkos::SparseCrsMatrices & inner stuff
516  const KCRS& Amat = Aview.origMatrix->getLocalMatrixHost();
517  const KCRS& Bmat = Bview.origMatrix->getLocalMatrixHost();
518  const KCRS& Cmat = C.getLocalMatrixHost();
519 
520  c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
521  const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
522  const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
523  scalar_view_t Cvals = Cmat.values;
524 
525  c_lno_view_t Irowptr;
526  lno_nnz_view_t Icolind;
527  scalar_view_t Ivals;
528  if (!Bview.importMatrix.is_null()) {
529  auto lclB = Bview.importMatrix->getLocalMatrixHost();
530  Irowptr = lclB.graph.row_map;
531  Icolind = lclB.graph.entries;
532  Ivals = lclB.values;
533  }
534 
535  // Jacobi-specific inner stuff
536  auto Dvals =
537  Dinv.template getLocalView<scalar_memory_space>(Access::ReadOnly);
538 
539  RCP<Tpetra::Details::ProfilingRegion> MM2 = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: Jacobi: Reuse HIPCore - Compare"));
540 
541  // The status array will contain the index into colind where this entry was last deposited.
542  // c_status[i] < CSR_ip - not in the row yet
543  // c_status[i] >= CSR_ip - this is the entry where you can find the data
544  // We start with this filled with INVALID's indicating that there are no entries yet.
545  // Sadly, this complicates the code due to the fact that size_t's are unsigned.
546  std::vector<size_t> c_status(n, ST_INVALID);
547 
548  // For each row of A/C
549  size_t CSR_ip = 0, OLD_ip = 0;
550  for (size_t i = 0; i < m; i++) {
551  // First fill the c_status array w/ locations where we're allowed to
552  // generate nonzeros for this row
553  OLD_ip = Crowptr[i];
554  CSR_ip = Crowptr[i + 1];
555  for (size_t k = OLD_ip; k < CSR_ip; k++) {
556  c_status[Ccolind[k]] = k;
557 
558  // Reset values in the row of C
559  Cvals[k] = SC_ZERO;
560  }
561 
562  SC minusOmegaDval = -omega * Dvals(i, 0);
563 
564  // Entries of B
565  for (size_t j = Browptr[i]; j < Browptr[i + 1]; j++) {
566  Scalar Bval = Bvals[j];
567  if (Bval == SC_ZERO)
568  continue;
569  LO Bij = Bcolind[j];
570  LO Cij = Bcol2Ccol[Bij];
571 
572  TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
573  std::runtime_error, "Trying to insert a new entry into a static graph");
574 
575  Cvals[c_status[Cij]] = Bvals[j];
576  }
577 
578  // Entries of -omega * Dinv * A * B
579  for (size_t k = Arowptr[i]; k < Arowptr[i + 1]; k++) {
580  LO Aik = Acolind[k];
581  const SC Aval = Avals[k];
582  if (Aval == SC_ZERO)
583  continue;
584 
585  if (targetMapToOrigRow[Aik] != LO_INVALID) {
586  // Local matrix
587  size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
588 
589  for (size_t j = Browptr[Bk]; j < Browptr[Bk + 1]; ++j) {
590  LO Bkj = Bcolind[j];
591  LO Cij = Bcol2Ccol[Bkj];
592 
593  TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
594  std::runtime_error, "Trying to insert a new entry into a static graph");
595 
596  Cvals[c_status[Cij]] += minusOmegaDval * Aval * Bvals[j];
597  }
598 
599  } else {
600  // Remote matrix
601  size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
602  for (size_t j = Irowptr[Ik]; j < Irowptr[Ik + 1]; ++j) {
603  LO Ikj = Icolind[j];
604  LO Cij = Icol2Ccol[Ikj];
605 
606  TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
607  std::runtime_error, "Trying to insert a new entry into a static graph");
608 
609  Cvals[c_status[Cij]] += minusOmegaDval * Aval * Ivals[j];
610  }
611  }
612  }
613  }
614 
615  MM = Teuchos::null;
616  MM = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: Jacobi: Reuse ESFC"));
617 
618  C.fillComplete(C.getDomainMap(), C.getRangeMap());
619 }
620 
621 /*********************************************************************************************************/
622 template <class Scalar,
623  class LocalOrdinal,
624  class GlobalOrdinal,
625  class LocalOrdinalViewType>
626 void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode, LocalOrdinalViewType>::jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
627  const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Dinv,
628  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
629  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
630  const LocalOrdinalViewType& Acol2Brow,
631  const LocalOrdinalViewType& Acol2Irow,
632  const LocalOrdinalViewType& Bcol2Ccol,
633  const LocalOrdinalViewType& Icol2Ccol,
634  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
635  Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
636  const std::string& label,
637  const Teuchos::RCP<Teuchos::ParameterList>& params) {
638  // Check if the diagonal entries exist in debug mode
639  const bool debug = Tpetra::Details::Behavior::debug();
640  if (debug) {
641  auto rowMap = Aview.origMatrix->getRowMap();
642  Tpetra::Vector<Scalar> diags(rowMap);
643  Aview.origMatrix->getLocalDiagCopy(diags);
644  size_t diagLength = rowMap->getLocalNumElements();
645  Teuchos::Array<Scalar> diagonal(diagLength);
646  diags.get1dCopy(diagonal());
647 
648  for (size_t i = 0; i < diagLength; ++i) {
649  TEUCHOS_TEST_FOR_EXCEPTION(diagonal[i] == Teuchos::ScalarTraits<Scalar>::zero(),
650  std::runtime_error,
651  "Matrix A has a zero/missing diagonal: " << diagonal[i] << std::endl
652  << "KokkosKernels Jacobi-fused SpGEMM requires nonzero diagonal entries in A" << std::endl);
653  }
654  }
655 
656  using Teuchos::RCP;
657  using Teuchos::rcp;
658  RCP<Tpetra::Details::ProfilingRegion> MM = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: Jacobi: Newmatrix KokkosKernels"));
659 
660  // Usings
661  using device_t = typename Tpetra::KokkosCompat::KokkosHIPWrapperNode::device_type;
663  using graph_t = typename matrix_t::StaticCrsGraphType;
664  using lno_view_t = typename graph_t::row_map_type::non_const_type;
665  using int_view_t = Kokkos::View<int*,
666  typename lno_view_t::array_layout,
667  typename lno_view_t::memory_space,
668  typename lno_view_t::memory_traits>;
669  using c_lno_view_t = typename graph_t::row_map_type::const_type;
670  using lno_nnz_view_t = typename graph_t::entries_type::non_const_type;
671  using scalar_view_t = typename matrix_t::values_type::non_const_type;
672 
673  // KokkosKernels handle
674  using handle_t = typename KokkosKernels::Experimental::KokkosKernelsHandle<
675  typename lno_view_t::const_value_type, typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
676  typename device_t::execution_space, typename device_t::memory_space, typename device_t::memory_space>;
677  using int_handle_t = typename KokkosKernels::Experimental::KokkosKernelsHandle<
678  typename int_view_t::const_value_type, typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
679  typename device_t::execution_space, typename device_t::memory_space, typename device_t::memory_space>;
680 
681  // Merge the B and Bimport matrices
682  const matrix_t Bmerged = Tpetra::MMdetails::merge_matrices(Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C.getColMap()->getLocalNumElements());
683 
684  // Get the properties and arrays of input matrices
685  const matrix_t& Amat = Aview.origMatrix->getLocalMatrixDevice();
686  const matrix_t& Bmat = Bview.origMatrix->getLocalMatrixDevice();
687 
688  typename handle_t::nnz_lno_t AnumRows = Amat.numRows();
689  typename handle_t::nnz_lno_t BnumRows = Bmerged.numRows();
690  typename handle_t::nnz_lno_t BnumCols = Bmerged.numCols();
691 
692  c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmerged.graph.row_map;
693  const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmerged.graph.entries;
694  const scalar_view_t Avals = Amat.values, Bvals = Bmerged.values;
695 
696  // Arrays of the output matrix
697  lno_view_t row_mapC(Kokkos::ViewAllocateWithoutInitializing("row_mapC"), AnumRows + 1);
698  lno_nnz_view_t entriesC;
699  scalar_view_t valuesC;
700 
701  // Options
702  int team_work_size = 16;
703  std::string myalg("SPGEMM_KK_MEMORY");
704  if (!params.is_null()) {
705  if (params->isParameter("hip: algorithm"))
706  myalg = params->get("hip: algorithm", myalg);
707  if (params->isParameter("hip: team work size"))
708  team_work_size = params->get("hip: team work size", team_work_size);
709  }
710 
711  // Get the algorithm mode
712  std::string nodename("HIP");
713  std::string alg = nodename + std::string(" algorithm");
714  if (!params.is_null() && params->isParameter(alg)) myalg = params->get(alg, myalg);
715  KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
716 
717  Tpetra::Details::IntRowPtrHelper<decltype(Bmerged)> irph(Bmerged.nnz(), Bmerged.graph.row_map);
718  const bool useIntRowptrs =
719  irph.shouldUseIntRowptrs() &&
720  Aview.origMatrix->getApplyHelper()->shouldUseIntRowptrs();
721 
722  if (useIntRowptrs) {
723  int_handle_t kh;
724  kh.create_spgemm_handle(alg_enum);
725  kh.set_team_work_size(team_work_size);
726 
727  int_view_t int_row_mapC(Kokkos::ViewAllocateWithoutInitializing("int_row_mapC"), AnumRows + 1);
728 
729  auto Aint = Aview.origMatrix->getApplyHelper()->getIntRowptrMatrix(Amat);
730  auto Bint = irph.getIntRowptrMatrix(Bmerged);
731  {
732  Tpetra::Details::ProfilingRegion MM2("TpetraExt: Jacobi: Newmatrix KokkosKernels symbolic int");
733 
734  KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
735  Aint.graph.row_map, Aint.graph.entries, false,
736  Bint.graph.row_map, Bint.graph.entries, false,
737  int_row_mapC);
738  }
739 
740  Tpetra::Details::ProfilingRegion MM2("TpetraExt: Jacobi: Newmatrix KokkosKernels numeric int");
741 
742  size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
743  if (c_nnz_size) {
744  entriesC = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
745  valuesC = scalar_view_t(Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
746  }
747 
748  // even though there is no TPL for this, we have to use the same handle that was used in the symbolic phase,
749  // so need to have a special int-typed call for this as well.
750  KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
751  Aint.graph.row_map, Aint.graph.entries, Amat.values, false,
752  Bint.graph.row_map, Bint.graph.entries, Bint.values, false,
753  int_row_mapC, entriesC, valuesC,
754  omega, Dinv.getLocalViewDevice(Access::ReadOnly));
755  // transfer the integer rowptrs back to the correct rowptr type
756  Kokkos::parallel_for(
757  int_row_mapC.size(), KOKKOS_LAMBDA(int i) { row_mapC(i) = int_row_mapC(i); });
758  kh.destroy_spgemm_handle();
759  } else {
760  handle_t kh;
761  kh.create_spgemm_handle(alg_enum);
762  kh.set_team_work_size(team_work_size);
763 
764  {
765  Tpetra::Details::ProfilingRegion MM2("TpetraExt: Jacobi: Newmatrix KokkosKernels symbolic non-int");
766 
767  KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
768  Amat.graph.row_map, Amat.graph.entries, false,
769  Bmerged.graph.row_map, Bmerged.graph.entries, false,
770  row_mapC);
771  }
772 
773  Tpetra::Details::ProfilingRegion MM2("TpetraExt: Jacobi: Newmatrix KokkosKernels numeric non-int");
774 
775  size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
776  if (c_nnz_size) {
777  entriesC = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
778  valuesC = scalar_view_t(Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
779  }
780  KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
781  Amat.graph.row_map, Amat.graph.entries, Amat.values, false,
782  Bmerged.graph.row_map, Bmerged.graph.entries, Bmerged.values, false,
783  row_mapC, entriesC, valuesC,
784  omega, Dinv.getLocalViewDevice(Access::ReadOnly));
785  kh.destroy_spgemm_handle();
786  }
787 
788  MM = Teuchos::null;
789  MM = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: Jacobi: Newmatrix HIPSort"));
790 
791  // Sort & set values
792  if (params.is_null() || params->get("sort entries", true))
793  Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
794  C.setAllValues(row_mapC, entriesC, valuesC);
795 
796  MM = Teuchos::null;
797  MM = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: Jacobi: Newmatrix HIPESFC"));
798 
799  // Final Fillcomplete
800  Teuchos::RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
801  labelList->set("Timer Label", label);
802  if (!params.is_null()) labelList->set("compute global constants", params->get("compute global constants", true));
803  Teuchos::RCP<const Export<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode> > dummyExport;
804  C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
805 }
806 
807 } // namespace MMdetails
808 } // namespace Tpetra
809 
810 #endif // HIP
811 
812 #endif
static bool debug()
Whether Tpetra is in debug mode.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
A distributed dense vector.