Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
TpetraExt_MatrixMatrix_HIP.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_MATRIXMATRIX_HIP_DEF_HPP
11 #define TPETRA_MATRIXMATRIX_HIP_DEF_HPP
12 
13 #include "Tpetra_Details_IntRowPtrHelper.hpp"
14 
15 #ifdef HAVE_TPETRA_INST_HIP
16 namespace Tpetra {
17 namespace MMdetails {
18 
19 /*********************************************************************************************************/
20 // MMM KernelWrappers for Partial Specialization to HIP
21 template<class Scalar,
22  class LocalOrdinal,
23  class GlobalOrdinal,
24  class LocalOrdinalViewType>
25 struct KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType> {
26  static inline void mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
27  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
28  const LocalOrdinalViewType & Acol2Brow,
29  const LocalOrdinalViewType & Acol2Irow,
30  const LocalOrdinalViewType & Bcol2Ccol,
31  const LocalOrdinalViewType & Icol2Ccol,
32  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
33  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
34  const std::string& label = std::string(),
35  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
36 
37 
38 
39  static inline void mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
40  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
41  const LocalOrdinalViewType & Acol2Brow,
42  const LocalOrdinalViewType & Acol2Irow,
43  const LocalOrdinalViewType & Bcol2Ccol,
44  const LocalOrdinalViewType & Icol2Ccol,
45  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
46  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
47  const std::string& label = std::string(),
48  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
49 
50 };
51 
52 // Jacobi KernelWrappers for Partial Specialization to HIP
53 template<class Scalar,
54  class LocalOrdinal,
55  class GlobalOrdinal, class LocalOrdinalViewType>
56 struct KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType> {
57  static inline void jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
58  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> & Dinv,
59  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
60  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
61  const LocalOrdinalViewType & Acol2Brow,
62  const LocalOrdinalViewType & Acol2Irow,
63  const LocalOrdinalViewType & Bcol2Ccol,
64  const LocalOrdinalViewType & Icol2Ccol,
65  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
66  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
67  const std::string& label = std::string(),
68  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
69 
70  static inline void jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
71  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> & Dinv,
72  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
73  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
74  const LocalOrdinalViewType & Acol2Brow,
75  const LocalOrdinalViewType & Acol2Irow,
76  const LocalOrdinalViewType & Bcol2Ccol,
77  const LocalOrdinalViewType & Icol2Ccol,
78  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
79  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
80  const std::string& label = std::string(),
81  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
82 
83  static inline void jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
84  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> & Dinv,
85  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
86  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
87  const LocalOrdinalViewType & Acol2Brow,
88  const LocalOrdinalViewType & Acol2Irow,
89  const LocalOrdinalViewType & Bcol2Ccol,
90  const LocalOrdinalViewType & Icol2Ccol,
91  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
92  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
93  const std::string& label = std::string(),
94  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
95 };
96 
97 
98 /*********************************************************************************************************/
99 // AB NewMatrix Kernel wrappers (KokkosKernels/HIP Version)
100 template<class Scalar,
101  class LocalOrdinal,
102  class GlobalOrdinal,
103  class LocalOrdinalViewType>
104 void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
105  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
106  const LocalOrdinalViewType & Acol2Brow,
107  const LocalOrdinalViewType & Acol2Irow,
108  const LocalOrdinalViewType & Bcol2Ccol,
109  const LocalOrdinalViewType & Icol2Ccol,
110  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
111  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
112  const std::string& label,
113  const Teuchos::RCP<Teuchos::ParameterList>& params) {
114 
115 #ifdef HAVE_TPETRA_MMM_TIMINGS
116  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
117  using Teuchos::TimeMonitor;
118  Teuchos::RCP<TimeMonitor> MM = rcp(new TimeMonitor(*(TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix HIPWrapper")))));
119 #endif
120  // Node-specific code
121  typedef Tpetra::KokkosCompat::KokkosHIPWrapperNode Node;
122  std::string nodename("HIP");
123 
124  // Lots and lots of typedefs
125  using Teuchos::RCP;
127  typedef typename KCRS::device_type device_t;
128  typedef typename KCRS::StaticCrsGraphType graph_t;
129  typedef typename graph_t::row_map_type::non_const_type lno_view_t;
130  using int_view_t = Kokkos::View<int *, typename lno_view_t::array_layout, typename lno_view_t::memory_space, typename lno_view_t::memory_traits>;
131  typedef typename graph_t::row_map_type::const_type c_lno_view_t;
132  typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
133  typedef typename KCRS::values_type::non_const_type scalar_view_t;
134  //typedef typename graph_t::row_map_type::const_type lno_view_t_const;
135 
136  // Options
137  int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
138  std::string myalg("SPGEMM_KK_MEMORY");
139  if(!params.is_null()) {
140  if(params->isParameter("hip: algorithm"))
141  myalg = params->get("hip: algorithm",myalg);
142  if(params->isParameter("hip: team work size"))
143  team_work_size = params->get("hip: team work size",team_work_size);
144  }
145 
146  // KokkosKernelsHandle
147  typedef KokkosKernels::Experimental::KokkosKernelsHandle<
148  typename lno_view_t::const_value_type,typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
149  typename device_t::execution_space, typename device_t::memory_space,typename device_t::memory_space > KernelHandle;
150  using IntKernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle<
151  typename int_view_t::const_value_type,typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
152  typename device_t::execution_space, typename device_t::memory_space,typename device_t::memory_space >;
153 
154  // Grab the Kokkos::SparseCrsMatrices
155  const KCRS & Amat = Aview.origMatrix->getLocalMatrixDevice();
156  const KCRS & Bmat = Bview.origMatrix->getLocalMatrixDevice();
157 
158  c_lno_view_t Arowptr = Amat.graph.row_map,
159  Browptr = Bmat.graph.row_map;
160  const lno_nnz_view_t Acolind = Amat.graph.entries,
161  Bcolind = Bmat.graph.entries;
162  const scalar_view_t Avals = Amat.values,
163  Bvals = Bmat.values;
164 
165  // Get the algorithm mode
166  std::string alg = nodename+std::string(" algorithm");
167  // printf("DEBUG: Using kernel: %s\n",myalg.c_str());
168  if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
169  KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
170 
171  // Merge the B and Bimport matrices
172  const KCRS Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
173 
174 #ifdef HAVE_TPETRA_MMM_TIMINGS
175  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix HIPCore"))));
176 #endif
177 
178  // Do the multiply on whatever we've got
179  typename KernelHandle::nnz_lno_t AnumRows = Amat.numRows();
180  typename KernelHandle::nnz_lno_t BnumRows = Bmerged.numRows();
181  typename KernelHandle::nnz_lno_t BnumCols = Bmerged.numCols();
182 
183  // regardless of whether integer row ptrs are used, need to ultimately produce the row pointer, entries, and values of the expected types
184  lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing("non_const_lno_row"), AnumRows + 1);
185  lno_nnz_view_t entriesC;
186  scalar_view_t valuesC;
187 
188  Tpetra::Details::IntRowPtrHelper<decltype(Bmerged)> irph(Bmerged.nnz(), Bmerged.graph.row_map);
189  const bool useIntRowptrs =
190  irph.shouldUseIntRowptrs() &&
191  Aview.origMatrix->getApplyHelper()->shouldUseIntRowptrs();
192 
193  if (useIntRowptrs) {
194  IntKernelHandle kh;
195  kh.create_spgemm_handle(alg_enum);
196  kh.set_team_work_size(team_work_size);
197 
198  int_view_t int_row_mapC (Kokkos::ViewAllocateWithoutInitializing("non_const_int_row"), AnumRows + 1);
199 
200  auto Aint = Aview.origMatrix->getApplyHelper()->getIntRowptrMatrix(Amat);
201  auto Bint = irph.getIntRowptrMatrix(Bmerged);
202  KokkosSparse::Experimental::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Aint.graph.row_map,Aint.graph.entries,false,Bint.graph.row_map,Bint.graph.entries,false, int_row_mapC);
203 
204  size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
205  if (c_nnz_size){
206  entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
207  valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
208  }
209  KokkosSparse::Experimental::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Aint.graph.row_map,Aint.graph.entries,Aint.values,false,Bint.graph.row_map,Bint.graph.entries,Bint.values,false,int_row_mapC,entriesC,valuesC);
210  // transfer the integer rowptrs back to the correct rowptr type
211  Kokkos::parallel_for(int_row_mapC.size(), KOKKOS_LAMBDA(int i){ row_mapC(i) = int_row_mapC(i);});
212  kh.destroy_spgemm_handle();
213  } else {
214  KernelHandle kh;
215  kh.create_spgemm_handle(alg_enum);
216  kh.set_team_work_size(team_work_size);
217 
218  KokkosSparse::Experimental::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Amat.graph.row_map,Amat.graph.entries,false,Bmerged.graph.row_map,Bmerged.graph.entries,false,row_mapC);
219 
220  size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
221  if (c_nnz_size){
222  entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
223  valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
224  }
225 
226  KokkosSparse::Experimental::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Amat.graph.row_map,Amat.graph.entries,Amat.values,false,Bmerged.graph.row_map,Bmerged.graph.entries,Bmerged.values,false,row_mapC,entriesC,valuesC);
227  kh.destroy_spgemm_handle();
228  }
229 
230 #ifdef HAVE_TPETRA_MMM_TIMINGS
231  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix HIPSort"))));
232 #endif
233 
234  // Sort & set values
235  if (params.is_null() || params->get("sort entries",true))
236  Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
237  C.setAllValues(row_mapC,entriesC,valuesC);
238 
239 #ifdef HAVE_TPETRA_MMM_TIMINGS
240  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix HIPESFC"))));
241 #endif
242 
243  // Final Fillcomplete
244  RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
245  labelList->set("Timer Label",label);
246  if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
247  RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > dummyExport;
248  C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
249 }
250 
251 
252 /*********************************************************************************************************/
253 template<class Scalar,
254  class LocalOrdinal,
255  class GlobalOrdinal,
256  class LocalOrdinalViewType>
257 void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
258  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
259  const LocalOrdinalViewType & targetMapToOrigRow_dev,
260  const LocalOrdinalViewType & targetMapToImportRow_dev,
261  const LocalOrdinalViewType & Bcol2Ccol_dev,
262  const LocalOrdinalViewType & Icol2Ccol_dev,
263  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
264  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
265  const std::string& label,
266  const Teuchos::RCP<Teuchos::ParameterList>& params) {
267 
268  // FIXME: Right now, this is a cut-and-paste of the serial kernel
269  typedef Tpetra::KokkosCompat::KokkosHIPWrapperNode Node;
270 
271 #ifdef HAVE_TPETRA_MMM_TIMINGS
272  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
273  using Teuchos::TimeMonitor;
274  Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Reuse SerialCore"))));
275  Teuchos::RCP<Teuchos::TimeMonitor> MM2;
276 #endif
277  using Teuchos::RCP;
278  using Teuchos::rcp;
279 
280 
281  // Lots and lots of typedefs
282  typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
283  typedef typename KCRS::StaticCrsGraphType graph_t;
284  typedef typename graph_t::row_map_type::const_type c_lno_view_t;
285  typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
286  typedef typename KCRS::values_type::non_const_type scalar_view_t;
287 
288  typedef Scalar SC;
289  typedef LocalOrdinal LO;
290  typedef GlobalOrdinal GO;
291  typedef Node NO;
292  typedef Map<LO,GO,NO> map_type;
293  const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
294  const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
295  const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
296 
297  // KDDKDD UVM Without UVM, need to copy targetMap arrays to host.
298  // KDDKDD UVM Ideally, this function would run on device and use
299  // KDDKDD UVM KokkosKernels instead of this host implementation.
300  auto targetMapToOrigRow =
301  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
302  targetMapToOrigRow_dev);
303  auto targetMapToImportRow =
304  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
305  targetMapToImportRow_dev);
306  auto Bcol2Ccol =
307  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
308  Bcol2Ccol_dev);
309  auto Icol2Ccol =
310  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
311  Icol2Ccol_dev);
312 
313  // Sizes
314  RCP<const map_type> Ccolmap = C.getColMap();
315  size_t m = Aview.origMatrix->getLocalNumRows();
316  size_t n = Ccolmap->getLocalNumElements();
317 
318  // Grab the Kokkos::SparseCrsMatrices & inner stuff
319  const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
320  const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
321  const KCRS & Cmat = C.getLocalMatrixHost();
322 
323  c_lno_view_t Arowptr = Amat.graph.row_map,
324  Browptr = Bmat.graph.row_map,
325  Crowptr = Cmat.graph.row_map;
326  const lno_nnz_view_t Acolind = Amat.graph.entries,
327  Bcolind = Bmat.graph.entries,
328  Ccolind = Cmat.graph.entries;
329  const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
330  scalar_view_t Cvals = Cmat.values;
331 
332  c_lno_view_t Irowptr;
333  lno_nnz_view_t Icolind;
334  scalar_view_t Ivals;
335  if(!Bview.importMatrix.is_null()) {
336  auto lclB = Bview.importMatrix->getLocalMatrixHost();
337  Irowptr = lclB.graph.row_map;
338  Icolind = lclB.graph.entries;
339  Ivals = lclB.values;
340  }
341 
342 #ifdef HAVE_TPETRA_MMM_TIMINGS
343  MM2 = Teuchos::null; MM2 = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix SerialCore - Compare"))));
344 #endif
345 
346  // Classic csr assembly (low memory edition)
347  // mfh 27 Sep 2016: The c_status array is an implementation detail
348  // of the local sparse matrix-matrix multiply routine.
349 
350  // The status array will contain the index into colind where this entry was last deposited.
351  // c_status[i] < CSR_ip - not in the row yet
352  // c_status[i] >= CSR_ip - this is the entry where you can find the data
353  // We start with this filled with INVALID's indicating that there are no entries yet.
354  // Sadly, this complicates the code due to the fact that size_t's are unsigned.
355  std::vector<size_t> c_status(n, ST_INVALID);
356 
357  // For each row of A/C
358  size_t CSR_ip = 0, OLD_ip = 0;
359  for (size_t i = 0; i < m; i++) {
360  // First fill the c_status array w/ locations where we're allowed to
361  // generate nonzeros for this row
362  OLD_ip = Crowptr[i];
363  CSR_ip = Crowptr[i+1];
364  for (size_t k = OLD_ip; k < CSR_ip; k++) {
365  c_status[Ccolind[k]] = k;
366 
367  // Reset values in the row of C
368  Cvals[k] = SC_ZERO;
369  }
370 
371  for (size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
372  LO Aik = Acolind[k];
373  const SC Aval = Avals[k];
374  if (Aval == SC_ZERO)
375  continue;
376 
377  if (targetMapToOrigRow[Aik] != LO_INVALID) {
378  // Local matrix
379  size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
380 
381  for (size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
382  LO Bkj = Bcolind[j];
383  LO Cij = Bcol2Ccol[Bkj];
384 
385  TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
386  std::runtime_error, "Trying to insert a new entry (" << i << "," << Cij << ") into a static graph " <<
387  "(c_status = " << c_status[Cij] << " of [" << OLD_ip << "," << CSR_ip << "))");
388 
389  Cvals[c_status[Cij]] += Aval * Bvals[j];
390  }
391 
392  } else {
393  // Remote matrix
394  size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
395  for (size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
396  LO Ikj = Icolind[j];
397  LO Cij = Icol2Ccol[Ikj];
398 
399  TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
400  std::runtime_error, "Trying to insert a new entry (" << i << "," << Cij << ") into a static graph " <<
401  "(c_status = " << c_status[Cij] << " of [" << OLD_ip << "," << CSR_ip << "))");
402 
403  Cvals[c_status[Cij]] += Aval * Ivals[j];
404  }
405  }
406  }
407  }
408 
409  C.fillComplete(C.getDomainMap(), C.getRangeMap());
410 }
411 
412 /*********************************************************************************************************/
413 template<class Scalar,
414  class LocalOrdinal,
415  class GlobalOrdinal,
416  class LocalOrdinalViewType>
417 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
418  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> & Dinv,
419  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
420  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
421  const LocalOrdinalViewType & Acol2Brow,
422  const LocalOrdinalViewType & Acol2Irow,
423  const LocalOrdinalViewType & Bcol2Ccol,
424  const LocalOrdinalViewType & Icol2Ccol,
425  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
426  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
427  const std::string& label,
428  const Teuchos::RCP<Teuchos::ParameterList>& params) {
429 
430 #ifdef HAVE_TPETRA_MMM_TIMINGS
431  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
432  using Teuchos::TimeMonitor;
433  Teuchos::RCP<TimeMonitor> MM;
434 #endif
435 
436  // Node-specific code
437  using Teuchos::RCP;
438 
439  // Options
440  //int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer // unreferenced
441  std::string myalg("KK");
442  if(!params.is_null()) {
443  if(params->isParameter("hip: jacobi algorithm"))
444  myalg = params->get("hip: jacobi algorithm",myalg);
445  }
446 
447  if(myalg == "MSAK") {
448  ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_MultiplyScaleAddKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
449  }
450  else if(myalg == "KK") {
451  jacobi_A_B_newmatrix_KokkosKernels(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
452  }
453  else {
454  throw std::runtime_error("Tpetra::MatrixMatrix::Jacobi newmatrix unknown kernel");
455  }
456 
457 #ifdef HAVE_TPETRA_MMM_TIMINGS
458  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix HIPESFC"))));
459 #endif
460 
461  // Final Fillcomplete
462  RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
463  labelList->set("Timer Label",label);
464  if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
465 
466  // NOTE: MSAK already fillCompletes, so we have to check here
467  if(!C.isFillComplete()) {
468  RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > dummyExport;
469  C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
470  }
471 
472 }
473 
474 
475 
476 /*********************************************************************************************************/
477 template<class Scalar,
478  class LocalOrdinal,
479  class GlobalOrdinal,
480  class LocalOrdinalViewType>
481 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
482  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> & Dinv,
483  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
484  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
485  const LocalOrdinalViewType & targetMapToOrigRow_dev,
486  const LocalOrdinalViewType & targetMapToImportRow_dev,
487  const LocalOrdinalViewType & Bcol2Ccol_dev,
488  const LocalOrdinalViewType & Icol2Ccol_dev,
489  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
490  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
491  const std::string& label,
492  const Teuchos::RCP<Teuchos::ParameterList>& params) {
493 
494  // FIXME: Right now, this is a cut-and-paste of the serial kernel
495  typedef Tpetra::KokkosCompat::KokkosHIPWrapperNode Node;
496 
497 #ifdef HAVE_TPETRA_MMM_TIMINGS
498  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
499  using Teuchos::TimeMonitor;
500  Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Reuse HIPCore"))));
501  Teuchos::RCP<Teuchos::TimeMonitor> MM2;
502 #endif
503  using Teuchos::RCP;
504  using Teuchos::rcp;
505 
506  // Lots and lots of typedefs
507  typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
508  typedef typename KCRS::StaticCrsGraphType graph_t;
509  typedef typename graph_t::row_map_type::const_type c_lno_view_t;
510  typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
511  typedef typename KCRS::values_type::non_const_type scalar_view_t;
512  typedef typename scalar_view_t::memory_space scalar_memory_space;
513 
514  typedef Scalar SC;
515  typedef LocalOrdinal LO;
516  typedef GlobalOrdinal GO;
517  typedef Node NO;
518  typedef Map<LO,GO,NO> map_type;
519  const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
520  const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
521  const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
522 
523  // KDDKDD UVM Without UVM, need to copy targetMap arrays to host.
524  // KDDKDD UVM Ideally, this function would run on device and use
525  // KDDKDD UVM KokkosKernels instead of this host implementation.
526  auto targetMapToOrigRow =
527  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
528  targetMapToOrigRow_dev);
529  auto targetMapToImportRow =
530  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
531  targetMapToImportRow_dev);
532  auto Bcol2Ccol =
533  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
534  Bcol2Ccol_dev);
535  auto Icol2Ccol =
536  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
537  Icol2Ccol_dev);
538 
539 
540  // Sizes
541  RCP<const map_type> Ccolmap = C.getColMap();
542  size_t m = Aview.origMatrix->getLocalNumRows();
543  size_t n = Ccolmap->getLocalNumElements();
544 
545  // Grab the Kokkos::SparseCrsMatrices & inner stuff
546  const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
547  const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
548  const KCRS & Cmat = C.getLocalMatrixHost();
549 
550  c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
551  const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
552  const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
553  scalar_view_t Cvals = Cmat.values;
554 
555  c_lno_view_t Irowptr;
556  lno_nnz_view_t Icolind;
557  scalar_view_t Ivals;
558  if(!Bview.importMatrix.is_null()) {
559  auto lclB = Bview.importMatrix->getLocalMatrixHost();
560  Irowptr = lclB.graph.row_map;
561  Icolind = lclB.graph.entries;
562  Ivals = lclB.values;
563  }
564 
565  // Jacobi-specific inner stuff
566  auto Dvals =
567  Dinv.template getLocalView<scalar_memory_space>(Access::ReadOnly);
568 
569 #ifdef HAVE_TPETRA_MMM_TIMINGS
570  MM2 = Teuchos::null; MM2 = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Reuse HIPCore - Compare"))));
571 #endif
572 
573  // The status array will contain the index into colind where this entry was last deposited.
574  // c_status[i] < CSR_ip - not in the row yet
575  // c_status[i] >= CSR_ip - this is the entry where you can find the data
576  // We start with this filled with INVALID's indicating that there are no entries yet.
577  // Sadly, this complicates the code due to the fact that size_t's are unsigned.
578  std::vector<size_t> c_status(n, ST_INVALID);
579 
580  // For each row of A/C
581  size_t CSR_ip = 0, OLD_ip = 0;
582  for (size_t i = 0; i < m; i++) {
583 
584  // First fill the c_status array w/ locations where we're allowed to
585  // generate nonzeros for this row
586  OLD_ip = Crowptr[i];
587  CSR_ip = Crowptr[i+1];
588  for (size_t k = OLD_ip; k < CSR_ip; k++) {
589  c_status[Ccolind[k]] = k;
590 
591  // Reset values in the row of C
592  Cvals[k] = SC_ZERO;
593  }
594 
595  SC minusOmegaDval = -omega*Dvals(i,0);
596 
597  // Entries of B
598  for (size_t j = Browptr[i]; j < Browptr[i+1]; j++) {
599  Scalar Bval = Bvals[j];
600  if (Bval == SC_ZERO)
601  continue;
602  LO Bij = Bcolind[j];
603  LO Cij = Bcol2Ccol[Bij];
604 
605  TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
606  std::runtime_error, "Trying to insert a new entry into a static graph");
607 
608  Cvals[c_status[Cij]] = Bvals[j];
609  }
610 
611  // Entries of -omega * Dinv * A * B
612  for (size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
613  LO Aik = Acolind[k];
614  const SC Aval = Avals[k];
615  if (Aval == SC_ZERO)
616  continue;
617 
618  if (targetMapToOrigRow[Aik] != LO_INVALID) {
619  // Local matrix
620  size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
621 
622  for (size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
623  LO Bkj = Bcolind[j];
624  LO Cij = Bcol2Ccol[Bkj];
625 
626  TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
627  std::runtime_error, "Trying to insert a new entry into a static graph");
628 
629  Cvals[c_status[Cij]] += minusOmegaDval * Aval * Bvals[j];
630  }
631 
632  } else {
633  // Remote matrix
634  size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
635  for (size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
636  LO Ikj = Icolind[j];
637  LO Cij = Icol2Ccol[Ikj];
638 
639  TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
640  std::runtime_error, "Trying to insert a new entry into a static graph");
641 
642  Cvals[c_status[Cij]] += minusOmegaDval * Aval * Ivals[j];
643  }
644  }
645  }
646  }
647 
648 #ifdef HAVE_TPETRA_MMM_TIMINGS
649  MM2= Teuchos::null;
650  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Reuse ESFC"))));
651 #endif
652 
653  C.fillComplete(C.getDomainMap(), C.getRangeMap());
654 
655 }
656 
657 /*********************************************************************************************************/
658 template<class Scalar,
659  class LocalOrdinal,
660  class GlobalOrdinal,
661  class LocalOrdinalViewType>
662 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
663  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> & Dinv,
664  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
665  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
666  const LocalOrdinalViewType & Acol2Brow,
667  const LocalOrdinalViewType & Acol2Irow,
668  const LocalOrdinalViewType & Bcol2Ccol,
669  const LocalOrdinalViewType & Icol2Ccol,
670  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
671  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
672  const std::string& label,
673  const Teuchos::RCP<Teuchos::ParameterList>& params) {
674 
675 #ifdef HAVE_TPETRA_MMM_TIMINGS
676  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
677  using Teuchos::TimeMonitor;
678  Teuchos::RCP<TimeMonitor> MM;
679 #endif
680 
681  // Check if the diagonal entries exist in debug mode
682  const bool debug = Tpetra::Details::Behavior::debug();
683  if(debug) {
684 
685  auto rowMap = Aview.origMatrix->getRowMap();
686  Tpetra::Vector<Scalar> diags(rowMap);
687  Aview.origMatrix->getLocalDiagCopy(diags);
688  size_t diagLength = rowMap->getLocalNumElements();
689  Teuchos::Array<Scalar> diagonal(diagLength);
690  diags.get1dCopy(diagonal());
691 
692  for(size_t i = 0; i < diagLength; ++i) {
693  TEUCHOS_TEST_FOR_EXCEPTION(diagonal[i] == Teuchos::ScalarTraits<Scalar>::zero(),
694  std::runtime_error,
695  "Matrix A has a zero/missing diagonal: " << diagonal[i] << std::endl <<
696  "KokkosKernels Jacobi-fused SpGEMM requires nonzero diagonal entries in A" << std::endl);
697  }
698  }
699 
700  // Usings
701  using device_t = typename Tpetra::KokkosCompat::KokkosHIPWrapperNode::device_type;
703  using graph_t = typename matrix_t::StaticCrsGraphType;
704  using lno_view_t = typename graph_t::row_map_type::non_const_type;
705  using int_view_t = Kokkos::View<int *,
706  typename lno_view_t::array_layout,
707  typename lno_view_t::memory_space,
708  typename lno_view_t::memory_traits>;
709  using c_lno_view_t = typename graph_t::row_map_type::const_type;
710  using lno_nnz_view_t = typename graph_t::entries_type::non_const_type;
711  using scalar_view_t = typename matrix_t::values_type::non_const_type;
712 
713  // KokkosKernels handle
714  using handle_t = typename KokkosKernels::Experimental::KokkosKernelsHandle<
715  typename lno_view_t::const_value_type,typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
716  typename device_t::execution_space, typename device_t::memory_space,typename device_t::memory_space >;
717  using int_handle_t = typename KokkosKernels::Experimental::KokkosKernelsHandle<
718  typename int_view_t::const_value_type,typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
719  typename device_t::execution_space, typename device_t::memory_space,typename device_t::memory_space >;
720 
721  // Merge the B and Bimport matrices
722  const matrix_t Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
723 
724  // Get the properties and arrays of input matrices
725  const matrix_t & Amat = Aview.origMatrix->getLocalMatrixDevice();
726  const matrix_t & Bmat = Bview.origMatrix->getLocalMatrixDevice();
727 
728  typename handle_t::nnz_lno_t AnumRows = Amat.numRows();
729  typename handle_t::nnz_lno_t BnumRows = Bmerged.numRows();
730  typename handle_t::nnz_lno_t BnumCols = Bmerged.numCols();
731 
732  c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmerged.graph.row_map;
733  const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmerged.graph.entries;
734  const scalar_view_t Avals = Amat.values, Bvals = Bmerged.values;
735 
736  // Arrays of the output matrix
737  lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing("row_mapC"), AnumRows + 1);
738  lno_nnz_view_t entriesC;
739  scalar_view_t valuesC;
740 
741  // Options
742  int team_work_size = 16;
743  std::string myalg("SPGEMM_KK_MEMORY");
744  if(!params.is_null()) {
745  if(params->isParameter("hip: algorithm"))
746  myalg = params->get("hip: algorithm",myalg);
747  if(params->isParameter("hip: team work size"))
748  team_work_size = params->get("hip: team work size",team_work_size);
749  }
750 
751  // Get the algorithm mode
752  std::string nodename("HIP");
753  std::string alg = nodename + std::string(" algorithm");
754  if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
755  KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
756 
757  Tpetra::Details::IntRowPtrHelper<decltype(Bmerged)> irph(Bmerged.nnz(), Bmerged.graph.row_map);
758  const bool useIntRowptrs =
759  irph.shouldUseIntRowptrs() &&
760  Aview.origMatrix->getApplyHelper()->shouldUseIntRowptrs();
761 
762  if (useIntRowptrs) {
763  int_handle_t kh;
764  kh.create_spgemm_handle(alg_enum);
765  kh.set_team_work_size(team_work_size);
766 
767  int_view_t int_row_mapC (Kokkos::ViewAllocateWithoutInitializing("int_row_mapC"), AnumRows + 1);
768 
769  auto Aint = Aview.origMatrix->getApplyHelper()->getIntRowptrMatrix(Amat);
770  auto Bint = irph.getIntRowptrMatrix(Bmerged);
771 
772  KokkosSparse::Experimental::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
773  Aint.graph.row_map, Aint.graph.entries, false,
774  Bint.graph.row_map, Bint.graph.entries, false,
775  int_row_mapC);
776 
777  size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
778  if (c_nnz_size){
779  entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
780  valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
781  }
782 
783  // even though there is no TPL for this, we have to use the same handle that was used in the symbolic phase,
784  // so need to have a special int-typed call for this as well.
785  KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
786  Aint.graph.row_map, Aint.graph.entries, Amat.values, false,
787  Bint.graph.row_map, Bint.graph.entries, Bint.values, false,
788  int_row_mapC, entriesC, valuesC,
789  omega, Dinv.getLocalViewDevice(Access::ReadOnly));
790  // transfer the integer rowptrs back to the correct rowptr type
791  Kokkos::parallel_for(int_row_mapC.size(), KOKKOS_LAMBDA(int i){ row_mapC(i) = int_row_mapC(i);});
792  kh.destroy_spgemm_handle();
793  } else {
794  handle_t kh;
795  kh.create_spgemm_handle(alg_enum);
796  kh.set_team_work_size(team_work_size);
797 
798  KokkosSparse::Experimental::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
799  Amat.graph.row_map, Amat.graph.entries, false,
800  Bmerged.graph.row_map, Bmerged.graph.entries, false,
801  row_mapC);
802 
803  size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
804  if (c_nnz_size){
805  entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
806  valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
807  }
808  KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
809  Amat.graph.row_map, Amat.graph.entries, Amat.values, false,
810  Bmerged.graph.row_map, Bmerged.graph.entries, Bmerged.values, false,
811  row_mapC, entriesC, valuesC,
812  omega, Dinv.getLocalViewDevice(Access::ReadOnly));
813  kh.destroy_spgemm_handle();
814  }
815 
816 #ifdef HAVE_TPETRA_MMM_TIMINGS
817  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix HIPSort"))));
818 #endif
819 
820  // Sort & set values
821  if (params.is_null() || params->get("sort entries",true))
822  Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
823  C.setAllValues(row_mapC,entriesC,valuesC);
824 
825 #ifdef HAVE_TPETRA_MMM_TIMINGS
826  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix HIPESFC"))));
827 #endif
828 
829  // Final Fillcomplete
830  Teuchos::RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
831  labelList->set("Timer Label",label);
832  if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
833  Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > dummyExport;
834  C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
835 }
836 
837  }//MMdetails
838 }//Tpetra
839 
840 #endif//HIP
841 
842 #endif
static bool debug()
Whether Tpetra is in debug mode.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
A distributed dense vector.