Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
TpetraExt_MatrixMatrix_HIP.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_MATRIXMATRIX_HIP_DEF_HPP
11 #define TPETRA_MATRIXMATRIX_HIP_DEF_HPP
12 
13 #include "Tpetra_Details_IntRowPtrHelper.hpp"
14 
15 #ifdef HAVE_TPETRA_INST_HIP
16 namespace Tpetra {
17 namespace MMdetails {
18 
19 /*********************************************************************************************************/
20 // MMM KernelWrappers for Partial Specialization to HIP
21 template<class Scalar,
22  class LocalOrdinal,
23  class GlobalOrdinal,
24  class LocalOrdinalViewType>
25 struct KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType> {
26  static inline void mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
27  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
28  const LocalOrdinalViewType & Acol2Brow,
29  const LocalOrdinalViewType & Acol2Irow,
30  const LocalOrdinalViewType & Bcol2Ccol,
31  const LocalOrdinalViewType & Icol2Ccol,
32  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
33  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
34  const std::string& label = std::string(),
35  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
36 
37 
38 
39  static inline void mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
40  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
41  const LocalOrdinalViewType & Acol2Brow,
42  const LocalOrdinalViewType & Acol2Irow,
43  const LocalOrdinalViewType & Bcol2Ccol,
44  const LocalOrdinalViewType & Icol2Ccol,
45  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
46  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
47  const std::string& label = std::string(),
48  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
49 
50 };
51 
52 // Jacobi KernelWrappers for Partial Specialization to HIP
53 template<class Scalar,
54  class LocalOrdinal,
55  class GlobalOrdinal, class LocalOrdinalViewType>
56 struct KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType> {
57  static inline void jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
58  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> & Dinv,
59  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
60  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
61  const LocalOrdinalViewType & Acol2Brow,
62  const LocalOrdinalViewType & Acol2Irow,
63  const LocalOrdinalViewType & Bcol2Ccol,
64  const LocalOrdinalViewType & Icol2Ccol,
65  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
66  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
67  const std::string& label = std::string(),
68  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
69 
70  static inline void jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
71  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> & Dinv,
72  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
73  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
74  const LocalOrdinalViewType & Acol2Brow,
75  const LocalOrdinalViewType & Acol2Irow,
76  const LocalOrdinalViewType & Bcol2Ccol,
77  const LocalOrdinalViewType & Icol2Ccol,
78  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
79  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
80  const std::string& label = std::string(),
81  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
82 
83  static inline void jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
84  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> & Dinv,
85  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
86  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
87  const LocalOrdinalViewType & Acol2Brow,
88  const LocalOrdinalViewType & Acol2Irow,
89  const LocalOrdinalViewType & Bcol2Ccol,
90  const LocalOrdinalViewType & Icol2Ccol,
91  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
92  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
93  const std::string& label = std::string(),
94  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
95 };
96 
97 
98 /*********************************************************************************************************/
99 // AB NewMatrix Kernel wrappers (KokkosKernels/HIP Version)
100 template<class Scalar,
101  class LocalOrdinal,
102  class GlobalOrdinal,
103  class LocalOrdinalViewType>
104 void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
105  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
106  const LocalOrdinalViewType & Acol2Brow,
107  const LocalOrdinalViewType & Acol2Irow,
108  const LocalOrdinalViewType & Bcol2Ccol,
109  const LocalOrdinalViewType & Icol2Ccol,
110  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
111  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
112  const std::string& label,
113  const Teuchos::RCP<Teuchos::ParameterList>& params) {
114 
115  // Node-specific code
116  typedef Tpetra::KokkosCompat::KokkosHIPWrapperNode Node;
117  std::string nodename("HIP");
118 
119  // Lots and lots of typedefs
120  using Teuchos::RCP;
121  using Teuchos::rcp;
123  typedef typename KCRS::device_type device_t;
124  typedef typename KCRS::StaticCrsGraphType graph_t;
125  typedef typename graph_t::row_map_type::non_const_type lno_view_t;
126  using int_view_t = Kokkos::View<int *, typename lno_view_t::array_layout, typename lno_view_t::memory_space, typename lno_view_t::memory_traits>;
127  typedef typename graph_t::row_map_type::const_type c_lno_view_t;
128  typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
129  typedef typename KCRS::values_type::non_const_type scalar_view_t;
130  //typedef typename graph_t::row_map_type::const_type lno_view_t_const;
131 
132  RCP<Tpetra::Details::ProfilingRegion> MM = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: MMM: Newmatrix HIPWrapper"));
133 
134  // Options
135  int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
136  std::string myalg("SPGEMM_KK_MEMORY");
137  if(!params.is_null()) {
138  if(params->isParameter("hip: algorithm"))
139  myalg = params->get("hip: algorithm",myalg);
140  if(params->isParameter("hip: team work size"))
141  team_work_size = params->get("hip: team work size",team_work_size);
142  }
143 
144  // KokkosKernelsHandle
145  typedef KokkosKernels::Experimental::KokkosKernelsHandle<
146  typename lno_view_t::const_value_type,typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
147  typename device_t::execution_space, typename device_t::memory_space,typename device_t::memory_space > KernelHandle;
148  using IntKernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle<
149  typename int_view_t::const_value_type,typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
150  typename device_t::execution_space, typename device_t::memory_space,typename device_t::memory_space >;
151 
152  // Grab the Kokkos::SparseCrsMatrices
153  const KCRS & Amat = Aview.origMatrix->getLocalMatrixDevice();
154  const KCRS & Bmat = Bview.origMatrix->getLocalMatrixDevice();
155 
156  c_lno_view_t Arowptr = Amat.graph.row_map,
157  Browptr = Bmat.graph.row_map;
158  const lno_nnz_view_t Acolind = Amat.graph.entries,
159  Bcolind = Bmat.graph.entries;
160  const scalar_view_t Avals = Amat.values,
161  Bvals = Bmat.values;
162 
163  // Get the algorithm mode
164  std::string alg = nodename+std::string(" algorithm");
165  // printf("DEBUG: Using kernel: %s\n",myalg.c_str());
166  if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
167  KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
168 
169  // Merge the B and Bimport matrices
170  const KCRS Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
171 
172  MM = Teuchos::null; MM = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: MMM: Newmatrix HIP Core"));
173 
174  // Do the multiply on whatever we've got
175  typename KernelHandle::nnz_lno_t AnumRows = Amat.numRows();
176  typename KernelHandle::nnz_lno_t BnumRows = Bmerged.numRows();
177  typename KernelHandle::nnz_lno_t BnumCols = Bmerged.numCols();
178 
179  // regardless of whether integer row ptrs are used, need to ultimately produce the row pointer, entries, and values of the expected types
180  lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing("non_const_lno_row"), AnumRows + 1);
181  lno_nnz_view_t entriesC;
182  scalar_view_t valuesC;
183 
184  Tpetra::Details::IntRowPtrHelper<decltype(Bmerged)> irph(Bmerged.nnz(), Bmerged.graph.row_map);
185  const bool useIntRowptrs =
186  irph.shouldUseIntRowptrs() &&
187  Aview.origMatrix->getApplyHelper()->shouldUseIntRowptrs();
188 
189  if (useIntRowptrs) {
190  IntKernelHandle kh;
191  kh.create_spgemm_handle(alg_enum);
192  kh.set_team_work_size(team_work_size);
193 
194  int_view_t int_row_mapC (Kokkos::ViewAllocateWithoutInitializing("non_const_int_row"), AnumRows + 1);
195 
196  auto Aint = Aview.origMatrix->getApplyHelper()->getIntRowptrMatrix(Amat);
197  auto Bint = irph.getIntRowptrMatrix(Bmerged);
198 
199  {
200  Tpetra::Details::ProfilingRegion MM2("TpetraExt: MMM: Newmatrix KokkosKernels symbolic int");
201  KokkosSparse::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Aint.graph.row_map,Aint.graph.entries,false,Bint.graph.row_map,Bint.graph.entries,false, int_row_mapC);
202  }
203  Tpetra::Details::ProfilingRegion MM2("TpetraExt: MMM: Newmatrix KokkosKernels numeric int");
204 
205  size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
206  if (c_nnz_size){
207  entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
208  valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
209  }
210  KokkosSparse::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Aint.graph.row_map,Aint.graph.entries,Aint.values,false,Bint.graph.row_map,Bint.graph.entries,Bint.values,false,int_row_mapC,entriesC,valuesC);
211  // transfer the integer rowptrs back to the correct rowptr type
212  Kokkos::parallel_for(int_row_mapC.size(), KOKKOS_LAMBDA(int i){ row_mapC(i) = int_row_mapC(i);});
213  kh.destroy_spgemm_handle();
214  } else {
215  KernelHandle kh;
216  kh.create_spgemm_handle(alg_enum);
217  kh.set_team_work_size(team_work_size);
218 
219  {
220  Tpetra::Details::ProfilingRegion MM2("TpetraExt: MMM: Newmatrix KokkosKernels symbolic non-int");
221  KokkosSparse::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Amat.graph.row_map,Amat.graph.entries,false,Bmerged.graph.row_map,Bmerged.graph.entries,false,row_mapC);
222  }
223 
224  size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
225  if (c_nnz_size){
226  entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
227  valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
228  }
229 
230  Tpetra::Details::ProfilingRegion MM2("TpetraExt: MMM: Newmatrix KokkosKernels numeric non-int");
231  KokkosSparse::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Amat.graph.row_map,Amat.graph.entries,Amat.values,false,Bmerged.graph.row_map,Bmerged.graph.entries,Bmerged.values,false,row_mapC,entriesC,valuesC);
232  kh.destroy_spgemm_handle();
233  }
234 
235  MM = Teuchos::null; MM = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: MMM: Newmatrix HIP Sort"));
236 
237  // Sort & set values
238  if (params.is_null() || params->get("sort entries",true))
239  Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
240  C.setAllValues(row_mapC,entriesC,valuesC);
241 
242 
243  MM = Teuchos::null; MM = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: MMM: Newmatrix HIP ESFC"));
244 
245 
246  // Final Fillcomplete
247  RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
248  labelList->set("Timer Label",label);
249  if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
250  RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > dummyExport;
251  C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
252 }
253 
254 
255 /*********************************************************************************************************/
256 template<class Scalar,
257  class LocalOrdinal,
258  class GlobalOrdinal,
259  class LocalOrdinalViewType>
260 void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
261  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
262  const LocalOrdinalViewType & targetMapToOrigRow_dev,
263  const LocalOrdinalViewType & targetMapToImportRow_dev,
264  const LocalOrdinalViewType & Bcol2Ccol_dev,
265  const LocalOrdinalViewType & Icol2Ccol_dev,
266  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
267  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
268  const std::string& label,
269  const Teuchos::RCP<Teuchos::ParameterList>& params) {
270 
271  // FIXME: Right now, this is a cut-and-paste of the serial kernel
272  typedef Tpetra::KokkosCompat::KokkosHIPWrapperNode Node;
273  using Teuchos::RCP;
274  using Teuchos::rcp;
275 
276  RCP<Tpetra::Details::ProfilingRegion> MM = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: MMM: Reuse SerialCore"));
277 
278  // Lots and lots of typedefs
279  typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
280  typedef typename KCRS::StaticCrsGraphType graph_t;
281  typedef typename graph_t::row_map_type::const_type c_lno_view_t;
282  typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
283  typedef typename KCRS::values_type::non_const_type scalar_view_t;
284 
285  typedef Scalar SC;
286  typedef LocalOrdinal LO;
287  typedef GlobalOrdinal GO;
288  typedef Node NO;
289  typedef Map<LO,GO,NO> map_type;
290  const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
291  const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
292  const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
293 
294  // KDDKDD UVM Without UVM, need to copy targetMap arrays to host.
295  // KDDKDD UVM Ideally, this function would run on device and use
296  // KDDKDD UVM KokkosKernels instead of this host implementation.
297  auto targetMapToOrigRow =
298  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
299  targetMapToOrigRow_dev);
300  auto targetMapToImportRow =
301  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
302  targetMapToImportRow_dev);
303  auto Bcol2Ccol =
304  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
305  Bcol2Ccol_dev);
306  auto Icol2Ccol =
307  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
308  Icol2Ccol_dev);
309 
310  // Sizes
311  RCP<const map_type> Ccolmap = C.getColMap();
312  size_t m = Aview.origMatrix->getLocalNumRows();
313  size_t n = Ccolmap->getLocalNumElements();
314 
315  // Grab the Kokkos::SparseCrsMatrices & inner stuff
316  const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
317  const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
318  const KCRS & Cmat = C.getLocalMatrixHost();
319 
320  c_lno_view_t Arowptr = Amat.graph.row_map,
321  Browptr = Bmat.graph.row_map,
322  Crowptr = Cmat.graph.row_map;
323  const lno_nnz_view_t Acolind = Amat.graph.entries,
324  Bcolind = Bmat.graph.entries,
325  Ccolind = Cmat.graph.entries;
326  const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
327  scalar_view_t Cvals = Cmat.values;
328 
329  c_lno_view_t Irowptr;
330  lno_nnz_view_t Icolind;
331  scalar_view_t Ivals;
332  if(!Bview.importMatrix.is_null()) {
333  auto lclB = Bview.importMatrix->getLocalMatrixHost();
334  Irowptr = lclB.graph.row_map;
335  Icolind = lclB.graph.entries;
336  Ivals = lclB.values;
337  }
338 
339  auto MM2 = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: MMM: Newmatrix SerialCore - Compare"));
340 
341  // Classic csr assembly (low memory edition)
342  // mfh 27 Sep 2016: The c_status array is an implementation detail
343  // of the local sparse matrix-matrix multiply routine.
344 
345  // The status array will contain the index into colind where this entry was last deposited.
346  // c_status[i] < CSR_ip - not in the row yet
347  // c_status[i] >= CSR_ip - this is the entry where you can find the data
348  // We start with this filled with INVALID's indicating that there are no entries yet.
349  // Sadly, this complicates the code due to the fact that size_t's are unsigned.
350  std::vector<size_t> c_status(n, ST_INVALID);
351 
352  // For each row of A/C
353  size_t CSR_ip = 0, OLD_ip = 0;
354  for (size_t i = 0; i < m; i++) {
355  // First fill the c_status array w/ locations where we're allowed to
356  // generate nonzeros for this row
357  OLD_ip = Crowptr[i];
358  CSR_ip = Crowptr[i+1];
359  for (size_t k = OLD_ip; k < CSR_ip; k++) {
360  c_status[Ccolind[k]] = k;
361 
362  // Reset values in the row of C
363  Cvals[k] = SC_ZERO;
364  }
365 
366  for (size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
367  LO Aik = Acolind[k];
368  const SC Aval = Avals[k];
369  if (Aval == SC_ZERO)
370  continue;
371 
372  if (targetMapToOrigRow[Aik] != LO_INVALID) {
373  // Local matrix
374  size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
375 
376  for (size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
377  LO Bkj = Bcolind[j];
378  LO Cij = Bcol2Ccol[Bkj];
379 
380  TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
381  std::runtime_error, "Trying to insert a new entry (" << i << "," << Cij << ") into a static graph " <<
382  "(c_status = " << c_status[Cij] << " of [" << OLD_ip << "," << CSR_ip << "))");
383 
384  Cvals[c_status[Cij]] += Aval * Bvals[j];
385  }
386 
387  } else {
388  // Remote matrix
389  size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
390  for (size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
391  LO Ikj = Icolind[j];
392  LO Cij = Icol2Ccol[Ikj];
393 
394  TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
395  std::runtime_error, "Trying to insert a new entry (" << i << "," << Cij << ") into a static graph " <<
396  "(c_status = " << c_status[Cij] << " of [" << OLD_ip << "," << CSR_ip << "))");
397 
398  Cvals[c_status[Cij]] += Aval * Ivals[j];
399  }
400  }
401  }
402  }
403 
404  C.fillComplete(C.getDomainMap(), C.getRangeMap());
405 }
406 
407 /*********************************************************************************************************/
408 template<class Scalar,
409  class LocalOrdinal,
410  class GlobalOrdinal,
411  class LocalOrdinalViewType>
412 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
413  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> & Dinv,
414  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
415  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
416  const LocalOrdinalViewType & Acol2Brow,
417  const LocalOrdinalViewType & Acol2Irow,
418  const LocalOrdinalViewType & Bcol2Ccol,
419  const LocalOrdinalViewType & Icol2Ccol,
420  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
421  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
422  const std::string& label,
423  const Teuchos::RCP<Teuchos::ParameterList>& params) {
424 
425 
426 
427  // Node-specific code
428  using Teuchos::RCP;
429 
430  // Options
431  //int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer // unreferenced
432  std::string myalg("KK");
433  if(!params.is_null()) {
434  if(params->isParameter("hip: jacobi algorithm"))
435  myalg = params->get("hip: jacobi algorithm",myalg);
436  }
437 
438  if(myalg == "MSAK") {
439  ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_MultiplyScaleAddKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
440  }
441  else if(myalg == "KK") {
442  jacobi_A_B_newmatrix_KokkosKernels(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
443  }
444  else {
445  throw std::runtime_error("Tpetra::MatrixMatrix::Jacobi newmatrix unknown kernel");
446  }
447 
448  Tpetra::Details::ProfilingRegion("TpetraExt: Jacobi: Newmatrix HIPESFC");
449 
450  // Final Fillcomplete
451  RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
452  labelList->set("Timer Label",label);
453  if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
454 
455  // NOTE: MSAK already fillCompletes, so we have to check here
456  if(!C.isFillComplete()) {
457  RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > dummyExport;
458  C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
459  }
460 
461 }
462 
463 
464 
465 /*********************************************************************************************************/
466 template<class Scalar,
467  class LocalOrdinal,
468  class GlobalOrdinal,
469  class LocalOrdinalViewType>
470 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
471  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> & Dinv,
472  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
473  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
474  const LocalOrdinalViewType & targetMapToOrigRow_dev,
475  const LocalOrdinalViewType & targetMapToImportRow_dev,
476  const LocalOrdinalViewType & Bcol2Ccol_dev,
477  const LocalOrdinalViewType & Icol2Ccol_dev,
478  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
479  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
480  const std::string& label,
481  const Teuchos::RCP<Teuchos::ParameterList>& params) {
482 
483  // FIXME: Right now, this is a cut-and-paste of the serial kernel
484  typedef Tpetra::KokkosCompat::KokkosHIPWrapperNode Node;
485 
486  using Teuchos::RCP;
487  using Teuchos::rcp;
488  RCP<Tpetra::Details::ProfilingRegion> MM = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: Jacobi: Reuse HIPCore"));
489 
490  // Lots and lots of typedefs
491  typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
492  typedef typename KCRS::StaticCrsGraphType graph_t;
493  typedef typename graph_t::row_map_type::const_type c_lno_view_t;
494  typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
495  typedef typename KCRS::values_type::non_const_type scalar_view_t;
496  typedef typename scalar_view_t::memory_space scalar_memory_space;
497 
498  typedef Scalar SC;
499  typedef LocalOrdinal LO;
500  typedef GlobalOrdinal GO;
501  typedef Node NO;
502  typedef Map<LO,GO,NO> map_type;
503  const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
504  const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
505  const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
506 
507  // KDDKDD UVM Without UVM, need to copy targetMap arrays to host.
508  // KDDKDD UVM Ideally, this function would run on device and use
509  // KDDKDD UVM KokkosKernels instead of this host implementation.
510  auto targetMapToOrigRow =
511  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
512  targetMapToOrigRow_dev);
513  auto targetMapToImportRow =
514  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
515  targetMapToImportRow_dev);
516  auto Bcol2Ccol =
517  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
518  Bcol2Ccol_dev);
519  auto Icol2Ccol =
520  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
521  Icol2Ccol_dev);
522 
523 
524  // Sizes
525  RCP<const map_type> Ccolmap = C.getColMap();
526  size_t m = Aview.origMatrix->getLocalNumRows();
527  size_t n = Ccolmap->getLocalNumElements();
528 
529  // Grab the Kokkos::SparseCrsMatrices & inner stuff
530  const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
531  const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
532  const KCRS & Cmat = C.getLocalMatrixHost();
533 
534  c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
535  const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
536  const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
537  scalar_view_t Cvals = Cmat.values;
538 
539  c_lno_view_t Irowptr;
540  lno_nnz_view_t Icolind;
541  scalar_view_t Ivals;
542  if(!Bview.importMatrix.is_null()) {
543  auto lclB = Bview.importMatrix->getLocalMatrixHost();
544  Irowptr = lclB.graph.row_map;
545  Icolind = lclB.graph.entries;
546  Ivals = lclB.values;
547  }
548 
549  // Jacobi-specific inner stuff
550  auto Dvals =
551  Dinv.template getLocalView<scalar_memory_space>(Access::ReadOnly);
552 
553  RCP<Tpetra::Details::ProfilingRegion> MM2 = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: Jacobi: Reuse HIPCore - Compare"));
554 
555  // The status array will contain the index into colind where this entry was last deposited.
556  // c_status[i] < CSR_ip - not in the row yet
557  // c_status[i] >= CSR_ip - this is the entry where you can find the data
558  // We start with this filled with INVALID's indicating that there are no entries yet.
559  // Sadly, this complicates the code due to the fact that size_t's are unsigned.
560  std::vector<size_t> c_status(n, ST_INVALID);
561 
562  // For each row of A/C
563  size_t CSR_ip = 0, OLD_ip = 0;
564  for (size_t i = 0; i < m; i++) {
565 
566  // First fill the c_status array w/ locations where we're allowed to
567  // generate nonzeros for this row
568  OLD_ip = Crowptr[i];
569  CSR_ip = Crowptr[i+1];
570  for (size_t k = OLD_ip; k < CSR_ip; k++) {
571  c_status[Ccolind[k]] = k;
572 
573  // Reset values in the row of C
574  Cvals[k] = SC_ZERO;
575  }
576 
577  SC minusOmegaDval = -omega*Dvals(i,0);
578 
579  // Entries of B
580  for (size_t j = Browptr[i]; j < Browptr[i+1]; j++) {
581  Scalar Bval = Bvals[j];
582  if (Bval == SC_ZERO)
583  continue;
584  LO Bij = Bcolind[j];
585  LO Cij = Bcol2Ccol[Bij];
586 
587  TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
588  std::runtime_error, "Trying to insert a new entry into a static graph");
589 
590  Cvals[c_status[Cij]] = Bvals[j];
591  }
592 
593  // Entries of -omega * Dinv * A * B
594  for (size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
595  LO Aik = Acolind[k];
596  const SC Aval = Avals[k];
597  if (Aval == SC_ZERO)
598  continue;
599 
600  if (targetMapToOrigRow[Aik] != LO_INVALID) {
601  // Local matrix
602  size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
603 
604  for (size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
605  LO Bkj = Bcolind[j];
606  LO Cij = Bcol2Ccol[Bkj];
607 
608  TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
609  std::runtime_error, "Trying to insert a new entry into a static graph");
610 
611  Cvals[c_status[Cij]] += minusOmegaDval * Aval * Bvals[j];
612  }
613 
614  } else {
615  // Remote matrix
616  size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
617  for (size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
618  LO Ikj = Icolind[j];
619  LO Cij = Icol2Ccol[Ikj];
620 
621  TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
622  std::runtime_error, "Trying to insert a new entry into a static graph");
623 
624  Cvals[c_status[Cij]] += minusOmegaDval * Aval * Ivals[j];
625  }
626  }
627  }
628  }
629 
630  MM = Teuchos::null; MM = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: Jacobi: Reuse ESFC"));
631 
632  C.fillComplete(C.getDomainMap(), C.getRangeMap());
633 
634 }
635 
636 /*********************************************************************************************************/
637 template<class Scalar,
638  class LocalOrdinal,
639  class GlobalOrdinal,
640  class LocalOrdinalViewType>
641 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
642  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> & Dinv,
643  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
644  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
645  const LocalOrdinalViewType & Acol2Brow,
646  const LocalOrdinalViewType & Acol2Irow,
647  const LocalOrdinalViewType & Bcol2Ccol,
648  const LocalOrdinalViewType & Icol2Ccol,
649  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
650  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
651  const std::string& label,
652  const Teuchos::RCP<Teuchos::ParameterList>& params) {
653 
654 
655 
656  // Check if the diagonal entries exist in debug mode
657  const bool debug = Tpetra::Details::Behavior::debug();
658  if(debug) {
659 
660  auto rowMap = Aview.origMatrix->getRowMap();
661  Tpetra::Vector<Scalar> diags(rowMap);
662  Aview.origMatrix->getLocalDiagCopy(diags);
663  size_t diagLength = rowMap->getLocalNumElements();
664  Teuchos::Array<Scalar> diagonal(diagLength);
665  diags.get1dCopy(diagonal());
666 
667  for(size_t i = 0; i < diagLength; ++i) {
668  TEUCHOS_TEST_FOR_EXCEPTION(diagonal[i] == Teuchos::ScalarTraits<Scalar>::zero(),
669  std::runtime_error,
670  "Matrix A has a zero/missing diagonal: " << diagonal[i] << std::endl <<
671  "KokkosKernels Jacobi-fused SpGEMM requires nonzero diagonal entries in A" << std::endl);
672  }
673  }
674 
675  using Teuchos::RCP;
676  using Teuchos::rcp;
677  RCP<Tpetra::Details::ProfilingRegion> MM = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: Jacobi: Newmatrix KokkosKernels"));
678 
679  // Usings
680  using device_t = typename Tpetra::KokkosCompat::KokkosHIPWrapperNode::device_type;
682  using graph_t = typename matrix_t::StaticCrsGraphType;
683  using lno_view_t = typename graph_t::row_map_type::non_const_type;
684  using int_view_t = Kokkos::View<int *,
685  typename lno_view_t::array_layout,
686  typename lno_view_t::memory_space,
687  typename lno_view_t::memory_traits>;
688  using c_lno_view_t = typename graph_t::row_map_type::const_type;
689  using lno_nnz_view_t = typename graph_t::entries_type::non_const_type;
690  using scalar_view_t = typename matrix_t::values_type::non_const_type;
691 
692  // KokkosKernels handle
693  using handle_t = typename KokkosKernels::Experimental::KokkosKernelsHandle<
694  typename lno_view_t::const_value_type,typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
695  typename device_t::execution_space, typename device_t::memory_space,typename device_t::memory_space >;
696  using int_handle_t = typename KokkosKernels::Experimental::KokkosKernelsHandle<
697  typename int_view_t::const_value_type,typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
698  typename device_t::execution_space, typename device_t::memory_space,typename device_t::memory_space >;
699 
700  // Merge the B and Bimport matrices
701  const matrix_t Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
702 
703  // Get the properties and arrays of input matrices
704  const matrix_t & Amat = Aview.origMatrix->getLocalMatrixDevice();
705  const matrix_t & Bmat = Bview.origMatrix->getLocalMatrixDevice();
706 
707  typename handle_t::nnz_lno_t AnumRows = Amat.numRows();
708  typename handle_t::nnz_lno_t BnumRows = Bmerged.numRows();
709  typename handle_t::nnz_lno_t BnumCols = Bmerged.numCols();
710 
711  c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmerged.graph.row_map;
712  const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmerged.graph.entries;
713  const scalar_view_t Avals = Amat.values, Bvals = Bmerged.values;
714 
715  // Arrays of the output matrix
716  lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing("row_mapC"), AnumRows + 1);
717  lno_nnz_view_t entriesC;
718  scalar_view_t valuesC;
719 
720  // Options
721  int team_work_size = 16;
722  std::string myalg("SPGEMM_KK_MEMORY");
723  if(!params.is_null()) {
724  if(params->isParameter("hip: algorithm"))
725  myalg = params->get("hip: algorithm",myalg);
726  if(params->isParameter("hip: team work size"))
727  team_work_size = params->get("hip: team work size",team_work_size);
728  }
729 
730  // Get the algorithm mode
731  std::string nodename("HIP");
732  std::string alg = nodename + std::string(" algorithm");
733  if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
734  KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
735 
736  Tpetra::Details::IntRowPtrHelper<decltype(Bmerged)> irph(Bmerged.nnz(), Bmerged.graph.row_map);
737  const bool useIntRowptrs =
738  irph.shouldUseIntRowptrs() &&
739  Aview.origMatrix->getApplyHelper()->shouldUseIntRowptrs();
740 
741  if (useIntRowptrs) {
742  int_handle_t kh;
743  kh.create_spgemm_handle(alg_enum);
744  kh.set_team_work_size(team_work_size);
745 
746  int_view_t int_row_mapC (Kokkos::ViewAllocateWithoutInitializing("int_row_mapC"), AnumRows + 1);
747 
748  auto Aint = Aview.origMatrix->getApplyHelper()->getIntRowptrMatrix(Amat);
749  auto Bint = irph.getIntRowptrMatrix(Bmerged);
750  {
751  Tpetra::Details::ProfilingRegion MM2("TpetraExt: Jacobi: Newmatrix KokkosKernels symbolic int");
752 
753  KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
754  Aint.graph.row_map, Aint.graph.entries, false,
755  Bint.graph.row_map, Bint.graph.entries, false,
756  int_row_mapC);
757  }
758 
759  Tpetra::Details::ProfilingRegion MM2("TpetraExt: Jacobi: Newmatrix KokkosKernels numeric int");
760 
761  size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
762  if (c_nnz_size){
763  entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
764  valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
765  }
766 
767  // even though there is no TPL for this, we have to use the same handle that was used in the symbolic phase,
768  // so need to have a special int-typed call for this as well.
769  KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
770  Aint.graph.row_map, Aint.graph.entries, Amat.values, false,
771  Bint.graph.row_map, Bint.graph.entries, Bint.values, false,
772  int_row_mapC, entriesC, valuesC,
773  omega, Dinv.getLocalViewDevice(Access::ReadOnly));
774  // transfer the integer rowptrs back to the correct rowptr type
775  Kokkos::parallel_for(int_row_mapC.size(), KOKKOS_LAMBDA(int i){ row_mapC(i) = int_row_mapC(i);});
776  kh.destroy_spgemm_handle();
777  } else {
778  handle_t kh;
779  kh.create_spgemm_handle(alg_enum);
780  kh.set_team_work_size(team_work_size);
781 
782  {
783  Tpetra::Details::ProfilingRegion MM2("TpetraExt: Jacobi: Newmatrix KokkosKernels symbolic non-int");
784 
785  KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
786  Amat.graph.row_map, Amat.graph.entries, false,
787  Bmerged.graph.row_map, Bmerged.graph.entries, false,
788  row_mapC);
789  }
790 
791  Tpetra::Details::ProfilingRegion MM2("TpetraExt: Jacobi: Newmatrix KokkosKernels numeric non-int");
792 
793  size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
794  if (c_nnz_size){
795  entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
796  valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
797  }
798  KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
799  Amat.graph.row_map, Amat.graph.entries, Amat.values, false,
800  Bmerged.graph.row_map, Bmerged.graph.entries, Bmerged.values, false,
801  row_mapC, entriesC, valuesC,
802  omega, Dinv.getLocalViewDevice(Access::ReadOnly));
803  kh.destroy_spgemm_handle();
804  }
805 
806  MM = Teuchos::null; MM = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: Jacobi: Newmatrix HIPSort"));
807 
808  // Sort & set values
809  if (params.is_null() || params->get("sort entries",true))
810  Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
811  C.setAllValues(row_mapC,entriesC,valuesC);
812 
813  MM = Teuchos::null; MM = rcp(new Tpetra::Details::ProfilingRegion("TpetraExt: Jacobi: Newmatrix HIPESFC"));
814 
815  // Final Fillcomplete
816  Teuchos::RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
817  labelList->set("Timer Label",label);
818  if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
819  Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > dummyExport;
820  C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
821 }
822 
823  }//MMdetails
824 }//Tpetra
825 
826 #endif//HIP
827 
828 #endif
static bool debug()
Whether Tpetra is in debug mode.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
A distributed dense vector.