Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
TpetraExt_MatrixMatrix_SYCL.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_MATRIXMATRIX_SYCL_DEF_HPP
11 #define TPETRA_MATRIXMATRIX_SYCL_DEF_HPP
12 
13 #include "Tpetra_Details_IntRowPtrHelper.hpp"
14 
15 #ifdef HAVE_TPETRA_INST_SYCL
16 namespace Tpetra {
17 namespace MMdetails {
18 
19 /*********************************************************************************************************/
20 // MMM KernelWrappers for Partial Specialization to SYCL
21 template<class Scalar,
22  class LocalOrdinal,
23  class GlobalOrdinal,
24  class LocalOrdinalViewType>
25 struct KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode,LocalOrdinalViewType> {
26  static inline void mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
27  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
28  const LocalOrdinalViewType & Acol2Brow,
29  const LocalOrdinalViewType & Acol2Irow,
30  const LocalOrdinalViewType & Bcol2Ccol,
31  const LocalOrdinalViewType & Icol2Ccol,
32  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
33  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
34  const std::string& label = std::string(),
35  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
36 
37 
38 
39  static inline void mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
40  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
41  const LocalOrdinalViewType & Acol2Brow,
42  const LocalOrdinalViewType & Acol2Irow,
43  const LocalOrdinalViewType & Bcol2Ccol,
44  const LocalOrdinalViewType & Icol2Ccol,
45  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
46  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
47  const std::string& label = std::string(),
48  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
49 
50 };
51 
52 // Jacobi KernelWrappers for Partial Specialization to SYCL
53 template<class Scalar,
54  class LocalOrdinal,
55  class GlobalOrdinal, class LocalOrdinalViewType>
56 struct KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode,LocalOrdinalViewType> {
57  static inline void jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
58  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> & Dinv,
59  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
60  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
61  const LocalOrdinalViewType & Acol2Brow,
62  const LocalOrdinalViewType & Acol2Irow,
63  const LocalOrdinalViewType & Bcol2Ccol,
64  const LocalOrdinalViewType & Icol2Ccol,
65  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
66  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
67  const std::string& label = std::string(),
68  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
69 
70  static inline void jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
71  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> & Dinv,
72  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
73  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
74  const LocalOrdinalViewType & Acol2Brow,
75  const LocalOrdinalViewType & Acol2Irow,
76  const LocalOrdinalViewType & Bcol2Ccol,
77  const LocalOrdinalViewType & Icol2Ccol,
78  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
79  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
80  const std::string& label = std::string(),
81  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
82 
83  static inline void jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
84  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> & Dinv,
85  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
86  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
87  const LocalOrdinalViewType & Acol2Brow,
88  const LocalOrdinalViewType & Acol2Irow,
89  const LocalOrdinalViewType & Bcol2Ccol,
90  const LocalOrdinalViewType & Icol2Ccol,
91  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
92  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
93  const std::string& label = std::string(),
94  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
95 };
96 
97 
98 /*********************************************************************************************************/
99 // AB NewMatrix Kernel wrappers (KokkosKernels/SYCL Version)
100 template<class Scalar,
101  class LocalOrdinal,
102  class GlobalOrdinal,
103  class LocalOrdinalViewType>
104 void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
105  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
106  const LocalOrdinalViewType & Acol2Brow,
107  const LocalOrdinalViewType & Acol2Irow,
108  const LocalOrdinalViewType & Bcol2Ccol,
109  const LocalOrdinalViewType & Icol2Ccol,
110  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
111  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
112  const std::string& label,
113  const Teuchos::RCP<Teuchos::ParameterList>& params) {
114 
115 
116 #ifdef HAVE_TPETRA_MMM_TIMINGS
117  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
118  using Teuchos::TimeMonitor;
119  Teuchos::RCP<TimeMonitor> MM = rcp(new TimeMonitor(*(TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix SYCLWrapper")))));
120 #endif
121  // Node-specific code
122  typedef Tpetra::KokkosCompat::KokkosSYCLWrapperNode Node;
123  std::string nodename("SYCL");
124 
125  // Lots and lots of typedefs
126  using Teuchos::RCP;
128  typedef typename KCRS::device_type device_t;
129  typedef typename KCRS::StaticCrsGraphType graph_t;
130  typedef typename graph_t::row_map_type::non_const_type lno_view_t;
131  using int_view_t = Kokkos::View<int *, typename lno_view_t::array_layout, typename lno_view_t::memory_space, typename lno_view_t::memory_traits>;
132  typedef typename graph_t::row_map_type::const_type c_lno_view_t;
133  typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
134  typedef typename KCRS::values_type::non_const_type scalar_view_t;
135  //typedef typename graph_t::row_map_type::const_type lno_view_t_const;
136 
137  // Options
138  int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
139  std::string myalg("SPGEMM_KK_MEMORY");
140  if(!params.is_null()) {
141  if(params->isParameter("sycl: algorithm"))
142  myalg = params->get("sycl: algorithm",myalg);
143  if(params->isParameter("sycl: team work size"))
144  team_work_size = params->get("sycl: team work size",team_work_size);
145  }
146 
147  // KokkosKernelsHandle
148  typedef KokkosKernels::Experimental::KokkosKernelsHandle<
149  typename lno_view_t::const_value_type,typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
150  typename device_t::execution_space, typename device_t::memory_space,typename device_t::memory_space > KernelHandle;
151  using IntKernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle<
152  typename int_view_t::const_value_type,typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
153  typename device_t::execution_space, typename device_t::memory_space,typename device_t::memory_space >;
154 
155  // Grab the Kokkos::SparseCrsMatrices
156  const KCRS & Amat = Aview.origMatrix->getLocalMatrixDevice();
157  const KCRS & Bmat = Bview.origMatrix->getLocalMatrixDevice();
158 
159  c_lno_view_t Arowptr = Amat.graph.row_map,
160  Browptr = Bmat.graph.row_map;
161  const lno_nnz_view_t Acolind = Amat.graph.entries,
162  Bcolind = Bmat.graph.entries;
163  const scalar_view_t Avals = Amat.values,
164  Bvals = Bmat.values;
165 
166  // Get the algorithm mode
167  std::string alg = nodename+std::string(" algorithm");
168  // printf("DEBUG: Using kernel: %s\n",myalg.c_str());
169  if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
170  KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
171 
172  // Merge the B and Bimport matrices
173  const KCRS Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
174 
175 #ifdef HAVE_TPETRA_MMM_TIMINGS
176  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix SYCLCore"))));
177 #endif
178 
179  // Do the multiply on whatever we've got
180  typename KernelHandle::nnz_lno_t AnumRows = Amat.numRows();
181  typename KernelHandle::nnz_lno_t BnumRows = Bmerged.numRows();
182  typename KernelHandle::nnz_lno_t BnumCols = Bmerged.numCols();
183 
184  lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing("non_const_lno_row"), AnumRows + 1);
185  lno_nnz_view_t entriesC;
186  scalar_view_t valuesC;
187 
188  // static_assert(std::is_void_v<typename KCRS::row_map_type::non_const_value_type>, "");
189  // static_assert(std::is_void_v<typename decltype(Bmerged)::row_map_type::non_const_value_type>, "");
190  Tpetra::Details::IntRowPtrHelper<decltype(Bmerged)> irph(Bmerged.nnz(), Bmerged.graph.row_map);
191  const bool useIntRowptrs =
192  irph.shouldUseIntRowptrs() &&
193  Aview.origMatrix->getApplyHelper()->shouldUseIntRowptrs();
194 
195  if (useIntRowptrs) {
196  IntKernelHandle kh;
197  kh.create_spgemm_handle(alg_enum);
198  kh.set_team_work_size(team_work_size);
199 
200  int_view_t int_row_mapC (Kokkos::ViewAllocateWithoutInitializing("non_const_int_row"), AnumRows + 1);
201 
202  auto Aint = Aview.origMatrix->getApplyHelper()->getIntRowptrMatrix(Amat);
203  auto Bint = irph.getIntRowptrMatrix(Bmerged);
204  KokkosSparse::Experimental::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Aint.graph.row_map,Aint.graph.entries,false,Bint.graph.row_map,Bint.graph.entries,false, int_row_mapC);
205 
206  size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
207  if (c_nnz_size){
208  entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
209  valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
210  }
211  KokkosSparse::Experimental::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Aint.graph.row_map,Aint.graph.entries,Aint.values,false,Bint.graph.row_map,Bint.graph.entries,Bint.values,false,int_row_mapC,entriesC,valuesC);
212  // transfer the integer rowptrs back to the correct rowptr type
213  Kokkos::parallel_for(int_row_mapC.size(), KOKKOS_LAMBDA(int i){ row_mapC(i) = int_row_mapC(i);});
214  kh.destroy_spgemm_handle();
215 
216  } else {
217  KernelHandle kh;
218  kh.create_spgemm_handle(alg_enum);
219  kh.set_team_work_size(team_work_size);
220 
221  KokkosSparse::Experimental::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Amat.graph.row_map,Amat.graph.entries,false,Bmerged.graph.row_map,Bmerged.graph.entries,false,row_mapC);
222 
223  size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
224  if (c_nnz_size){
225  entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
226  valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
227  }
228 
229  KokkosSparse::Experimental::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Amat.graph.row_map,Amat.graph.entries,Amat.values,false,Bmerged.graph.row_map,Bmerged.graph.entries,Bmerged.values,false,row_mapC,entriesC,valuesC);
230  kh.destroy_spgemm_handle();
231  }
232 
233 #ifdef HAVE_TPETRA_MMM_TIMINGS
234  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix SYCLSort"))));
235 #endif
236 
237  // Sort & set values
238  if (params.is_null() || params->get("sort entries",true))
239  Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
240  C.setAllValues(row_mapC,entriesC,valuesC);
241 
242 #ifdef HAVE_TPETRA_MMM_TIMINGS
243  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix SYCLESFC"))));
244 #endif
245 
246  // Final Fillcomplete
247  RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
248  labelList->set("Timer Label",label);
249  if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
250  RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > dummyExport;
251  C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
252 }
253 
254 
255 /*********************************************************************************************************/
256 template<class Scalar,
257  class LocalOrdinal,
258  class GlobalOrdinal,
259  class LocalOrdinalViewType>
260 void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(
261  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
262  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
263  const LocalOrdinalViewType & targetMapToOrigRow_dev,
264  const LocalOrdinalViewType & targetMapToImportRow_dev,
265  const LocalOrdinalViewType & Bcol2Ccol_dev,
266  const LocalOrdinalViewType & Icol2Ccol_dev,
267  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
268  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
269  const std::string& label,
270  const Teuchos::RCP<Teuchos::ParameterList>& params) {
271 
272  // FIXME: Right now, this is a cut-and-paste of the serial kernel
273  typedef Tpetra::KokkosCompat::KokkosSYCLWrapperNode Node;
274 
275 #ifdef HAVE_TPETRA_MMM_TIMINGS
276  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
277  using Teuchos::TimeMonitor;
278  Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Reuse SerialCore"))));
279  Teuchos::RCP<Teuchos::TimeMonitor> MM2;
280 #endif
281  using Teuchos::RCP;
282  using Teuchos::rcp;
283 
284 
285  // Lots and lots of typedefs
286  typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
287  typedef typename KCRS::StaticCrsGraphType graph_t;
288  typedef typename graph_t::row_map_type::const_type c_lno_view_t;
289  typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
290  typedef typename KCRS::values_type::non_const_type scalar_view_t;
291 
292  typedef Scalar SC;
293  typedef LocalOrdinal LO;
294  typedef GlobalOrdinal GO;
295  typedef Node NO;
296  typedef Map<LO,GO,NO> map_type;
297  const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
298  const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
299  const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
300 
301  // Since this is being run on SYCL, we need to fence because the below code will use UVM
302  // typename graph_t::execution_space().fence();
303 
304  // KDDKDD UVM Without UVM, need to copy targetMap arrays to host.
305  // KDDKDD UVM Ideally, this function would run on device and use
306  // KDDKDD UVM KokkosKernels instead of this host implementation.
307  auto targetMapToOrigRow =
308  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
309  targetMapToOrigRow_dev);
310  auto targetMapToImportRow =
311  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
312  targetMapToImportRow_dev);
313  auto Bcol2Ccol =
314  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
315  Bcol2Ccol_dev);
316  auto Icol2Ccol =
317  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
318  Icol2Ccol_dev);
319 
320  // Sizes
321  RCP<const map_type> Ccolmap = C.getColMap();
322  size_t m = Aview.origMatrix->getLocalNumRows();
323  size_t n = Ccolmap->getLocalNumElements();
324 
325  // Grab the Kokkos::SparseCrsMatrices & inner stuff
326  const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
327  const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
328  const KCRS & Cmat = C.getLocalMatrixHost();
329 
330  c_lno_view_t Arowptr = Amat.graph.row_map,
331  Browptr = Bmat.graph.row_map,
332  Crowptr = Cmat.graph.row_map;
333  const lno_nnz_view_t Acolind = Amat.graph.entries,
334  Bcolind = Bmat.graph.entries,
335  Ccolind = Cmat.graph.entries;
336  const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
337  scalar_view_t Cvals = Cmat.values;
338 
339  c_lno_view_t Irowptr;
340  lno_nnz_view_t Icolind;
341  scalar_view_t Ivals;
342  if(!Bview.importMatrix.is_null()) {
343  auto lclB = Bview.importMatrix->getLocalMatrixHost();
344  Irowptr = lclB.graph.row_map;
345  Icolind = lclB.graph.entries;
346  Ivals = lclB.values;
347  }
348 
349 #ifdef HAVE_TPETRA_MMM_TIMINGS
350  MM2 = Teuchos::null; MM2 = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix SerialCore - Compare"))));
351 #endif
352 
353  // Classic csr assembly (low memory edition)
354  // mfh 27 Sep 2016: The c_status array is an implementation detail
355  // of the local sparse matrix-matrix multiply routine.
356 
357  // The status array will contain the index into colind where this entry was last deposited.
358  // c_status[i] < CSR_ip - not in the row yet
359  // c_status[i] >= CSR_ip - this is the entry where you can find the data
360  // We start with this filled with INVALID's indicating that there are no entries yet.
361  // Sadly, this complicates the code due to the fact that size_t's are unsigned.
362  std::vector<size_t> c_status(n, ST_INVALID);
363 
364  // For each row of A/C
365  size_t CSR_ip = 0, OLD_ip = 0;
366  for (size_t i = 0; i < m; i++) {
367  // First fill the c_status array w/ locations where we're allowed to
368  // generate nonzeros for this row
369  OLD_ip = Crowptr[i];
370  CSR_ip = Crowptr[i+1];
371  for (size_t k = OLD_ip; k < CSR_ip; k++) {
372  c_status[Ccolind[k]] = k;
373 
374  // Reset values in the row of C
375  Cvals[k] = SC_ZERO;
376  }
377 
378  for (size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
379  LO Aik = Acolind[k];
380  const SC Aval = Avals[k];
381  if (Aval == SC_ZERO)
382  continue;
383 
384  if (targetMapToOrigRow[Aik] != LO_INVALID) {
385  // Local matrix
386  size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
387 
388  for (size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
389  LO Bkj = Bcolind[j];
390  LO Cij = Bcol2Ccol[Bkj];
391 
392  TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
393  std::runtime_error, "Trying to insert a new entry (" << i << "," << Cij << ") into a static graph " <<
394  "(c_status = " << c_status[Cij] << " of [" << OLD_ip << "," << CSR_ip << "))");
395 
396  Cvals[c_status[Cij]] += Aval * Bvals[j];
397  }
398 
399  } else {
400  // Remote matrix
401  size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
402  for (size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
403  LO Ikj = Icolind[j];
404  LO Cij = Icol2Ccol[Ikj];
405 
406  TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
407  std::runtime_error, "Trying to insert a new entry (" << i << "," << Cij << ") into a static graph " <<
408  "(c_status = " << c_status[Cij] << " of [" << OLD_ip << "," << CSR_ip << "))");
409 
410  Cvals[c_status[Cij]] += Aval * Ivals[j];
411  }
412  }
413  }
414  }
415 
416  C.fillComplete(C.getDomainMap(), C.getRangeMap());
417 }
418 
419 /*********************************************************************************************************/
420 template<class Scalar,
421  class LocalOrdinal,
422  class GlobalOrdinal,
423  class LocalOrdinalViewType>
424 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
425  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> & Dinv,
426  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
427  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
428  const LocalOrdinalViewType & Acol2Brow,
429  const LocalOrdinalViewType & Acol2Irow,
430  const LocalOrdinalViewType & Bcol2Ccol,
431  const LocalOrdinalViewType & Icol2Ccol,
432  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
433  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
434  const std::string& label,
435  const Teuchos::RCP<Teuchos::ParameterList>& params) {
436 
437 #ifdef HAVE_TPETRA_MMM_TIMINGS
438  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
439  using Teuchos::TimeMonitor;
440  Teuchos::RCP<TimeMonitor> MM;
441 #endif
442 
443  // Node-specific code
444  using Teuchos::RCP;
445 
446  // Options
447  //int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer // unreferenced
448  std::string myalg("KK");
449  if(!params.is_null()) {
450  if(params->isParameter("sycl: jacobi algorithm"))
451  myalg = params->get("sycl: jacobi algorithm",myalg);
452  }
453 
454  if(myalg == "MSAK") {
455  ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_MultiplyScaleAddKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
456  }
457  else if(myalg == "KK") {
458  jacobi_A_B_newmatrix_KokkosKernels(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
459  }
460  else {
461  throw std::runtime_error("Tpetra::MatrixMatrix::Jacobi newmatrix unknown kernel");
462  }
463 
464 #ifdef HAVE_TPETRA_MMM_TIMINGS
465  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix SYCLESFC"))));
466 #endif
467 
468  // Final Fillcomplete
469  RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
470  labelList->set("Timer Label",label);
471  if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
472 
473  // NOTE: MSAK already fillCompletes, so we have to check here
474  if(!C.isFillComplete()) {
475  RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > dummyExport;
476  C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
477  }
478 
479 }
480 
481 
482 
483 /*********************************************************************************************************/
484 template<class Scalar,
485  class LocalOrdinal,
486  class GlobalOrdinal,
487  class LocalOrdinalViewType>
488 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
489  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> & Dinv,
490  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
491  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
492  const LocalOrdinalViewType & targetMapToOrigRow_dev,
493  const LocalOrdinalViewType & targetMapToImportRow_dev,
494  const LocalOrdinalViewType & Bcol2Ccol_dev,
495  const LocalOrdinalViewType & Icol2Ccol_dev,
496  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
497  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
498  const std::string& label,
499  const Teuchos::RCP<Teuchos::ParameterList>& params) {
500 
501  // FIXME: Right now, this is a cut-and-paste of the serial kernel
502  typedef Tpetra::KokkosCompat::KokkosSYCLWrapperNode Node;
503 
504 #ifdef HAVE_TPETRA_MMM_TIMINGS
505  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
506  using Teuchos::TimeMonitor;
507  Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Reuse SYCLCore"))));
508  Teuchos::RCP<Teuchos::TimeMonitor> MM2;
509 #endif
510  using Teuchos::RCP;
511  using Teuchos::rcp;
512 
513  // Lots and lots of typedefs
514  typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
515  typedef typename KCRS::StaticCrsGraphType graph_t;
516  typedef typename graph_t::row_map_type::const_type c_lno_view_t;
517  typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
518  typedef typename KCRS::values_type::non_const_type scalar_view_t;
519  typedef typename scalar_view_t::memory_space scalar_memory_space;
520 
521  typedef Scalar SC;
522  typedef LocalOrdinal LO;
523  typedef GlobalOrdinal GO;
524  typedef Node NO;
525  typedef Map<LO,GO,NO> map_type;
526  const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
527  const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
528  const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
529 
530  // Since this is being run on SYCL, we need to fence because the below host code will use UVM
531  // KDDKDD typename graph_t::execution_space().fence();
532 
533  // KDDKDD UVM Without UVM, need to copy targetMap arrays to host.
534  // KDDKDD UVM Ideally, this function would run on device and use
535  // KDDKDD UVM KokkosKernels instead of this host implementation.
536  auto targetMapToOrigRow =
537  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
538  targetMapToOrigRow_dev);
539  auto targetMapToImportRow =
540  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
541  targetMapToImportRow_dev);
542  auto Bcol2Ccol =
543  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
544  Bcol2Ccol_dev);
545  auto Icol2Ccol =
546  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
547  Icol2Ccol_dev);
548 
549 
550  // Sizes
551  RCP<const map_type> Ccolmap = C.getColMap();
552  size_t m = Aview.origMatrix->getLocalNumRows();
553  size_t n = Ccolmap->getLocalNumElements();
554 
555  // Grab the Kokkos::SparseCrsMatrices & inner stuff
556  const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
557  const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
558  const KCRS & Cmat = C.getLocalMatrixHost();
559 
560  c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
561  const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
562  const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
563  scalar_view_t Cvals = Cmat.values;
564 
565  c_lno_view_t Irowptr;
566  lno_nnz_view_t Icolind;
567  scalar_view_t Ivals;
568  if(!Bview.importMatrix.is_null()) {
569  auto lclB = Bview.importMatrix->getLocalMatrixHost();
570  Irowptr = lclB.graph.row_map;
571  Icolind = lclB.graph.entries;
572  Ivals = lclB.values;
573  }
574 
575  // Jacobi-specific inner stuff
576  auto Dvals =
577  Dinv.template getLocalView<scalar_memory_space>(Access::ReadOnly);
578 
579 #ifdef HAVE_TPETRA_MMM_TIMINGS
580  MM2 = Teuchos::null; MM2 = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Reuse SYCLCore - Compare"))));
581 #endif
582 
583  // The status array will contain the index into colind where this entry was last deposited.
584  // c_status[i] < CSR_ip - not in the row yet
585  // c_status[i] >= CSR_ip - this is the entry where you can find the data
586  // We start with this filled with INVALID's indicating that there are no entries yet.
587  // Sadly, this complicates the code due to the fact that size_t's are unsigned.
588  std::vector<size_t> c_status(n, ST_INVALID);
589 
590  // For each row of A/C
591  size_t CSR_ip = 0, OLD_ip = 0;
592  for (size_t i = 0; i < m; i++) {
593 
594  // First fill the c_status array w/ locations where we're allowed to
595  // generate nonzeros for this row
596  OLD_ip = Crowptr[i];
597  CSR_ip = Crowptr[i+1];
598  for (size_t k = OLD_ip; k < CSR_ip; k++) {
599  c_status[Ccolind[k]] = k;
600 
601  // Reset values in the row of C
602  Cvals[k] = SC_ZERO;
603  }
604 
605  SC minusOmegaDval = -omega*Dvals(i,0);
606 
607  // Entries of B
608  for (size_t j = Browptr[i]; j < Browptr[i+1]; j++) {
609  Scalar Bval = Bvals[j];
610  if (Bval == SC_ZERO)
611  continue;
612  LO Bij = Bcolind[j];
613  LO Cij = Bcol2Ccol[Bij];
614 
615  TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
616  std::runtime_error, "Trying to insert a new entry into a static graph");
617 
618  Cvals[c_status[Cij]] = Bvals[j];
619  }
620 
621  // Entries of -omega * Dinv * A * B
622  for (size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
623  LO Aik = Acolind[k];
624  const SC Aval = Avals[k];
625  if (Aval == SC_ZERO)
626  continue;
627 
628  if (targetMapToOrigRow[Aik] != LO_INVALID) {
629  // Local matrix
630  size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
631 
632  for (size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
633  LO Bkj = Bcolind[j];
634  LO Cij = Bcol2Ccol[Bkj];
635 
636  TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
637  std::runtime_error, "Trying to insert a new entry into a static graph");
638 
639  Cvals[c_status[Cij]] += minusOmegaDval * Aval * Bvals[j];
640  }
641 
642  } else {
643  // Remote matrix
644  size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
645  for (size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
646  LO Ikj = Icolind[j];
647  LO Cij = Icol2Ccol[Ikj];
648 
649  TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
650  std::runtime_error, "Trying to insert a new entry into a static graph");
651 
652  Cvals[c_status[Cij]] += minusOmegaDval * Aval * Ivals[j];
653  }
654  }
655  }
656  }
657 
658 #ifdef HAVE_TPETRA_MMM_TIMINGS
659  MM2= Teuchos::null;
660  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Reuse ESFC"))));
661 #endif
662 
663  C.fillComplete(C.getDomainMap(), C.getRangeMap());
664 
665 }
666 
667 /*********************************************************************************************************/
668 template<class Scalar,
669  class LocalOrdinal,
670  class GlobalOrdinal,
671  class LocalOrdinalViewType>
672 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
673  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> & Dinv,
674  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
675  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
676  const LocalOrdinalViewType & Acol2Brow,
677  const LocalOrdinalViewType & Acol2Irow,
678  const LocalOrdinalViewType & Bcol2Ccol,
679  const LocalOrdinalViewType & Icol2Ccol,
680  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
681  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
682  const std::string& label,
683  const Teuchos::RCP<Teuchos::ParameterList>& params) {
684 
685 #ifdef HAVE_TPETRA_MMM_TIMINGS
686  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
687  using Teuchos::TimeMonitor;
688  Teuchos::RCP<TimeMonitor> MM;
689 #endif
690 
691  // Check if the diagonal entries exist in debug mode
692  const bool debug = Tpetra::Details::Behavior::debug();
693  if(debug) {
694 
695  auto rowMap = Aview.origMatrix->getRowMap();
696  Tpetra::Vector<Scalar> diags(rowMap);
697  Aview.origMatrix->getLocalDiagCopy(diags);
698  size_t diagLength = rowMap->getLocalNumElements();
699  Teuchos::Array<Scalar> diagonal(diagLength);
700  diags.get1dCopy(diagonal());
701 
702  for(size_t i = 0; i < diagLength; ++i) {
703  TEUCHOS_TEST_FOR_EXCEPTION(diagonal[i] == Teuchos::ScalarTraits<Scalar>::zero(),
704  std::runtime_error,
705  "Matrix A has a zero/missing diagonal: " << diagonal[i] << std::endl <<
706  "KokkosKernels Jacobi-fused SpGEMM requires nonzero diagonal entries in A" << std::endl);
707  }
708  }
709 
710  // Usings
711  using device_t = typename Tpetra::KokkosCompat::KokkosSYCLWrapperNode::device_type;
713  using graph_t = typename matrix_t::StaticCrsGraphType;
714  using lno_view_t = typename graph_t::row_map_type::non_const_type;
715  using int_view_t = Kokkos::View<int *,
716  typename lno_view_t::array_layout,
717  typename lno_view_t::memory_space,
718  typename lno_view_t::memory_traits>;
719  using c_lno_view_t = typename graph_t::row_map_type::const_type;
720  using lno_nnz_view_t = typename graph_t::entries_type::non_const_type;
721  using scalar_view_t = typename matrix_t::values_type::non_const_type;
722 
723  // KokkosKernels handle
724  using handle_t = typename KokkosKernels::Experimental::KokkosKernelsHandle<
725  typename lno_view_t::const_value_type,typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
726  typename device_t::execution_space, typename device_t::memory_space,typename device_t::memory_space >;
727  using int_handle_t = typename KokkosKernels::Experimental::KokkosKernelsHandle<
728  typename int_view_t::const_value_type,typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
729  typename device_t::execution_space, typename device_t::memory_space,typename device_t::memory_space >;
730 
731  // Merge the B and Bimport matrices
732  const matrix_t Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
733 
734  // Get the properties and arrays of input matrices
735  const matrix_t & Amat = Aview.origMatrix->getLocalMatrixDevice();
736  const matrix_t & Bmat = Bview.origMatrix->getLocalMatrixDevice();
737 
738  typename handle_t::nnz_lno_t AnumRows = Amat.numRows();
739  typename handle_t::nnz_lno_t BnumRows = Bmerged.numRows();
740  typename handle_t::nnz_lno_t BnumCols = Bmerged.numCols();
741 
742  // Arrays of the output matrix
743  lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing("row_mapC"), AnumRows + 1);
744  lno_nnz_view_t entriesC;
745  scalar_view_t valuesC;
746 
747  // Options
748  int team_work_size = 16;
749  std::string myalg("SPGEMM_KK_MEMORY");
750  if(!params.is_null()) {
751  if(params->isParameter("sycl: algorithm"))
752  myalg = params->get("sycl: algorithm",myalg);
753  if(params->isParameter("sycl: team work size"))
754  team_work_size = params->get("sycl: team work size",team_work_size);
755  }
756 
757  // Get the algorithm mode
758  std::string nodename("SYCL");
759  std::string alg = nodename + std::string(" algorithm");
760  if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
761  KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
762 
763  // decide whether to use integer-typed row pointers for this spgemm
764  Tpetra::Details::IntRowPtrHelper<decltype(Bmerged)> irph(Bmerged.nnz(), Bmerged.graph.row_map);
765  const bool useIntRowptrs =
766  irph.shouldUseIntRowptrs() &&
767  Aview.origMatrix->getApplyHelper()->shouldUseIntRowptrs();
768 
769  if (useIntRowptrs) {
770  int_handle_t kh;
771  kh.create_spgemm_handle(alg_enum);
772  kh.set_team_work_size(team_work_size);
773 
774  int_view_t int_row_mapC (Kokkos::ViewAllocateWithoutInitializing("int_row_mapC"), AnumRows + 1);
775 
776  auto Aint = Aview.origMatrix->getApplyHelper()->getIntRowptrMatrix(Amat);
777  auto Bint = irph.getIntRowptrMatrix(Bmerged);
778 
779  KokkosSparse::Experimental::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
780  Aint.graph.row_map, Aint.graph.entries, false,
781  Bint.graph.row_map, Bint.graph.entries, false,
782  int_row_mapC);
783 
784  size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
785  if (c_nnz_size){
786  entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
787  valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
788  }
789 
790  // even though there is no TPL for this, we have to use the same handle that was used in the symbolic phase,
791  // so need to have a special int-typed call for this as well.
792  KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
793  Aint.graph.row_map, Aint.graph.entries, Amat.values, false,
794  Bint.graph.row_map, Bint.graph.entries, Bint.values, false,
795  int_row_mapC, entriesC, valuesC,
796  omega, Dinv.getLocalViewDevice(Access::ReadOnly));
797  // transfer the integer rowptrs back to the correct rowptr type
798  Kokkos::parallel_for(int_row_mapC.size(), KOKKOS_LAMBDA(int i){ row_mapC(i) = int_row_mapC(i);});
799  kh.destroy_spgemm_handle();
800  } else {
801  handle_t kh;
802  kh.create_spgemm_handle(alg_enum);
803  kh.set_team_work_size(team_work_size);
804 
805  KokkosSparse::Experimental::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
806  Amat.graph.row_map, Amat.graph.entries, false,
807  Bmerged.graph.row_map, Bmerged.graph.entries, false,
808  row_mapC);
809 
810  size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
811  if (c_nnz_size){
812  entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
813  valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
814  }
815  KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
816  Amat.graph.row_map, Amat.graph.entries, Amat.values, false,
817  Bmerged.graph.row_map, Bmerged.graph.entries, Bmerged.values, false,
818  row_mapC, entriesC, valuesC,
819  omega, Dinv.getLocalViewDevice(Access::ReadOnly));
820  kh.destroy_spgemm_handle();
821  }
822 
823 #ifdef HAVE_TPETRA_MMM_TIMINGS
824  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix SYCLSort"))));
825 #endif
826 
827  // Sort & set values
828  if (params.is_null() || params->get("sort entries",true))
829  Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
830  C.setAllValues(row_mapC,entriesC,valuesC);
831 
832 #ifdef HAVE_TPETRA_MMM_TIMINGS
833  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix SYCLESFC"))));
834 #endif
835 
836  // Final Fillcomplete
837  Teuchos::RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
838  labelList->set("Timer Label",label);
839  if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
840  Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > dummyExport;
841  C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
842 }
843 
844  }//MMdetails
845 }//Tpetra
846 
847 #endif//SYCL
848 
849 #endif
static bool debug()
Whether Tpetra is in debug mode.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
A distributed dense vector.