Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
TpetraExt_MatrixMatrix_SYCL.hpp
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 
43 // This is a verbatim copy of the other TpetraExt_MatrixMatrix_*.hpp files
44 // replacing the execution/memory space by the ones corresponding to SYCL.
45 #ifndef TPETRA_MATRIXMATRIX_SYCL_DEF_HPP
46 #define TPETRA_MATRIXMATRIX_SYCL_DEF_HPP
47 
48 #ifdef HAVE_TPETRA_INST_SYCL
49 namespace Tpetra {
50 namespace MMdetails {
51 
52 /*********************************************************************************************************/
53 // MMM KernelWrappers for Partial Specialization to SYCL
54 template<class Scalar,
55  class LocalOrdinal,
56  class GlobalOrdinal,
57  class LocalOrdinalViewType>
58 struct KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode,LocalOrdinalViewType> {
59  static inline void mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
60  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
61  const LocalOrdinalViewType & Acol2Brow,
62  const LocalOrdinalViewType & Acol2Irow,
63  const LocalOrdinalViewType & Bcol2Ccol,
64  const LocalOrdinalViewType & Icol2Ccol,
65  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
66  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
67  const std::string& label = std::string(),
68  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
69 
70 
71 
72  static inline void mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
73  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
74  const LocalOrdinalViewType & Acol2Brow,
75  const LocalOrdinalViewType & Acol2Irow,
76  const LocalOrdinalViewType & Bcol2Ccol,
77  const LocalOrdinalViewType & Icol2Ccol,
78  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
79  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
80  const std::string& label = std::string(),
81  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
82 
83 };
84 
85 // Jacobi KernelWrappers for Partial Specialization to SYCL
86 template<class Scalar,
87  class LocalOrdinal,
88  class GlobalOrdinal, class LocalOrdinalViewType>
89 struct KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode,LocalOrdinalViewType> {
90  static inline void jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
91  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> & Dinv,
92  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
93  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
94  const LocalOrdinalViewType & Acol2Brow,
95  const LocalOrdinalViewType & Acol2Irow,
96  const LocalOrdinalViewType & Bcol2Ccol,
97  const LocalOrdinalViewType & Icol2Ccol,
98  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
99  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
100  const std::string& label = std::string(),
101  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
102 
103  static inline void jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
104  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> & Dinv,
105  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
106  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
107  const LocalOrdinalViewType & Acol2Brow,
108  const LocalOrdinalViewType & Acol2Irow,
109  const LocalOrdinalViewType & Bcol2Ccol,
110  const LocalOrdinalViewType & Icol2Ccol,
111  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
112  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
113  const std::string& label = std::string(),
114  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
115 
116  static inline void jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
117  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> & Dinv,
118  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
119  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
120  const LocalOrdinalViewType & Acol2Brow,
121  const LocalOrdinalViewType & Acol2Irow,
122  const LocalOrdinalViewType & Bcol2Ccol,
123  const LocalOrdinalViewType & Icol2Ccol,
124  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
125  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
126  const std::string& label = std::string(),
127  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
128 };
129 
130 
131 /*********************************************************************************************************/
132 // AB NewMatrix Kernel wrappers (KokkosKernels/SYCL Version)
133 template<class Scalar,
134  class LocalOrdinal,
135  class GlobalOrdinal,
136  class LocalOrdinalViewType>
137 void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
138  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
139  const LocalOrdinalViewType & Acol2Brow,
140  const LocalOrdinalViewType & Acol2Irow,
141  const LocalOrdinalViewType & Bcol2Ccol,
142  const LocalOrdinalViewType & Icol2Ccol,
143  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
144  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
145  const std::string& label,
146  const Teuchos::RCP<Teuchos::ParameterList>& params) {
147 
148 
149 #ifdef HAVE_TPETRA_MMM_TIMINGS
150  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
151  using Teuchos::TimeMonitor;
152  Teuchos::RCP<TimeMonitor> MM = rcp(new TimeMonitor(*(TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix SYCLWrapper")))));
153 #endif
154  // Node-specific code
155  typedef Tpetra::KokkosCompat::KokkosSYCLWrapperNode Node;
156  std::string nodename("SYCL");
157 
158  // Lots and lots of typedefs
159  using Teuchos::RCP;
161  typedef typename KCRS::device_type device_t;
162  typedef typename KCRS::StaticCrsGraphType graph_t;
163  typedef typename graph_t::row_map_type::non_const_type lno_view_t;
164  typedef typename graph_t::row_map_type::const_type c_lno_view_t;
165  typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
166  typedef typename KCRS::values_type::non_const_type scalar_view_t;
167  //typedef typename graph_t::row_map_type::const_type lno_view_t_const;
168 
169  // Options
170  int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
171  std::string myalg("SPGEMM_KK_MEMORY");
172  if(!params.is_null()) {
173  if(params->isParameter("sycl: algorithm"))
174  myalg = params->get("sycl: algorithm",myalg);
175  if(params->isParameter("sycl: team work size"))
176  team_work_size = params->get("sycl: team work size",team_work_size);
177  }
178 
179  // KokkosKernelsHandle
180  typedef KokkosKernels::Experimental::KokkosKernelsHandle<
181  typename lno_view_t::const_value_type,typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
182  typename device_t::execution_space, typename device_t::memory_space,typename device_t::memory_space > KernelHandle;
183 
184  // Grab the Kokkos::SparseCrsMatrices
185  const KCRS & Amat = Aview.origMatrix->getLocalMatrixDevice();
186  const KCRS & Bmat = Bview.origMatrix->getLocalMatrixDevice();
187 
188  c_lno_view_t Arowptr = Amat.graph.row_map,
189  Browptr = Bmat.graph.row_map;
190  const lno_nnz_view_t Acolind = Amat.graph.entries,
191  Bcolind = Bmat.graph.entries;
192  const scalar_view_t Avals = Amat.values,
193  Bvals = Bmat.values;
194 
195  c_lno_view_t Irowptr;
196  lno_nnz_view_t Icolind;
197  scalar_view_t Ivals;
198  if(!Bview.importMatrix.is_null()) {
199  auto lclB = Bview.importMatrix->getLocalMatrixDevice();
200  Irowptr = lclB.graph.row_map;
201  Icolind = lclB.graph.entries;
202  Ivals = lclB.values;
203  }
204 
205 
206  // Get the algorithm mode
207  std::string alg = nodename+std::string(" algorithm");
208  // printf("DEBUG: Using kernel: %s\n",myalg.c_str());
209  if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
210  KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
211 
212  // Merge the B and Bimport matrices
213  const KCRS Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
214 
215 #ifdef HAVE_TPETRA_MMM_TIMINGS
216  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix SYCLCore"))));
217 #endif
218 
219  // Do the multiply on whatever we've got
220  typename KernelHandle::nnz_lno_t AnumRows = Amat.numRows();
221  typename KernelHandle::nnz_lno_t BnumRows = Bmerged.numRows();
222  typename KernelHandle::nnz_lno_t BnumCols = Bmerged.numCols();
223 
224  lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing("non_const_lnow_row"), AnumRows + 1);
225  lno_nnz_view_t entriesC;
226  scalar_view_t valuesC;
227  KernelHandle kh;
228  kh.create_spgemm_handle(alg_enum);
229  kh.set_team_work_size(team_work_size);
230 
231  KokkosSparse::Experimental::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Amat.graph.row_map,Amat.graph.entries,false,Bmerged.graph.row_map,Bmerged.graph.entries,false,row_mapC);
232 
233  size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
234  if (c_nnz_size){
235  entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
236  valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
237  }
238  KokkosSparse::Experimental::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Amat.graph.row_map,Amat.graph.entries,Amat.values,false,Bmerged.graph.row_map,Bmerged.graph.entries,Bmerged.values,false,row_mapC,entriesC,valuesC);
239  kh.destroy_spgemm_handle();
240 
241 #ifdef HAVE_TPETRA_MMM_TIMINGS
242  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix SYCLSort"))));
243 #endif
244 
245  // Sort & set values
246  if (params.is_null() || params->get("sort entries",true))
247  Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
248  C.setAllValues(row_mapC,entriesC,valuesC);
249 
250 #ifdef HAVE_TPETRA_MMM_TIMINGS
251  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix SYCLESFC"))));
252 #endif
253 
254  // Final Fillcomplete
255  RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
256  labelList->set("Timer Label",label);
257  if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
258  RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > dummyExport;
259  C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
260 }
261 
262 
263 /*********************************************************************************************************/
264 template<class Scalar,
265  class LocalOrdinal,
266  class GlobalOrdinal,
267  class LocalOrdinalViewType>
268 void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(
269  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
270  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
271  const LocalOrdinalViewType & targetMapToOrigRow_dev,
272  const LocalOrdinalViewType & targetMapToImportRow_dev,
273  const LocalOrdinalViewType & Bcol2Ccol_dev,
274  const LocalOrdinalViewType & Icol2Ccol_dev,
275  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
276  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
277  const std::string& label,
278  const Teuchos::RCP<Teuchos::ParameterList>& params) {
279 
280  // FIXME: Right now, this is a cut-and-paste of the serial kernel
281  typedef Tpetra::KokkosCompat::KokkosSYCLWrapperNode Node;
282 
283 #ifdef HAVE_TPETRA_MMM_TIMINGS
284  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
285  using Teuchos::TimeMonitor;
286  Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Reuse SerialCore"))));
287  Teuchos::RCP<Teuchos::TimeMonitor> MM2;
288 #endif
289  using Teuchos::RCP;
290  using Teuchos::rcp;
291 
292 
293  // Lots and lots of typedefs
294  typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
295  typedef typename KCRS::StaticCrsGraphType graph_t;
296  typedef typename graph_t::row_map_type::const_type c_lno_view_t;
297  typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
298  typedef typename KCRS::values_type::non_const_type scalar_view_t;
299 
300  typedef Scalar SC;
301  typedef LocalOrdinal LO;
302  typedef GlobalOrdinal GO;
303  typedef Node NO;
304  typedef Map<LO,GO,NO> map_type;
305  const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
306  const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
307  const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
308 
309  // Since this is being run on SYCL, we need to fence because the below code will use UVM
310  // typename graph_t::execution_space().fence();
311 
312  // KDDKDD UVM Without UVM, need to copy targetMap arrays to host.
313  // KDDKDD UVM Ideally, this function would run on device and use
314  // KDDKDD UVM KokkosKernels instead of this host implementation.
315  auto targetMapToOrigRow =
316  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
317  targetMapToOrigRow_dev);
318  auto targetMapToImportRow =
319  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
320  targetMapToImportRow_dev);
321  auto Bcol2Ccol =
322  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
323  Bcol2Ccol_dev);
324  auto Icol2Ccol =
325  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
326  Icol2Ccol_dev);
327 
328  // Sizes
329  RCP<const map_type> Ccolmap = C.getColMap();
330  size_t m = Aview.origMatrix->getLocalNumRows();
331  size_t n = Ccolmap->getLocalNumElements();
332 
333  // Grab the Kokkos::SparseCrsMatrices & inner stuff
334  const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
335  const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
336  const KCRS & Cmat = C.getLocalMatrixHost();
337 
338  c_lno_view_t Arowptr = Amat.graph.row_map,
339  Browptr = Bmat.graph.row_map,
340  Crowptr = Cmat.graph.row_map;
341  const lno_nnz_view_t Acolind = Amat.graph.entries,
342  Bcolind = Bmat.graph.entries,
343  Ccolind = Cmat.graph.entries;
344  const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
345  scalar_view_t Cvals = Cmat.values;
346 
347  c_lno_view_t Irowptr;
348  lno_nnz_view_t Icolind;
349  scalar_view_t Ivals;
350  if(!Bview.importMatrix.is_null()) {
351  auto lclB = Bview.importMatrix->getLocalMatrixHost();
352  Irowptr = lclB.graph.row_map;
353  Icolind = lclB.graph.entries;
354  Ivals = lclB.values;
355  }
356 
357 #ifdef HAVE_TPETRA_MMM_TIMINGS
358  MM2 = Teuchos::null; MM2 = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix SerialCore - Compare"))));
359 #endif
360 
361  // Classic csr assembly (low memory edition)
362  // mfh 27 Sep 2016: The c_status array is an implementation detail
363  // of the local sparse matrix-matrix multiply routine.
364 
365  // The status array will contain the index into colind where this entry was last deposited.
366  // c_status[i] < CSR_ip - not in the row yet
367  // c_status[i] >= CSR_ip - this is the entry where you can find the data
368  // We start with this filled with INVALID's indicating that there are no entries yet.
369  // Sadly, this complicates the code due to the fact that size_t's are unsigned.
370  std::vector<size_t> c_status(n, ST_INVALID);
371 
372  // For each row of A/C
373  size_t CSR_ip = 0, OLD_ip = 0;
374  for (size_t i = 0; i < m; i++) {
375  // First fill the c_status array w/ locations where we're allowed to
376  // generate nonzeros for this row
377  OLD_ip = Crowptr[i];
378  CSR_ip = Crowptr[i+1];
379  for (size_t k = OLD_ip; k < CSR_ip; k++) {
380  c_status[Ccolind[k]] = k;
381 
382  // Reset values in the row of C
383  Cvals[k] = SC_ZERO;
384  }
385 
386  for (size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
387  LO Aik = Acolind[k];
388  const SC Aval = Avals[k];
389  if (Aval == SC_ZERO)
390  continue;
391 
392  if (targetMapToOrigRow[Aik] != LO_INVALID) {
393  // Local matrix
394  size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
395 
396  for (size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
397  LO Bkj = Bcolind[j];
398  LO Cij = Bcol2Ccol[Bkj];
399 
400  TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
401  std::runtime_error, "Trying to insert a new entry (" << i << "," << Cij << ") into a static graph " <<
402  "(c_status = " << c_status[Cij] << " of [" << OLD_ip << "," << CSR_ip << "))");
403 
404  Cvals[c_status[Cij]] += Aval * Bvals[j];
405  }
406 
407  } else {
408  // Remote matrix
409  size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
410  for (size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
411  LO Ikj = Icolind[j];
412  LO Cij = Icol2Ccol[Ikj];
413 
414  TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
415  std::runtime_error, "Trying to insert a new entry (" << i << "," << Cij << ") into a static graph " <<
416  "(c_status = " << c_status[Cij] << " of [" << OLD_ip << "," << CSR_ip << "))");
417 
418  Cvals[c_status[Cij]] += Aval * Ivals[j];
419  }
420  }
421  }
422  }
423 
424  C.fillComplete(C.getDomainMap(), C.getRangeMap());
425 }
426 
427 /*********************************************************************************************************/
428 template<class Scalar,
429  class LocalOrdinal,
430  class GlobalOrdinal,
431  class LocalOrdinalViewType>
432 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
433  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> & Dinv,
434  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
435  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
436  const LocalOrdinalViewType & Acol2Brow,
437  const LocalOrdinalViewType & Acol2Irow,
438  const LocalOrdinalViewType & Bcol2Ccol,
439  const LocalOrdinalViewType & Icol2Ccol,
440  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
441  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
442  const std::string& label,
443  const Teuchos::RCP<Teuchos::ParameterList>& params) {
444 
445 #ifdef HAVE_TPETRA_MMM_TIMINGS
446  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
447  using Teuchos::TimeMonitor;
448  Teuchos::RCP<TimeMonitor> MM;
449 #endif
450 
451  // Node-specific code
452  using Teuchos::RCP;
453 
454  // Options
455  //int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer // unreferenced
456  std::string myalg("KK");
457  if(!params.is_null()) {
458  if(params->isParameter("sycl: jacobi algorithm"))
459  myalg = params->get("sycl: jacobi algorithm",myalg);
460  }
461 
462  if(myalg == "MSAK") {
463  ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_MultiplyScaleAddKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
464  }
465  else if(myalg == "KK") {
466  jacobi_A_B_newmatrix_KokkosKernels(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
467  }
468  else {
469  throw std::runtime_error("Tpetra::MatrixMatrix::Jacobi newmatrix unknown kernel");
470  }
471 
472 #ifdef HAVE_TPETRA_MMM_TIMINGS
473  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix SYCLESFC"))));
474 #endif
475 
476  // Final Fillcomplete
477  RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
478  labelList->set("Timer Label",label);
479  if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
480 
481  // NOTE: MSAK already fillCompletes, so we have to check here
482  if(!C.isFillComplete()) {
483  RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > dummyExport;
484  C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
485  }
486 
487 }
488 
489 
490 
491 /*********************************************************************************************************/
492 template<class Scalar,
493  class LocalOrdinal,
494  class GlobalOrdinal,
495  class LocalOrdinalViewType>
496 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
497  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> & Dinv,
498  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
499  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
500  const LocalOrdinalViewType & targetMapToOrigRow_dev,
501  const LocalOrdinalViewType & targetMapToImportRow_dev,
502  const LocalOrdinalViewType & Bcol2Ccol_dev,
503  const LocalOrdinalViewType & Icol2Ccol_dev,
504  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
505  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
506  const std::string& label,
507  const Teuchos::RCP<Teuchos::ParameterList>& params) {
508 
509  // FIXME: Right now, this is a cut-and-paste of the serial kernel
510  typedef Tpetra::KokkosCompat::KokkosSYCLWrapperNode Node;
511 
512 #ifdef HAVE_TPETRA_MMM_TIMINGS
513  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
514  using Teuchos::TimeMonitor;
515  Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Reuse SYCLCore"))));
516  Teuchos::RCP<Teuchos::TimeMonitor> MM2;
517 #endif
518  using Teuchos::RCP;
519  using Teuchos::rcp;
520 
521  // Lots and lots of typedefs
522  typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
523  typedef typename KCRS::StaticCrsGraphType graph_t;
524  typedef typename graph_t::row_map_type::const_type c_lno_view_t;
525  typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
526  typedef typename KCRS::values_type::non_const_type scalar_view_t;
527  typedef typename scalar_view_t::memory_space scalar_memory_space;
528 
529  typedef Scalar SC;
530  typedef LocalOrdinal LO;
531  typedef GlobalOrdinal GO;
532  typedef Node NO;
533  typedef Map<LO,GO,NO> map_type;
534  const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
535  const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
536  const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
537 
538  // Since this is being run on SYCL, we need to fence because the below host code will use UVM
539  // KDDKDD typename graph_t::execution_space().fence();
540 
541  // KDDKDD UVM Without UVM, need to copy targetMap arrays to host.
542  // KDDKDD UVM Ideally, this function would run on device and use
543  // KDDKDD UVM KokkosKernels instead of this host implementation.
544  auto targetMapToOrigRow =
545  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
546  targetMapToOrigRow_dev);
547  auto targetMapToImportRow =
548  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
549  targetMapToImportRow_dev);
550  auto Bcol2Ccol =
551  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
552  Bcol2Ccol_dev);
553  auto Icol2Ccol =
554  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
555  Icol2Ccol_dev);
556 
557 
558  // Sizes
559  RCP<const map_type> Ccolmap = C.getColMap();
560  size_t m = Aview.origMatrix->getLocalNumRows();
561  size_t n = Ccolmap->getLocalNumElements();
562 
563  // Grab the Kokkos::SparseCrsMatrices & inner stuff
564  const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
565  const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
566  const KCRS & Cmat = C.getLocalMatrixHost();
567 
568  c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
569  const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
570  const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
571  scalar_view_t Cvals = Cmat.values;
572 
573  c_lno_view_t Irowptr;
574  lno_nnz_view_t Icolind;
575  scalar_view_t Ivals;
576  if(!Bview.importMatrix.is_null()) {
577  auto lclB = Bview.importMatrix->getLocalMatrixHost();
578  Irowptr = lclB.graph.row_map;
579  Icolind = lclB.graph.entries;
580  Ivals = lclB.values;
581  }
582 
583  // Jacobi-specific inner stuff
584  auto Dvals =
585  Dinv.template getLocalView<scalar_memory_space>(Access::ReadOnly);
586 
587 #ifdef HAVE_TPETRA_MMM_TIMINGS
588  MM2 = Teuchos::null; MM2 = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Reuse SYCLCore - Compare"))));
589 #endif
590 
591  // The status array will contain the index into colind where this entry was last deposited.
592  // c_status[i] < CSR_ip - not in the row yet
593  // c_status[i] >= CSR_ip - this is the entry where you can find the data
594  // We start with this filled with INVALID's indicating that there are no entries yet.
595  // Sadly, this complicates the code due to the fact that size_t's are unsigned.
596  std::vector<size_t> c_status(n, ST_INVALID);
597 
598  // For each row of A/C
599  size_t CSR_ip = 0, OLD_ip = 0;
600  for (size_t i = 0; i < m; i++) {
601 
602  // First fill the c_status array w/ locations where we're allowed to
603  // generate nonzeros for this row
604  OLD_ip = Crowptr[i];
605  CSR_ip = Crowptr[i+1];
606  for (size_t k = OLD_ip; k < CSR_ip; k++) {
607  c_status[Ccolind[k]] = k;
608 
609  // Reset values in the row of C
610  Cvals[k] = SC_ZERO;
611  }
612 
613  SC minusOmegaDval = -omega*Dvals(i,0);
614 
615  // Entries of B
616  for (size_t j = Browptr[i]; j < Browptr[i+1]; j++) {
617  Scalar Bval = Bvals[j];
618  if (Bval == SC_ZERO)
619  continue;
620  LO Bij = Bcolind[j];
621  LO Cij = Bcol2Ccol[Bij];
622 
623  TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
624  std::runtime_error, "Trying to insert a new entry into a static graph");
625 
626  Cvals[c_status[Cij]] = Bvals[j];
627  }
628 
629  // Entries of -omega * Dinv * A * B
630  for (size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
631  LO Aik = Acolind[k];
632  const SC Aval = Avals[k];
633  if (Aval == SC_ZERO)
634  continue;
635 
636  if (targetMapToOrigRow[Aik] != LO_INVALID) {
637  // Local matrix
638  size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
639 
640  for (size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
641  LO Bkj = Bcolind[j];
642  LO Cij = Bcol2Ccol[Bkj];
643 
644  TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
645  std::runtime_error, "Trying to insert a new entry into a static graph");
646 
647  Cvals[c_status[Cij]] += minusOmegaDval * Aval * Bvals[j];
648  }
649 
650  } else {
651  // Remote matrix
652  size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
653  for (size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
654  LO Ikj = Icolind[j];
655  LO Cij = Icol2Ccol[Ikj];
656 
657  TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
658  std::runtime_error, "Trying to insert a new entry into a static graph");
659 
660  Cvals[c_status[Cij]] += minusOmegaDval * Aval * Ivals[j];
661  }
662  }
663  }
664  }
665 
666 #ifdef HAVE_TPETRA_MMM_TIMINGS
667  MM2= Teuchos::null;
668  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Reuse ESFC"))));
669 #endif
670 
671  C.fillComplete(C.getDomainMap(), C.getRangeMap());
672 
673 }
674 
675 /*********************************************************************************************************/
676 template<class Scalar,
677  class LocalOrdinal,
678  class GlobalOrdinal,
679  class LocalOrdinalViewType>
680 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
681  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> & Dinv,
682  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
683  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
684  const LocalOrdinalViewType & Acol2Brow,
685  const LocalOrdinalViewType & Acol2Irow,
686  const LocalOrdinalViewType & Bcol2Ccol,
687  const LocalOrdinalViewType & Icol2Ccol,
688  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
689  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
690  const std::string& label,
691  const Teuchos::RCP<Teuchos::ParameterList>& params) {
692 
693 #ifdef HAVE_TPETRA_MMM_TIMINGS
694  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
695  using Teuchos::TimeMonitor;
696  Teuchos::RCP<TimeMonitor> MM;
697 #endif
698 
699  // Check if the diagonal entries exist in debug mode
700  const bool debug = Tpetra::Details::Behavior::debug();
701  if(debug) {
702 
703  auto rowMap = Aview.origMatrix->getRowMap();
704  Tpetra::Vector<Scalar> diags(rowMap);
705  Aview.origMatrix->getLocalDiagCopy(diags);
706  size_t diagLength = rowMap->getLocalNumElements();
707  Teuchos::Array<Scalar> diagonal(diagLength);
708  diags.get1dCopy(diagonal());
709 
710  for(size_t i = 0; i < diagLength; ++i) {
711  TEUCHOS_TEST_FOR_EXCEPTION(diagonal[i] == Teuchos::ScalarTraits<Scalar>::zero(),
712  std::runtime_error,
713  "Matrix A has a zero/missing diagonal: " << diagonal[i] << std::endl <<
714  "KokkosKernels Jacobi-fused SpGEMM requires nonzero diagonal entries in A" << std::endl);
715  }
716  }
717 
718  // Usings
719  using device_t = typename Tpetra::KokkosCompat::KokkosSYCLWrapperNode::device_type;
721  using graph_t = typename matrix_t::StaticCrsGraphType;
722  using lno_view_t = typename graph_t::row_map_type::non_const_type;
723  using c_lno_view_t = typename graph_t::row_map_type::const_type;
724  using lno_nnz_view_t = typename graph_t::entries_type::non_const_type;
725  using scalar_view_t = typename matrix_t::values_type::non_const_type;
726 
727  // KokkosKernels handle
728  using handle_t = typename KokkosKernels::Experimental::KokkosKernelsHandle<
729  typename lno_view_t::const_value_type,typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
730  typename device_t::execution_space, typename device_t::memory_space,typename device_t::memory_space >;
731 
732  // Get the rowPtr, colInd and vals of importMatrix
733  c_lno_view_t Irowptr;
734  lno_nnz_view_t Icolind;
735  scalar_view_t Ivals;
736  if(!Bview.importMatrix.is_null()) {
737  auto lclB = Bview.importMatrix->getLocalMatrixDevice();
738  Irowptr = lclB.graph.row_map;
739  Icolind = lclB.graph.entries;
740  Ivals = lclB.values;
741  }
742 
743  // Merge the B and Bimport matrices
744  const matrix_t Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
745 
746  // Get the properties and arrays of input matrices
747  const matrix_t & Amat = Aview.origMatrix->getLocalMatrixDevice();
748  const matrix_t & Bmat = Bview.origMatrix->getLocalMatrixDevice();
749 
750  typename handle_t::nnz_lno_t AnumRows = Amat.numRows();
751  typename handle_t::nnz_lno_t BnumRows = Bmerged.numRows();
752  typename handle_t::nnz_lno_t BnumCols = Bmerged.numCols();
753 
754  c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmerged.graph.row_map;
755  const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmerged.graph.entries;
756  const scalar_view_t Avals = Amat.values, Bvals = Bmerged.values;
757 
758  // Arrays of the output matrix
759  lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing("non_const_lnow_row"), AnumRows + 1);
760  lno_nnz_view_t entriesC;
761  scalar_view_t valuesC;
762 
763  // Options
764  int team_work_size = 16;
765  std::string myalg("SPGEMM_KK_MEMORY");
766  if(!params.is_null()) {
767  if(params->isParameter("sycl: algorithm"))
768  myalg = params->get("sycl: algorithm",myalg);
769  if(params->isParameter("sycl: team work size"))
770  team_work_size = params->get("sycl: team work size",team_work_size);
771  }
772 
773  // Get the algorithm mode
774  std::string nodename("SYCL");
775  std::string alg = nodename + std::string(" algorithm");
776  if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
777  KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
778 
779 
780  // KokkosKernels call
781  handle_t kh;
782  kh.create_spgemm_handle(alg_enum);
783  kh.set_team_work_size(team_work_size);
784 
785  KokkosSparse::Experimental::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
786  Arowptr, Acolind, false,
787  Browptr, Bcolind, false,
788  row_mapC);
789 
790  size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
791  if (c_nnz_size){
792  entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
793  valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
794  }
795 
796  KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
797  Arowptr, Acolind, Avals, false,
798  Browptr, Bcolind, Bvals, false,
799  row_mapC, entriesC, valuesC,
800  omega, Dinv.getLocalViewDevice(Access::ReadOnly));
801  kh.destroy_spgemm_handle();
802 
803 #ifdef HAVE_TPETRA_MMM_TIMINGS
804  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix SYCLSort"))));
805 #endif
806 
807  // Sort & set values
808  if (params.is_null() || params->get("sort entries",true))
809  Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
810  C.setAllValues(row_mapC,entriesC,valuesC);
811 
812 #ifdef HAVE_TPETRA_MMM_TIMINGS
813  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix SYCLESFC"))));
814 #endif
815 
816  // Final Fillcomplete
817  Teuchos::RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
818  labelList->set("Timer Label",label);
819  if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
820  Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > dummyExport;
821  C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
822 }
823 
824  }//MMdetails
825 }//Tpetra
826 
827 #endif//SYCL
828 
829 #endif
static bool debug()
Whether Tpetra is in debug mode.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI process.
A distributed dense vector.