45 #ifndef TPETRA_MATRIXMATRIX_SYCL_DEF_HPP
46 #define TPETRA_MATRIXMATRIX_SYCL_DEF_HPP
48 #ifdef HAVE_TPETRA_INST_SYCL
54 template<
class Scalar,
57 class LocalOrdinalViewType>
58 struct KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode,LocalOrdinalViewType> {
59 static inline void mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
60 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
61 const LocalOrdinalViewType & Acol2Brow,
62 const LocalOrdinalViewType & Acol2Irow,
63 const LocalOrdinalViewType & Bcol2Ccol,
64 const LocalOrdinalViewType & Icol2Ccol,
65 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
66 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
67 const std::string& label = std::string(),
68 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
72 static inline void mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
73 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
74 const LocalOrdinalViewType & Acol2Brow,
75 const LocalOrdinalViewType & Acol2Irow,
76 const LocalOrdinalViewType & Bcol2Ccol,
77 const LocalOrdinalViewType & Icol2Ccol,
78 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
79 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
80 const std::string& label = std::string(),
81 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
86 template<
class Scalar,
88 class GlobalOrdinal,
class LocalOrdinalViewType>
89 struct KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode,LocalOrdinalViewType> {
90 static inline void jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
91 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> & Dinv,
92 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
93 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
94 const LocalOrdinalViewType & Acol2Brow,
95 const LocalOrdinalViewType & Acol2Irow,
96 const LocalOrdinalViewType & Bcol2Ccol,
97 const LocalOrdinalViewType & Icol2Ccol,
98 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
99 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
100 const std::string& label = std::string(),
101 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
103 static inline void jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
104 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> & Dinv,
105 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
106 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
107 const LocalOrdinalViewType & Acol2Brow,
108 const LocalOrdinalViewType & Acol2Irow,
109 const LocalOrdinalViewType & Bcol2Ccol,
110 const LocalOrdinalViewType & Icol2Ccol,
111 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
112 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
113 const std::string& label = std::string(),
114 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
116 static inline void jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
117 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> & Dinv,
118 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
119 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
120 const LocalOrdinalViewType & Acol2Brow,
121 const LocalOrdinalViewType & Acol2Irow,
122 const LocalOrdinalViewType & Bcol2Ccol,
123 const LocalOrdinalViewType & Icol2Ccol,
124 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
125 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
126 const std::string& label = std::string(),
127 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
133 template<
class Scalar,
136 class LocalOrdinalViewType>
137 void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
138 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
139 const LocalOrdinalViewType & Acol2Brow,
140 const LocalOrdinalViewType & Acol2Irow,
141 const LocalOrdinalViewType & Bcol2Ccol,
142 const LocalOrdinalViewType & Icol2Ccol,
143 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
144 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
145 const std::string& label,
146 const Teuchos::RCP<Teuchos::ParameterList>& params) {
149 #ifdef HAVE_TPETRA_MMM_TIMINGS
150 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
151 using Teuchos::TimeMonitor;
152 Teuchos::RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*(TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SYCLWrapper")))));
155 typedef Tpetra::KokkosCompat::KokkosSYCLWrapperNode Node;
156 std::string nodename(
"SYCL");
161 typedef typename KCRS::device_type device_t;
162 typedef typename KCRS::StaticCrsGraphType graph_t;
163 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
164 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
165 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
166 typedef typename KCRS::values_type::non_const_type scalar_view_t;
170 int team_work_size = 16;
171 std::string myalg(
"SPGEMM_KK_MEMORY");
172 if(!params.is_null()) {
173 if(params->isParameter(
"sycl: algorithm"))
174 myalg = params->get(
"sycl: algorithm",myalg);
175 if(params->isParameter(
"sycl: team work size"))
176 team_work_size = params->get(
"sycl: team work size",team_work_size);
180 typedef KokkosKernels::Experimental::KokkosKernelsHandle<
181 typename lno_view_t::const_value_type,
typename lno_nnz_view_t::const_value_type,
typename scalar_view_t::const_value_type,
182 typename device_t::execution_space,
typename device_t::memory_space,
typename device_t::memory_space > KernelHandle;
185 const KCRS & Amat = Aview.origMatrix->getLocalMatrixDevice();
186 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixDevice();
188 c_lno_view_t Arowptr = Amat.graph.row_map,
189 Browptr = Bmat.graph.row_map;
190 const lno_nnz_view_t Acolind = Amat.graph.entries,
191 Bcolind = Bmat.graph.entries;
192 const scalar_view_t Avals = Amat.values,
195 c_lno_view_t Irowptr;
196 lno_nnz_view_t Icolind;
198 if(!Bview.importMatrix.is_null()) {
199 auto lclB = Bview.importMatrix->getLocalMatrixDevice();
200 Irowptr = lclB.graph.row_map;
201 Icolind = lclB.graph.entries;
207 std::string alg = nodename+std::string(
" algorithm");
209 if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
210 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
213 const KCRS Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
215 #ifdef HAVE_TPETRA_MMM_TIMINGS
216 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SYCLCore"))));
220 typename KernelHandle::nnz_lno_t AnumRows = Amat.numRows();
221 typename KernelHandle::nnz_lno_t BnumRows = Bmerged.numRows();
222 typename KernelHandle::nnz_lno_t BnumCols = Bmerged.numCols();
224 lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing(
"non_const_lnow_row"), AnumRows + 1);
225 lno_nnz_view_t entriesC;
226 scalar_view_t valuesC;
228 kh.create_spgemm_handle(alg_enum);
229 kh.set_team_work_size(team_work_size);
231 KokkosSparse::Experimental::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Amat.graph.row_map,Amat.graph.entries,
false,Bmerged.graph.row_map,Bmerged.graph.entries,
false,row_mapC);
233 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
235 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
236 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
238 KokkosSparse::Experimental::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Amat.graph.row_map,Amat.graph.entries,Amat.values,
false,Bmerged.graph.row_map,Bmerged.graph.entries,Bmerged.values,
false,row_mapC,entriesC,valuesC);
239 kh.destroy_spgemm_handle();
241 #ifdef HAVE_TPETRA_MMM_TIMINGS
242 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SYCLSort"))));
246 if (params.is_null() || params->get(
"sort entries",
true))
247 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
248 C.setAllValues(row_mapC,entriesC,valuesC);
250 #ifdef HAVE_TPETRA_MMM_TIMINGS
251 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SYCLESFC"))));
255 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
256 labelList->set(
"Timer Label",label);
257 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
258 RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > dummyExport;
259 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
264 template<
class Scalar,
267 class LocalOrdinalViewType>
268 void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(
269 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
270 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
271 const LocalOrdinalViewType & targetMapToOrigRow_dev,
272 const LocalOrdinalViewType & targetMapToImportRow_dev,
273 const LocalOrdinalViewType & Bcol2Ccol_dev,
274 const LocalOrdinalViewType & Icol2Ccol_dev,
275 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
276 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
277 const std::string& label,
278 const Teuchos::RCP<Teuchos::ParameterList>& params) {
281 typedef Tpetra::KokkosCompat::KokkosSYCLWrapperNode Node;
283 #ifdef HAVE_TPETRA_MMM_TIMINGS
284 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
285 using Teuchos::TimeMonitor;
286 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Reuse SerialCore"))));
287 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
294 typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
295 typedef typename KCRS::StaticCrsGraphType graph_t;
296 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
297 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
298 typedef typename KCRS::values_type::non_const_type scalar_view_t;
301 typedef LocalOrdinal LO;
302 typedef GlobalOrdinal GO;
304 typedef Map<LO,GO,NO> map_type;
305 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
306 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
307 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
315 auto targetMapToOrigRow =
316 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
317 targetMapToOrigRow_dev);
318 auto targetMapToImportRow =
319 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
320 targetMapToImportRow_dev);
322 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
325 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
329 RCP<const map_type> Ccolmap = C.getColMap();
330 size_t m = Aview.origMatrix->getLocalNumRows();
331 size_t n = Ccolmap->getLocalNumElements();
334 const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
335 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
336 const KCRS & Cmat = C.getLocalMatrixHost();
338 c_lno_view_t Arowptr = Amat.graph.row_map,
339 Browptr = Bmat.graph.row_map,
340 Crowptr = Cmat.graph.row_map;
341 const lno_nnz_view_t Acolind = Amat.graph.entries,
342 Bcolind = Bmat.graph.entries,
343 Ccolind = Cmat.graph.entries;
344 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
345 scalar_view_t Cvals = Cmat.values;
347 c_lno_view_t Irowptr;
348 lno_nnz_view_t Icolind;
350 if(!Bview.importMatrix.is_null()) {
351 auto lclB = Bview.importMatrix->getLocalMatrixHost();
352 Irowptr = lclB.graph.row_map;
353 Icolind = lclB.graph.entries;
357 #ifdef HAVE_TPETRA_MMM_TIMINGS
358 MM2 = Teuchos::null; MM2 = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SerialCore - Compare"))));
370 std::vector<size_t> c_status(n, ST_INVALID);
373 size_t CSR_ip = 0, OLD_ip = 0;
374 for (
size_t i = 0; i < m; i++) {
378 CSR_ip = Crowptr[i+1];
379 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
380 c_status[Ccolind[k]] = k;
386 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
388 const SC Aval = Avals[k];
392 if (targetMapToOrigRow[Aik] != LO_INVALID) {
394 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
396 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
398 LO Cij = Bcol2Ccol[Bkj];
400 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
401 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph " <<
402 "(c_status = " << c_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
404 Cvals[c_status[Cij]] += Aval * Bvals[j];
409 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
410 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
412 LO Cij = Icol2Ccol[Ikj];
414 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
415 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph " <<
416 "(c_status = " << c_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
418 Cvals[c_status[Cij]] += Aval * Ivals[j];
424 C.fillComplete(C.getDomainMap(), C.getRangeMap());
428 template<
class Scalar,
431 class LocalOrdinalViewType>
432 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
433 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> & Dinv,
434 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
435 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
436 const LocalOrdinalViewType & Acol2Brow,
437 const LocalOrdinalViewType & Acol2Irow,
438 const LocalOrdinalViewType & Bcol2Ccol,
439 const LocalOrdinalViewType & Icol2Ccol,
440 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
441 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
442 const std::string& label,
443 const Teuchos::RCP<Teuchos::ParameterList>& params) {
445 #ifdef HAVE_TPETRA_MMM_TIMINGS
446 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
447 using Teuchos::TimeMonitor;
448 Teuchos::RCP<TimeMonitor> MM;
456 std::string myalg(
"KK");
457 if(!params.is_null()) {
458 if(params->isParameter(
"sycl: jacobi algorithm"))
459 myalg = params->get(
"sycl: jacobi algorithm",myalg);
462 if(myalg ==
"MSAK") {
463 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_MultiplyScaleAddKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
465 else if(myalg ==
"KK") {
466 jacobi_A_B_newmatrix_KokkosKernels(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
469 throw std::runtime_error(
"Tpetra::MatrixMatrix::Jacobi newmatrix unknown kernel");
472 #ifdef HAVE_TPETRA_MMM_TIMINGS
473 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix SYCLESFC"))));
477 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
478 labelList->set(
"Timer Label",label);
479 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
482 if(!C.isFillComplete()) {
483 RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > dummyExport;
484 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
492 template<
class Scalar,
495 class LocalOrdinalViewType>
496 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
497 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> & Dinv,
498 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
499 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
500 const LocalOrdinalViewType & targetMapToOrigRow_dev,
501 const LocalOrdinalViewType & targetMapToImportRow_dev,
502 const LocalOrdinalViewType & Bcol2Ccol_dev,
503 const LocalOrdinalViewType & Icol2Ccol_dev,
504 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
505 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
506 const std::string& label,
507 const Teuchos::RCP<Teuchos::ParameterList>& params) {
510 typedef Tpetra::KokkosCompat::KokkosSYCLWrapperNode Node;
512 #ifdef HAVE_TPETRA_MMM_TIMINGS
513 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
514 using Teuchos::TimeMonitor;
515 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse SYCLCore"))));
516 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
522 typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
523 typedef typename KCRS::StaticCrsGraphType graph_t;
524 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
525 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
526 typedef typename KCRS::values_type::non_const_type scalar_view_t;
527 typedef typename scalar_view_t::memory_space scalar_memory_space;
530 typedef LocalOrdinal LO;
531 typedef GlobalOrdinal GO;
533 typedef Map<LO,GO,NO> map_type;
534 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
535 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
536 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
544 auto targetMapToOrigRow =
545 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
546 targetMapToOrigRow_dev);
547 auto targetMapToImportRow =
548 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
549 targetMapToImportRow_dev);
551 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
554 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
559 RCP<const map_type> Ccolmap = C.getColMap();
560 size_t m = Aview.origMatrix->getLocalNumRows();
561 size_t n = Ccolmap->getLocalNumElements();
564 const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
565 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
566 const KCRS & Cmat = C.getLocalMatrixHost();
568 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
569 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
570 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
571 scalar_view_t Cvals = Cmat.values;
573 c_lno_view_t Irowptr;
574 lno_nnz_view_t Icolind;
576 if(!Bview.importMatrix.is_null()) {
577 auto lclB = Bview.importMatrix->getLocalMatrixHost();
578 Irowptr = lclB.graph.row_map;
579 Icolind = lclB.graph.entries;
585 Dinv.template getLocalView<scalar_memory_space>(Access::ReadOnly);
587 #ifdef HAVE_TPETRA_MMM_TIMINGS
588 MM2 = Teuchos::null; MM2 = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse SYCLCore - Compare"))));
596 std::vector<size_t> c_status(n, ST_INVALID);
599 size_t CSR_ip = 0, OLD_ip = 0;
600 for (
size_t i = 0; i < m; i++) {
605 CSR_ip = Crowptr[i+1];
606 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
607 c_status[Ccolind[k]] = k;
613 SC minusOmegaDval = -omega*Dvals(i,0);
616 for (
size_t j = Browptr[i]; j < Browptr[i+1]; j++) {
617 Scalar Bval = Bvals[j];
621 LO Cij = Bcol2Ccol[Bij];
623 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
624 std::runtime_error,
"Trying to insert a new entry into a static graph");
626 Cvals[c_status[Cij]] = Bvals[j];
630 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
632 const SC Aval = Avals[k];
636 if (targetMapToOrigRow[Aik] != LO_INVALID) {
638 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
640 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
642 LO Cij = Bcol2Ccol[Bkj];
644 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
645 std::runtime_error,
"Trying to insert a new entry into a static graph");
647 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Bvals[j];
652 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
653 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
655 LO Cij = Icol2Ccol[Ikj];
657 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
658 std::runtime_error,
"Trying to insert a new entry into a static graph");
660 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Ivals[j];
666 #ifdef HAVE_TPETRA_MMM_TIMINGS
668 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse ESFC"))));
671 C.fillComplete(C.getDomainMap(), C.getRangeMap());
676 template<
class Scalar,
679 class LocalOrdinalViewType>
680 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
681 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> & Dinv,
682 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
683 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
684 const LocalOrdinalViewType & Acol2Brow,
685 const LocalOrdinalViewType & Acol2Irow,
686 const LocalOrdinalViewType & Bcol2Ccol,
687 const LocalOrdinalViewType & Icol2Ccol,
688 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
689 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
690 const std::string& label,
691 const Teuchos::RCP<Teuchos::ParameterList>& params) {
693 #ifdef HAVE_TPETRA_MMM_TIMINGS
694 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
695 using Teuchos::TimeMonitor;
696 Teuchos::RCP<TimeMonitor> MM;
703 auto rowMap = Aview.origMatrix->getRowMap();
705 Aview.origMatrix->getLocalDiagCopy(diags);
706 size_t diagLength = rowMap->getLocalNumElements();
707 Teuchos::Array<Scalar> diagonal(diagLength);
708 diags.get1dCopy(diagonal());
710 for(
size_t i = 0; i < diagLength; ++i) {
711 TEUCHOS_TEST_FOR_EXCEPTION(diagonal[i] == Teuchos::ScalarTraits<Scalar>::zero(),
713 "Matrix A has a zero/missing diagonal: " << diagonal[i] << std::endl <<
714 "KokkosKernels Jacobi-fused SpGEMM requires nonzero diagonal entries in A" << std::endl);
719 using device_t =
typename Tpetra::KokkosCompat::KokkosSYCLWrapperNode::device_type;
721 using graph_t =
typename matrix_t::StaticCrsGraphType;
722 using lno_view_t =
typename graph_t::row_map_type::non_const_type;
723 using c_lno_view_t =
typename graph_t::row_map_type::const_type;
724 using lno_nnz_view_t =
typename graph_t::entries_type::non_const_type;
725 using scalar_view_t =
typename matrix_t::values_type::non_const_type;
728 using handle_t =
typename KokkosKernels::Experimental::KokkosKernelsHandle<
729 typename lno_view_t::const_value_type,
typename lno_nnz_view_t::const_value_type,
typename scalar_view_t::const_value_type,
730 typename device_t::execution_space,
typename device_t::memory_space,
typename device_t::memory_space >;
733 c_lno_view_t Irowptr;
734 lno_nnz_view_t Icolind;
736 if(!Bview.importMatrix.is_null()) {
737 auto lclB = Bview.importMatrix->getLocalMatrixDevice();
738 Irowptr = lclB.graph.row_map;
739 Icolind = lclB.graph.entries;
744 const matrix_t Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
747 const matrix_t & Amat = Aview.origMatrix->getLocalMatrixDevice();
748 const matrix_t & Bmat = Bview.origMatrix->getLocalMatrixDevice();
750 typename handle_t::nnz_lno_t AnumRows = Amat.numRows();
751 typename handle_t::nnz_lno_t BnumRows = Bmerged.numRows();
752 typename handle_t::nnz_lno_t BnumCols = Bmerged.numCols();
754 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmerged.graph.row_map;
755 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmerged.graph.entries;
756 const scalar_view_t Avals = Amat.values, Bvals = Bmerged.values;
759 lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing(
"non_const_lnow_row"), AnumRows + 1);
760 lno_nnz_view_t entriesC;
761 scalar_view_t valuesC;
764 int team_work_size = 16;
765 std::string myalg(
"SPGEMM_KK_MEMORY");
766 if(!params.is_null()) {
767 if(params->isParameter(
"sycl: algorithm"))
768 myalg = params->get(
"sycl: algorithm",myalg);
769 if(params->isParameter(
"sycl: team work size"))
770 team_work_size = params->get(
"sycl: team work size",team_work_size);
774 std::string nodename(
"SYCL");
775 std::string alg = nodename + std::string(
" algorithm");
776 if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
777 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
782 kh.create_spgemm_handle(alg_enum);
783 kh.set_team_work_size(team_work_size);
785 KokkosSparse::Experimental::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
786 Arowptr, Acolind,
false,
787 Browptr, Bcolind,
false,
790 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
792 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
793 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
796 KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
797 Arowptr, Acolind, Avals,
false,
798 Browptr, Bcolind, Bvals,
false,
799 row_mapC, entriesC, valuesC,
800 omega, Dinv.getLocalViewDevice(Access::ReadOnly));
801 kh.destroy_spgemm_handle();
803 #ifdef HAVE_TPETRA_MMM_TIMINGS
804 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix SYCLSort"))));
808 if (params.is_null() || params->get(
"sort entries",
true))
809 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
810 C.setAllValues(row_mapC,entriesC,valuesC);
812 #ifdef HAVE_TPETRA_MMM_TIMINGS
813 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix SYCLESFC"))));
817 Teuchos::RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
818 labelList->set(
"Timer Label",label);
819 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
820 Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > dummyExport;
821 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
static bool debug()
Whether Tpetra is in debug mode.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
A distributed dense vector.