10 #ifndef TPETRA_MATRIXMATRIX_SYCL_DEF_HPP
11 #define TPETRA_MATRIXMATRIX_SYCL_DEF_HPP
13 #include "Tpetra_Details_IntRowPtrHelper.hpp"
15 #ifdef HAVE_TPETRA_INST_SYCL
21 template <
class Scalar,
24 class LocalOrdinalViewType>
25 struct KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode, LocalOrdinalViewType> {
26 static inline void mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
27 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
28 const LocalOrdinalViewType& Acol2Brow,
29 const LocalOrdinalViewType& Acol2Irow,
30 const LocalOrdinalViewType& Bcol2Ccol,
31 const LocalOrdinalViewType& Icol2Ccol,
32 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
33 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
34 const std::string& label = std::string(),
35 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
37 static inline void mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
38 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
39 const LocalOrdinalViewType& Acol2Brow,
40 const LocalOrdinalViewType& Acol2Irow,
41 const LocalOrdinalViewType& Bcol2Ccol,
42 const LocalOrdinalViewType& Icol2Ccol,
43 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
44 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
45 const std::string& label = std::string(),
46 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
50 template <
class Scalar,
52 class GlobalOrdinal,
class LocalOrdinalViewType>
53 struct KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode, LocalOrdinalViewType> {
54 static inline void jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
55 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Dinv,
56 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
57 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
58 const LocalOrdinalViewType& Acol2Brow,
59 const LocalOrdinalViewType& Acol2Irow,
60 const LocalOrdinalViewType& Bcol2Ccol,
61 const LocalOrdinalViewType& Icol2Ccol,
62 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
63 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
64 const std::string& label = std::string(),
65 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
67 static inline void jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
68 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Dinv,
69 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
70 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
71 const LocalOrdinalViewType& Acol2Brow,
72 const LocalOrdinalViewType& Acol2Irow,
73 const LocalOrdinalViewType& Bcol2Ccol,
74 const LocalOrdinalViewType& Icol2Ccol,
75 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
76 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
77 const std::string& label = std::string(),
78 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
80 static inline void jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
81 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Dinv,
82 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
83 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
84 const LocalOrdinalViewType& Acol2Brow,
85 const LocalOrdinalViewType& Acol2Irow,
86 const LocalOrdinalViewType& Bcol2Ccol,
87 const LocalOrdinalViewType& Icol2Ccol,
88 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
89 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
90 const std::string& label = std::string(),
91 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
96 template <
class Scalar,
99 class LocalOrdinalViewType>
100 void KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode, LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
101 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
102 const LocalOrdinalViewType& Acol2Brow,
103 const LocalOrdinalViewType& Acol2Irow,
104 const LocalOrdinalViewType& Bcol2Ccol,
105 const LocalOrdinalViewType& Icol2Ccol,
106 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
107 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
108 const std::string& label,
109 const Teuchos::RCP<Teuchos::ParameterList>& params) {
110 #ifdef HAVE_TPETRA_MMM_TIMINGS
111 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
112 using Teuchos::TimeMonitor;
113 Teuchos::RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*(TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SYCLWrapper")))));
116 typedef Tpetra::KokkosCompat::KokkosSYCLWrapperNode Node;
117 std::string nodename(
"SYCL");
122 typedef typename KCRS::device_type device_t;
123 typedef typename KCRS::StaticCrsGraphType graph_t;
124 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
125 using int_view_t = Kokkos::View<int*, typename lno_view_t::array_layout, typename lno_view_t::memory_space, typename lno_view_t::memory_traits>;
126 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
127 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
128 typedef typename KCRS::values_type::non_const_type scalar_view_t;
132 int team_work_size = 16;
133 std::string myalg(
"SPGEMM_KK_MEMORY");
134 if (!params.is_null()) {
135 if (params->isParameter(
"sycl: algorithm"))
136 myalg = params->get(
"sycl: algorithm", myalg);
137 if (params->isParameter(
"sycl: team work size"))
138 team_work_size = params->get(
"sycl: team work size", team_work_size);
142 typedef KokkosKernels::Experimental::KokkosKernelsHandle<
143 typename lno_view_t::const_value_type,
typename lno_nnz_view_t::const_value_type,
typename scalar_view_t::const_value_type,
144 typename device_t::execution_space,
typename device_t::memory_space,
typename device_t::memory_space>
146 using IntKernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle<
147 typename int_view_t::const_value_type,
typename lno_nnz_view_t::const_value_type,
typename scalar_view_t::const_value_type,
148 typename device_t::execution_space,
typename device_t::memory_space,
typename device_t::memory_space>;
151 const KCRS& Amat = Aview.origMatrix->getLocalMatrixDevice();
152 const KCRS& Bmat = Bview.origMatrix->getLocalMatrixDevice();
154 c_lno_view_t Arowptr = Amat.graph.row_map,
155 Browptr = Bmat.graph.row_map;
156 const lno_nnz_view_t Acolind = Amat.graph.entries,
157 Bcolind = Bmat.graph.entries;
158 const scalar_view_t Avals = Amat.values,
162 std::string alg = nodename + std::string(
" algorithm");
164 if (!params.is_null() && params->isParameter(alg)) myalg = params->get(alg, myalg);
165 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
168 const KCRS Bmerged = Tpetra::MMdetails::merge_matrices(Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C.getColMap()->getLocalNumElements());
170 #ifdef HAVE_TPETRA_MMM_TIMINGS
172 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SYCLCore"))));
176 typename KernelHandle::nnz_lno_t AnumRows = Amat.numRows();
177 typename KernelHandle::nnz_lno_t BnumRows = Bmerged.numRows();
178 typename KernelHandle::nnz_lno_t BnumCols = Bmerged.numCols();
180 lno_view_t row_mapC(Kokkos::ViewAllocateWithoutInitializing(
"non_const_lno_row"), AnumRows + 1);
181 lno_nnz_view_t entriesC;
182 scalar_view_t valuesC;
187 const bool useIntRowptrs =
188 irph.shouldUseIntRowptrs() &&
189 Aview.origMatrix->getApplyHelper()->shouldUseIntRowptrs();
193 kh.create_spgemm_handle(alg_enum);
194 kh.set_team_work_size(team_work_size);
196 int_view_t int_row_mapC(Kokkos::ViewAllocateWithoutInitializing(
"non_const_int_row"), AnumRows + 1);
198 auto Aint = Aview.origMatrix->getApplyHelper()->getIntRowptrMatrix(Amat);
199 auto Bint = irph.getIntRowptrMatrix(Bmerged);
200 KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols, Aint.graph.row_map, Aint.graph.entries,
false, Bint.graph.row_map, Bint.graph.entries,
false, int_row_mapC);
202 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
204 entriesC = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
205 valuesC = scalar_view_t(Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
207 KokkosSparse::spgemm_numeric(&kh, AnumRows, BnumRows, BnumCols, Aint.graph.row_map, Aint.graph.entries, Aint.values,
false, Bint.graph.row_map, Bint.graph.entries, Bint.values,
false, int_row_mapC, entriesC, valuesC);
209 Kokkos::parallel_for(
210 int_row_mapC.size(), KOKKOS_LAMBDA(
int i) { row_mapC(i) = int_row_mapC(i); });
211 kh.destroy_spgemm_handle();
215 kh.create_spgemm_handle(alg_enum);
216 kh.set_team_work_size(team_work_size);
218 KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols, Amat.graph.row_map, Amat.graph.entries,
false, Bmerged.graph.row_map, Bmerged.graph.entries,
false, row_mapC);
220 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
222 entriesC = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
223 valuesC = scalar_view_t(Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
226 KokkosSparse::spgemm_numeric(&kh, AnumRows, BnumRows, BnumCols, Amat.graph.row_map, Amat.graph.entries, Amat.values,
false, Bmerged.graph.row_map, Bmerged.graph.entries, Bmerged.values,
false, row_mapC, entriesC, valuesC);
227 kh.destroy_spgemm_handle();
230 #ifdef HAVE_TPETRA_MMM_TIMINGS
232 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SYCLSort"))));
236 if (params.is_null() || params->get(
"sort entries",
true))
237 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
238 C.setAllValues(row_mapC, entriesC, valuesC);
240 #ifdef HAVE_TPETRA_MMM_TIMINGS
242 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SYCLESFC"))));
246 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
247 labelList->set(
"Timer Label", label);
248 if (!params.is_null()) labelList->set(
"compute global constants", params->get(
"compute global constants",
true));
249 RCP<const Export<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > dummyExport;
250 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
254 template <
class Scalar,
257 class LocalOrdinalViewType>
258 void KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode, LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(
259 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
260 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
261 const LocalOrdinalViewType& targetMapToOrigRow_dev,
262 const LocalOrdinalViewType& targetMapToImportRow_dev,
263 const LocalOrdinalViewType& Bcol2Ccol_dev,
264 const LocalOrdinalViewType& Icol2Ccol_dev,
265 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
266 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
267 const std::string& label,
268 const Teuchos::RCP<Teuchos::ParameterList>& params) {
270 typedef Tpetra::KokkosCompat::KokkosSYCLWrapperNode Node;
272 #ifdef HAVE_TPETRA_MMM_TIMINGS
273 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
274 using Teuchos::TimeMonitor;
275 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Reuse SerialCore"))));
276 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
282 typedef typename Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_host_type KCRS;
283 typedef typename KCRS::StaticCrsGraphType graph_t;
284 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
285 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
286 typedef typename KCRS::values_type::non_const_type scalar_view_t;
289 typedef LocalOrdinal LO;
290 typedef GlobalOrdinal GO;
292 typedef Map<LO, GO, NO> map_type;
293 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
294 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
295 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
303 auto targetMapToOrigRow =
304 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
305 targetMapToOrigRow_dev);
306 auto targetMapToImportRow =
307 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
308 targetMapToImportRow_dev);
310 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
313 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
317 RCP<const map_type> Ccolmap = C.getColMap();
318 size_t m = Aview.origMatrix->getLocalNumRows();
319 size_t n = Ccolmap->getLocalNumElements();
322 const KCRS& Amat = Aview.origMatrix->getLocalMatrixHost();
323 const KCRS& Bmat = Bview.origMatrix->getLocalMatrixHost();
324 const KCRS& Cmat = C.getLocalMatrixHost();
326 c_lno_view_t Arowptr = Amat.graph.row_map,
327 Browptr = Bmat.graph.row_map,
328 Crowptr = Cmat.graph.row_map;
329 const lno_nnz_view_t Acolind = Amat.graph.entries,
330 Bcolind = Bmat.graph.entries,
331 Ccolind = Cmat.graph.entries;
332 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
333 scalar_view_t Cvals = Cmat.values;
335 c_lno_view_t Irowptr;
336 lno_nnz_view_t Icolind;
338 if (!Bview.importMatrix.is_null()) {
339 auto lclB = Bview.importMatrix->getLocalMatrixHost();
340 Irowptr = lclB.graph.row_map;
341 Icolind = lclB.graph.entries;
345 #ifdef HAVE_TPETRA_MMM_TIMINGS
347 MM2 = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SerialCore - Compare"))));
359 std::vector<size_t> c_status(n, ST_INVALID);
362 size_t CSR_ip = 0, OLD_ip = 0;
363 for (
size_t i = 0; i < m; i++) {
367 CSR_ip = Crowptr[i + 1];
368 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
369 c_status[Ccolind[k]] = k;
375 for (
size_t k = Arowptr[i]; k < Arowptr[i + 1]; k++) {
377 const SC Aval = Avals[k];
381 if (targetMapToOrigRow[Aik] != LO_INVALID) {
383 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
385 for (
size_t j = Browptr[Bk]; j < Browptr[Bk + 1]; ++j) {
387 LO Cij = Bcol2Ccol[Bkj];
389 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
390 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph "
391 <<
"(c_status = " << c_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
393 Cvals[c_status[Cij]] += Aval * Bvals[j];
398 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
399 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik + 1]; ++j) {
401 LO Cij = Icol2Ccol[Ikj];
403 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
404 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph "
405 <<
"(c_status = " << c_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
407 Cvals[c_status[Cij]] += Aval * Ivals[j];
413 C.fillComplete(C.getDomainMap(), C.getRangeMap());
417 template <
class Scalar,
420 class LocalOrdinalViewType>
421 void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode, LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
422 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Dinv,
423 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
424 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
425 const LocalOrdinalViewType& Acol2Brow,
426 const LocalOrdinalViewType& Acol2Irow,
427 const LocalOrdinalViewType& Bcol2Ccol,
428 const LocalOrdinalViewType& Icol2Ccol,
429 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
430 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
431 const std::string& label,
432 const Teuchos::RCP<Teuchos::ParameterList>& params) {
433 #ifdef HAVE_TPETRA_MMM_TIMINGS
434 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
435 using Teuchos::TimeMonitor;
436 Teuchos::RCP<TimeMonitor> MM;
444 std::string myalg(
"KK");
445 if (!params.is_null()) {
446 if (params->isParameter(
"sycl: jacobi algorithm"))
447 myalg = params->get(
"sycl: jacobi algorithm", myalg);
450 if (myalg ==
"MSAK") {
451 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_MultiplyScaleAddKernel(omega, Dinv, Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
452 }
else if (myalg ==
"KK") {
453 jacobi_A_B_newmatrix_KokkosKernels(omega, Dinv, Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
455 throw std::runtime_error(
"Tpetra::MatrixMatrix::Jacobi newmatrix unknown kernel");
458 #ifdef HAVE_TPETRA_MMM_TIMINGS
460 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix SYCLESFC"))));
464 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
465 labelList->set(
"Timer Label", label);
466 if (!params.is_null()) labelList->set(
"compute global constants", params->get(
"compute global constants",
true));
469 if (!C.isFillComplete()) {
470 RCP<const Export<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > dummyExport;
471 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
476 template <
class Scalar,
479 class LocalOrdinalViewType>
480 void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode, LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
481 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Dinv,
482 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
483 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
484 const LocalOrdinalViewType& targetMapToOrigRow_dev,
485 const LocalOrdinalViewType& targetMapToImportRow_dev,
486 const LocalOrdinalViewType& Bcol2Ccol_dev,
487 const LocalOrdinalViewType& Icol2Ccol_dev,
488 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
489 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
490 const std::string& label,
491 const Teuchos::RCP<Teuchos::ParameterList>& params) {
493 typedef Tpetra::KokkosCompat::KokkosSYCLWrapperNode Node;
495 #ifdef HAVE_TPETRA_MMM_TIMINGS
496 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
497 using Teuchos::TimeMonitor;
498 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse SYCLCore"))));
499 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
505 typedef typename Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_host_type KCRS;
506 typedef typename KCRS::StaticCrsGraphType graph_t;
507 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
508 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
509 typedef typename KCRS::values_type::non_const_type scalar_view_t;
510 typedef typename scalar_view_t::memory_space scalar_memory_space;
513 typedef LocalOrdinal LO;
514 typedef GlobalOrdinal GO;
516 typedef Map<LO, GO, NO> map_type;
517 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
518 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
519 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
527 auto targetMapToOrigRow =
528 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
529 targetMapToOrigRow_dev);
530 auto targetMapToImportRow =
531 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
532 targetMapToImportRow_dev);
534 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
537 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
541 RCP<const map_type> Ccolmap = C.getColMap();
542 size_t m = Aview.origMatrix->getLocalNumRows();
543 size_t n = Ccolmap->getLocalNumElements();
546 const KCRS& Amat = Aview.origMatrix->getLocalMatrixHost();
547 const KCRS& Bmat = Bview.origMatrix->getLocalMatrixHost();
548 const KCRS& Cmat = C.getLocalMatrixHost();
550 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
551 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
552 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
553 scalar_view_t Cvals = Cmat.values;
555 c_lno_view_t Irowptr;
556 lno_nnz_view_t Icolind;
558 if (!Bview.importMatrix.is_null()) {
559 auto lclB = Bview.importMatrix->getLocalMatrixHost();
560 Irowptr = lclB.graph.row_map;
561 Icolind = lclB.graph.entries;
567 Dinv.template getLocalView<scalar_memory_space>(Access::ReadOnly);
569 #ifdef HAVE_TPETRA_MMM_TIMINGS
571 MM2 = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse SYCLCore - Compare"))));
579 std::vector<size_t> c_status(n, ST_INVALID);
582 size_t CSR_ip = 0, OLD_ip = 0;
583 for (
size_t i = 0; i < m; i++) {
587 CSR_ip = Crowptr[i + 1];
588 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
589 c_status[Ccolind[k]] = k;
595 SC minusOmegaDval = -omega * Dvals(i, 0);
598 for (
size_t j = Browptr[i]; j < Browptr[i + 1]; j++) {
599 Scalar Bval = Bvals[j];
603 LO Cij = Bcol2Ccol[Bij];
605 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
606 std::runtime_error,
"Trying to insert a new entry into a static graph");
608 Cvals[c_status[Cij]] = Bvals[j];
612 for (
size_t k = Arowptr[i]; k < Arowptr[i + 1]; k++) {
614 const SC Aval = Avals[k];
618 if (targetMapToOrigRow[Aik] != LO_INVALID) {
620 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
622 for (
size_t j = Browptr[Bk]; j < Browptr[Bk + 1]; ++j) {
624 LO Cij = Bcol2Ccol[Bkj];
626 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
627 std::runtime_error,
"Trying to insert a new entry into a static graph");
629 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Bvals[j];
634 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
635 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik + 1]; ++j) {
637 LO Cij = Icol2Ccol[Ikj];
639 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
640 std::runtime_error,
"Trying to insert a new entry into a static graph");
642 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Ivals[j];
648 #ifdef HAVE_TPETRA_MMM_TIMINGS
651 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse ESFC"))));
654 C.fillComplete(C.getDomainMap(), C.getRangeMap());
658 template <
class Scalar,
661 class LocalOrdinalViewType>
662 void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode, LocalOrdinalViewType>::jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
663 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Dinv,
664 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
665 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
666 const LocalOrdinalViewType& Acol2Brow,
667 const LocalOrdinalViewType& Acol2Irow,
668 const LocalOrdinalViewType& Bcol2Ccol,
669 const LocalOrdinalViewType& Icol2Ccol,
670 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
671 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
672 const std::string& label,
673 const Teuchos::RCP<Teuchos::ParameterList>& params) {
674 #ifdef HAVE_TPETRA_MMM_TIMINGS
675 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
676 using Teuchos::TimeMonitor;
677 Teuchos::RCP<TimeMonitor> MM;
683 auto rowMap = Aview.origMatrix->getRowMap();
685 Aview.origMatrix->getLocalDiagCopy(diags);
686 size_t diagLength = rowMap->getLocalNumElements();
687 Teuchos::Array<Scalar> diagonal(diagLength);
688 diags.get1dCopy(diagonal());
690 for (
size_t i = 0; i < diagLength; ++i) {
691 TEUCHOS_TEST_FOR_EXCEPTION(diagonal[i] == Teuchos::ScalarTraits<Scalar>::zero(),
693 "Matrix A has a zero/missing diagonal: " << diagonal[i] << std::endl
694 <<
"KokkosKernels Jacobi-fused SpGEMM requires nonzero diagonal entries in A" << std::endl);
699 using device_t =
typename Tpetra::KokkosCompat::KokkosSYCLWrapperNode::device_type;
701 using graph_t =
typename matrix_t::StaticCrsGraphType;
702 using lno_view_t =
typename graph_t::row_map_type::non_const_type;
703 using int_view_t = Kokkos::View<
int*,
704 typename lno_view_t::array_layout,
705 typename lno_view_t::memory_space,
706 typename lno_view_t::memory_traits>;
707 using c_lno_view_t =
typename graph_t::row_map_type::const_type;
708 using lno_nnz_view_t =
typename graph_t::entries_type::non_const_type;
709 using scalar_view_t =
typename matrix_t::values_type::non_const_type;
712 using handle_t =
typename KokkosKernels::Experimental::KokkosKernelsHandle<
713 typename lno_view_t::const_value_type,
typename lno_nnz_view_t::const_value_type,
typename scalar_view_t::const_value_type,
714 typename device_t::execution_space,
typename device_t::memory_space,
typename device_t::memory_space>;
715 using int_handle_t =
typename KokkosKernels::Experimental::KokkosKernelsHandle<
716 typename int_view_t::const_value_type,
typename lno_nnz_view_t::const_value_type,
typename scalar_view_t::const_value_type,
717 typename device_t::execution_space,
typename device_t::memory_space,
typename device_t::memory_space>;
720 const matrix_t Bmerged = Tpetra::MMdetails::merge_matrices(Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C.getColMap()->getLocalNumElements());
723 const matrix_t& Amat = Aview.origMatrix->getLocalMatrixDevice();
724 const matrix_t& Bmat = Bview.origMatrix->getLocalMatrixDevice();
726 typename handle_t::nnz_lno_t AnumRows = Amat.numRows();
727 typename handle_t::nnz_lno_t BnumRows = Bmerged.numRows();
728 typename handle_t::nnz_lno_t BnumCols = Bmerged.numCols();
731 lno_view_t row_mapC(Kokkos::ViewAllocateWithoutInitializing(
"row_mapC"), AnumRows + 1);
732 lno_nnz_view_t entriesC;
733 scalar_view_t valuesC;
736 int team_work_size = 16;
737 std::string myalg(
"SPGEMM_KK_MEMORY");
738 if (!params.is_null()) {
739 if (params->isParameter(
"sycl: algorithm"))
740 myalg = params->get(
"sycl: algorithm", myalg);
741 if (params->isParameter(
"sycl: team work size"))
742 team_work_size = params->get(
"sycl: team work size", team_work_size);
746 std::string nodename(
"SYCL");
747 std::string alg = nodename + std::string(
" algorithm");
748 if (!params.is_null() && params->isParameter(alg)) myalg = params->get(alg, myalg);
749 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
753 const bool useIntRowptrs =
754 irph.shouldUseIntRowptrs() &&
755 Aview.origMatrix->getApplyHelper()->shouldUseIntRowptrs();
759 kh.create_spgemm_handle(alg_enum);
760 kh.set_team_work_size(team_work_size);
762 int_view_t int_row_mapC(Kokkos::ViewAllocateWithoutInitializing(
"int_row_mapC"), AnumRows + 1);
764 auto Aint = Aview.origMatrix->getApplyHelper()->getIntRowptrMatrix(Amat);
765 auto Bint = irph.getIntRowptrMatrix(Bmerged);
767 KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
768 Aint.graph.row_map, Aint.graph.entries,
false,
769 Bint.graph.row_map, Bint.graph.entries,
false,
772 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
774 entriesC = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
775 valuesC = scalar_view_t(Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
780 KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
781 Aint.graph.row_map, Aint.graph.entries, Amat.values,
false,
782 Bint.graph.row_map, Bint.graph.entries, Bint.values,
false,
783 int_row_mapC, entriesC, valuesC,
784 omega, Dinv.getLocalViewDevice(Access::ReadOnly));
786 Kokkos::parallel_for(
787 int_row_mapC.size(), KOKKOS_LAMBDA(
int i) { row_mapC(i) = int_row_mapC(i); });
788 kh.destroy_spgemm_handle();
791 kh.create_spgemm_handle(alg_enum);
792 kh.set_team_work_size(team_work_size);
794 KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
795 Amat.graph.row_map, Amat.graph.entries,
false,
796 Bmerged.graph.row_map, Bmerged.graph.entries,
false,
799 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
801 entriesC = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
802 valuesC = scalar_view_t(Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
804 KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
805 Amat.graph.row_map, Amat.graph.entries, Amat.values,
false,
806 Bmerged.graph.row_map, Bmerged.graph.entries, Bmerged.values,
false,
807 row_mapC, entriesC, valuesC,
808 omega, Dinv.getLocalViewDevice(Access::ReadOnly));
809 kh.destroy_spgemm_handle();
812 #ifdef HAVE_TPETRA_MMM_TIMINGS
814 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix SYCLSort"))));
818 if (params.is_null() || params->get(
"sort entries",
true))
819 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
820 C.setAllValues(row_mapC, entriesC, valuesC);
822 #ifdef HAVE_TPETRA_MMM_TIMINGS
824 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix SYCLESFC"))));
828 Teuchos::RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
829 labelList->set(
"Timer Label", label);
830 if (!params.is_null()) labelList->set(
"compute global constants", params->get(
"compute global constants",
true));
831 Teuchos::RCP<const Export<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > dummyExport;
832 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
static bool debug()
Whether Tpetra is in debug mode.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
A distributed dense vector.