10 #ifndef TPETRA_MATRIXMATRIX_SYCL_DEF_HPP
11 #define TPETRA_MATRIXMATRIX_SYCL_DEF_HPP
13 #include "Tpetra_Details_IntRowPtrHelper.hpp"
15 #ifdef HAVE_TPETRA_INST_SYCL
21 template<
class Scalar,
24 class LocalOrdinalViewType>
25 struct KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode,LocalOrdinalViewType> {
26 static inline void mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
27 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
28 const LocalOrdinalViewType & Acol2Brow,
29 const LocalOrdinalViewType & Acol2Irow,
30 const LocalOrdinalViewType & Bcol2Ccol,
31 const LocalOrdinalViewType & Icol2Ccol,
32 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
33 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
34 const std::string& label = std::string(),
35 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
39 static inline void mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
40 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
41 const LocalOrdinalViewType & Acol2Brow,
42 const LocalOrdinalViewType & Acol2Irow,
43 const LocalOrdinalViewType & Bcol2Ccol,
44 const LocalOrdinalViewType & Icol2Ccol,
45 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
46 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
47 const std::string& label = std::string(),
48 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
53 template<
class Scalar,
55 class GlobalOrdinal,
class LocalOrdinalViewType>
56 struct KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode,LocalOrdinalViewType> {
57 static inline void jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
58 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> & Dinv,
59 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
60 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
61 const LocalOrdinalViewType & Acol2Brow,
62 const LocalOrdinalViewType & Acol2Irow,
63 const LocalOrdinalViewType & Bcol2Ccol,
64 const LocalOrdinalViewType & Icol2Ccol,
65 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
66 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
67 const std::string& label = std::string(),
68 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
70 static inline void jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
71 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> & Dinv,
72 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
73 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
74 const LocalOrdinalViewType & Acol2Brow,
75 const LocalOrdinalViewType & Acol2Irow,
76 const LocalOrdinalViewType & Bcol2Ccol,
77 const LocalOrdinalViewType & Icol2Ccol,
78 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
79 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
80 const std::string& label = std::string(),
81 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
83 static inline void jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
84 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> & Dinv,
85 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
86 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
87 const LocalOrdinalViewType & Acol2Brow,
88 const LocalOrdinalViewType & Acol2Irow,
89 const LocalOrdinalViewType & Bcol2Ccol,
90 const LocalOrdinalViewType & Icol2Ccol,
91 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
92 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
93 const std::string& label = std::string(),
94 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
100 template<
class Scalar,
103 class LocalOrdinalViewType>
104 void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
105 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
106 const LocalOrdinalViewType & Acol2Brow,
107 const LocalOrdinalViewType & Acol2Irow,
108 const LocalOrdinalViewType & Bcol2Ccol,
109 const LocalOrdinalViewType & Icol2Ccol,
110 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
111 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
112 const std::string& label,
113 const Teuchos::RCP<Teuchos::ParameterList>& params) {
116 #ifdef HAVE_TPETRA_MMM_TIMINGS
117 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
118 using Teuchos::TimeMonitor;
119 Teuchos::RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*(TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SYCLWrapper")))));
122 typedef Tpetra::KokkosCompat::KokkosSYCLWrapperNode Node;
123 std::string nodename(
"SYCL");
128 typedef typename KCRS::device_type device_t;
129 typedef typename KCRS::StaticCrsGraphType graph_t;
130 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
131 using int_view_t = Kokkos::View<int *, typename lno_view_t::array_layout, typename lno_view_t::memory_space, typename lno_view_t::memory_traits>;
132 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
133 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
134 typedef typename KCRS::values_type::non_const_type scalar_view_t;
138 int team_work_size = 16;
139 std::string myalg(
"SPGEMM_KK_MEMORY");
140 if(!params.is_null()) {
141 if(params->isParameter(
"sycl: algorithm"))
142 myalg = params->get(
"sycl: algorithm",myalg);
143 if(params->isParameter(
"sycl: team work size"))
144 team_work_size = params->get(
"sycl: team work size",team_work_size);
148 typedef KokkosKernels::Experimental::KokkosKernelsHandle<
149 typename lno_view_t::const_value_type,
typename lno_nnz_view_t::const_value_type,
typename scalar_view_t::const_value_type,
150 typename device_t::execution_space,
typename device_t::memory_space,
typename device_t::memory_space > KernelHandle;
151 using IntKernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle<
152 typename int_view_t::const_value_type,
typename lno_nnz_view_t::const_value_type,
typename scalar_view_t::const_value_type,
153 typename device_t::execution_space,
typename device_t::memory_space,
typename device_t::memory_space >;
156 const KCRS & Amat = Aview.origMatrix->getLocalMatrixDevice();
157 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixDevice();
159 c_lno_view_t Arowptr = Amat.graph.row_map,
160 Browptr = Bmat.graph.row_map;
161 const lno_nnz_view_t Acolind = Amat.graph.entries,
162 Bcolind = Bmat.graph.entries;
163 const scalar_view_t Avals = Amat.values,
167 std::string alg = nodename+std::string(
" algorithm");
169 if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
170 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
173 const KCRS Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
175 #ifdef HAVE_TPETRA_MMM_TIMINGS
176 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SYCLCore"))));
180 typename KernelHandle::nnz_lno_t AnumRows = Amat.numRows();
181 typename KernelHandle::nnz_lno_t BnumRows = Bmerged.numRows();
182 typename KernelHandle::nnz_lno_t BnumCols = Bmerged.numCols();
184 lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing(
"non_const_lno_row"), AnumRows + 1);
185 lno_nnz_view_t entriesC;
186 scalar_view_t valuesC;
191 const bool useIntRowptrs =
192 irph.shouldUseIntRowptrs() &&
193 Aview.origMatrix->getApplyHelper()->shouldUseIntRowptrs();
197 kh.create_spgemm_handle(alg_enum);
198 kh.set_team_work_size(team_work_size);
200 int_view_t int_row_mapC (Kokkos::ViewAllocateWithoutInitializing(
"non_const_int_row"), AnumRows + 1);
202 auto Aint = Aview.origMatrix->getApplyHelper()->getIntRowptrMatrix(Amat);
203 auto Bint = irph.getIntRowptrMatrix(Bmerged);
204 KokkosSparse::Experimental::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Aint.graph.row_map,Aint.graph.entries,
false,Bint.graph.row_map,Bint.graph.entries,
false, int_row_mapC);
206 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
208 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
209 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
211 KokkosSparse::Experimental::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Aint.graph.row_map,Aint.graph.entries,Aint.values,
false,Bint.graph.row_map,Bint.graph.entries,Bint.values,
false,int_row_mapC,entriesC,valuesC);
213 Kokkos::parallel_for(int_row_mapC.size(), KOKKOS_LAMBDA(
int i){ row_mapC(i) = int_row_mapC(i);});
214 kh.destroy_spgemm_handle();
218 kh.create_spgemm_handle(alg_enum);
219 kh.set_team_work_size(team_work_size);
221 KokkosSparse::Experimental::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Amat.graph.row_map,Amat.graph.entries,
false,Bmerged.graph.row_map,Bmerged.graph.entries,
false,row_mapC);
223 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
225 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
226 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
229 KokkosSparse::Experimental::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Amat.graph.row_map,Amat.graph.entries,Amat.values,
false,Bmerged.graph.row_map,Bmerged.graph.entries,Bmerged.values,
false,row_mapC,entriesC,valuesC);
230 kh.destroy_spgemm_handle();
233 #ifdef HAVE_TPETRA_MMM_TIMINGS
234 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SYCLSort"))));
238 if (params.is_null() || params->get(
"sort entries",
true))
239 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
240 C.setAllValues(row_mapC,entriesC,valuesC);
242 #ifdef HAVE_TPETRA_MMM_TIMINGS
243 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SYCLESFC"))));
247 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
248 labelList->set(
"Timer Label",label);
249 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
250 RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > dummyExport;
251 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
256 template<
class Scalar,
259 class LocalOrdinalViewType>
260 void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(
261 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
262 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
263 const LocalOrdinalViewType & targetMapToOrigRow_dev,
264 const LocalOrdinalViewType & targetMapToImportRow_dev,
265 const LocalOrdinalViewType & Bcol2Ccol_dev,
266 const LocalOrdinalViewType & Icol2Ccol_dev,
267 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
268 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
269 const std::string& label,
270 const Teuchos::RCP<Teuchos::ParameterList>& params) {
273 typedef Tpetra::KokkosCompat::KokkosSYCLWrapperNode Node;
275 #ifdef HAVE_TPETRA_MMM_TIMINGS
276 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
277 using Teuchos::TimeMonitor;
278 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Reuse SerialCore"))));
279 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
286 typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
287 typedef typename KCRS::StaticCrsGraphType graph_t;
288 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
289 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
290 typedef typename KCRS::values_type::non_const_type scalar_view_t;
293 typedef LocalOrdinal LO;
294 typedef GlobalOrdinal GO;
296 typedef Map<LO,GO,NO> map_type;
297 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
298 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
299 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
307 auto targetMapToOrigRow =
308 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
309 targetMapToOrigRow_dev);
310 auto targetMapToImportRow =
311 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
312 targetMapToImportRow_dev);
314 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
317 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
321 RCP<const map_type> Ccolmap = C.getColMap();
322 size_t m = Aview.origMatrix->getLocalNumRows();
323 size_t n = Ccolmap->getLocalNumElements();
326 const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
327 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
328 const KCRS & Cmat = C.getLocalMatrixHost();
330 c_lno_view_t Arowptr = Amat.graph.row_map,
331 Browptr = Bmat.graph.row_map,
332 Crowptr = Cmat.graph.row_map;
333 const lno_nnz_view_t Acolind = Amat.graph.entries,
334 Bcolind = Bmat.graph.entries,
335 Ccolind = Cmat.graph.entries;
336 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
337 scalar_view_t Cvals = Cmat.values;
339 c_lno_view_t Irowptr;
340 lno_nnz_view_t Icolind;
342 if(!Bview.importMatrix.is_null()) {
343 auto lclB = Bview.importMatrix->getLocalMatrixHost();
344 Irowptr = lclB.graph.row_map;
345 Icolind = lclB.graph.entries;
349 #ifdef HAVE_TPETRA_MMM_TIMINGS
350 MM2 = Teuchos::null; MM2 = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SerialCore - Compare"))));
362 std::vector<size_t> c_status(n, ST_INVALID);
365 size_t CSR_ip = 0, OLD_ip = 0;
366 for (
size_t i = 0; i < m; i++) {
370 CSR_ip = Crowptr[i+1];
371 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
372 c_status[Ccolind[k]] = k;
378 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
380 const SC Aval = Avals[k];
384 if (targetMapToOrigRow[Aik] != LO_INVALID) {
386 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
388 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
390 LO Cij = Bcol2Ccol[Bkj];
392 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
393 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph " <<
394 "(c_status = " << c_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
396 Cvals[c_status[Cij]] += Aval * Bvals[j];
401 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
402 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
404 LO Cij = Icol2Ccol[Ikj];
406 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
407 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph " <<
408 "(c_status = " << c_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
410 Cvals[c_status[Cij]] += Aval * Ivals[j];
416 C.fillComplete(C.getDomainMap(), C.getRangeMap());
420 template<
class Scalar,
423 class LocalOrdinalViewType>
424 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
425 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> & Dinv,
426 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
427 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
428 const LocalOrdinalViewType & Acol2Brow,
429 const LocalOrdinalViewType & Acol2Irow,
430 const LocalOrdinalViewType & Bcol2Ccol,
431 const LocalOrdinalViewType & Icol2Ccol,
432 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
433 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
434 const std::string& label,
435 const Teuchos::RCP<Teuchos::ParameterList>& params) {
437 #ifdef HAVE_TPETRA_MMM_TIMINGS
438 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
439 using Teuchos::TimeMonitor;
440 Teuchos::RCP<TimeMonitor> MM;
448 std::string myalg(
"KK");
449 if(!params.is_null()) {
450 if(params->isParameter(
"sycl: jacobi algorithm"))
451 myalg = params->get(
"sycl: jacobi algorithm",myalg);
454 if(myalg ==
"MSAK") {
455 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_MultiplyScaleAddKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
457 else if(myalg ==
"KK") {
458 jacobi_A_B_newmatrix_KokkosKernels(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
461 throw std::runtime_error(
"Tpetra::MatrixMatrix::Jacobi newmatrix unknown kernel");
464 #ifdef HAVE_TPETRA_MMM_TIMINGS
465 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix SYCLESFC"))));
469 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
470 labelList->set(
"Timer Label",label);
471 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
474 if(!C.isFillComplete()) {
475 RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > dummyExport;
476 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
484 template<
class Scalar,
487 class LocalOrdinalViewType>
488 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
489 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> & Dinv,
490 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
491 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
492 const LocalOrdinalViewType & targetMapToOrigRow_dev,
493 const LocalOrdinalViewType & targetMapToImportRow_dev,
494 const LocalOrdinalViewType & Bcol2Ccol_dev,
495 const LocalOrdinalViewType & Icol2Ccol_dev,
496 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
497 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
498 const std::string& label,
499 const Teuchos::RCP<Teuchos::ParameterList>& params) {
502 typedef Tpetra::KokkosCompat::KokkosSYCLWrapperNode Node;
504 #ifdef HAVE_TPETRA_MMM_TIMINGS
505 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
506 using Teuchos::TimeMonitor;
507 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse SYCLCore"))));
508 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
514 typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
515 typedef typename KCRS::StaticCrsGraphType graph_t;
516 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
517 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
518 typedef typename KCRS::values_type::non_const_type scalar_view_t;
519 typedef typename scalar_view_t::memory_space scalar_memory_space;
522 typedef LocalOrdinal LO;
523 typedef GlobalOrdinal GO;
525 typedef Map<LO,GO,NO> map_type;
526 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
527 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
528 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
536 auto targetMapToOrigRow =
537 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
538 targetMapToOrigRow_dev);
539 auto targetMapToImportRow =
540 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
541 targetMapToImportRow_dev);
543 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
546 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
551 RCP<const map_type> Ccolmap = C.getColMap();
552 size_t m = Aview.origMatrix->getLocalNumRows();
553 size_t n = Ccolmap->getLocalNumElements();
556 const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
557 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
558 const KCRS & Cmat = C.getLocalMatrixHost();
560 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
561 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
562 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
563 scalar_view_t Cvals = Cmat.values;
565 c_lno_view_t Irowptr;
566 lno_nnz_view_t Icolind;
568 if(!Bview.importMatrix.is_null()) {
569 auto lclB = Bview.importMatrix->getLocalMatrixHost();
570 Irowptr = lclB.graph.row_map;
571 Icolind = lclB.graph.entries;
577 Dinv.template getLocalView<scalar_memory_space>(Access::ReadOnly);
579 #ifdef HAVE_TPETRA_MMM_TIMINGS
580 MM2 = Teuchos::null; MM2 = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse SYCLCore - Compare"))));
588 std::vector<size_t> c_status(n, ST_INVALID);
591 size_t CSR_ip = 0, OLD_ip = 0;
592 for (
size_t i = 0; i < m; i++) {
597 CSR_ip = Crowptr[i+1];
598 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
599 c_status[Ccolind[k]] = k;
605 SC minusOmegaDval = -omega*Dvals(i,0);
608 for (
size_t j = Browptr[i]; j < Browptr[i+1]; j++) {
609 Scalar Bval = Bvals[j];
613 LO Cij = Bcol2Ccol[Bij];
615 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
616 std::runtime_error,
"Trying to insert a new entry into a static graph");
618 Cvals[c_status[Cij]] = Bvals[j];
622 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
624 const SC Aval = Avals[k];
628 if (targetMapToOrigRow[Aik] != LO_INVALID) {
630 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
632 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
634 LO Cij = Bcol2Ccol[Bkj];
636 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
637 std::runtime_error,
"Trying to insert a new entry into a static graph");
639 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Bvals[j];
644 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
645 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
647 LO Cij = Icol2Ccol[Ikj];
649 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
650 std::runtime_error,
"Trying to insert a new entry into a static graph");
652 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Ivals[j];
658 #ifdef HAVE_TPETRA_MMM_TIMINGS
660 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse ESFC"))));
663 C.fillComplete(C.getDomainMap(), C.getRangeMap());
668 template<
class Scalar,
671 class LocalOrdinalViewType>
672 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
673 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> & Dinv,
674 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
675 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
676 const LocalOrdinalViewType & Acol2Brow,
677 const LocalOrdinalViewType & Acol2Irow,
678 const LocalOrdinalViewType & Bcol2Ccol,
679 const LocalOrdinalViewType & Icol2Ccol,
680 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
681 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
682 const std::string& label,
683 const Teuchos::RCP<Teuchos::ParameterList>& params) {
685 #ifdef HAVE_TPETRA_MMM_TIMINGS
686 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
687 using Teuchos::TimeMonitor;
688 Teuchos::RCP<TimeMonitor> MM;
695 auto rowMap = Aview.origMatrix->getRowMap();
697 Aview.origMatrix->getLocalDiagCopy(diags);
698 size_t diagLength = rowMap->getLocalNumElements();
699 Teuchos::Array<Scalar> diagonal(diagLength);
700 diags.get1dCopy(diagonal());
702 for(
size_t i = 0; i < diagLength; ++i) {
703 TEUCHOS_TEST_FOR_EXCEPTION(diagonal[i] == Teuchos::ScalarTraits<Scalar>::zero(),
705 "Matrix A has a zero/missing diagonal: " << diagonal[i] << std::endl <<
706 "KokkosKernels Jacobi-fused SpGEMM requires nonzero diagonal entries in A" << std::endl);
711 using device_t =
typename Tpetra::KokkosCompat::KokkosSYCLWrapperNode::device_type;
713 using graph_t =
typename matrix_t::StaticCrsGraphType;
714 using lno_view_t =
typename graph_t::row_map_type::non_const_type;
715 using int_view_t = Kokkos::View<
int *,
716 typename lno_view_t::array_layout,
717 typename lno_view_t::memory_space,
718 typename lno_view_t::memory_traits>;
719 using c_lno_view_t =
typename graph_t::row_map_type::const_type;
720 using lno_nnz_view_t =
typename graph_t::entries_type::non_const_type;
721 using scalar_view_t =
typename matrix_t::values_type::non_const_type;
724 using handle_t =
typename KokkosKernels::Experimental::KokkosKernelsHandle<
725 typename lno_view_t::const_value_type,
typename lno_nnz_view_t::const_value_type,
typename scalar_view_t::const_value_type,
726 typename device_t::execution_space,
typename device_t::memory_space,
typename device_t::memory_space >;
727 using int_handle_t =
typename KokkosKernels::Experimental::KokkosKernelsHandle<
728 typename int_view_t::const_value_type,
typename lno_nnz_view_t::const_value_type,
typename scalar_view_t::const_value_type,
729 typename device_t::execution_space,
typename device_t::memory_space,
typename device_t::memory_space >;
732 const matrix_t Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
735 const matrix_t & Amat = Aview.origMatrix->getLocalMatrixDevice();
736 const matrix_t & Bmat = Bview.origMatrix->getLocalMatrixDevice();
738 typename handle_t::nnz_lno_t AnumRows = Amat.numRows();
739 typename handle_t::nnz_lno_t BnumRows = Bmerged.numRows();
740 typename handle_t::nnz_lno_t BnumCols = Bmerged.numCols();
743 lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing(
"row_mapC"), AnumRows + 1);
744 lno_nnz_view_t entriesC;
745 scalar_view_t valuesC;
748 int team_work_size = 16;
749 std::string myalg(
"SPGEMM_KK_MEMORY");
750 if(!params.is_null()) {
751 if(params->isParameter(
"sycl: algorithm"))
752 myalg = params->get(
"sycl: algorithm",myalg);
753 if(params->isParameter(
"sycl: team work size"))
754 team_work_size = params->get(
"sycl: team work size",team_work_size);
758 std::string nodename(
"SYCL");
759 std::string alg = nodename + std::string(
" algorithm");
760 if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
761 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
765 const bool useIntRowptrs =
766 irph.shouldUseIntRowptrs() &&
767 Aview.origMatrix->getApplyHelper()->shouldUseIntRowptrs();
771 kh.create_spgemm_handle(alg_enum);
772 kh.set_team_work_size(team_work_size);
774 int_view_t int_row_mapC (Kokkos::ViewAllocateWithoutInitializing(
"int_row_mapC"), AnumRows + 1);
776 auto Aint = Aview.origMatrix->getApplyHelper()->getIntRowptrMatrix(Amat);
777 auto Bint = irph.getIntRowptrMatrix(Bmerged);
779 KokkosSparse::Experimental::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
780 Aint.graph.row_map, Aint.graph.entries,
false,
781 Bint.graph.row_map, Bint.graph.entries,
false,
784 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
786 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
787 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
792 KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
793 Aint.graph.row_map, Aint.graph.entries, Amat.values,
false,
794 Bint.graph.row_map, Bint.graph.entries, Bint.values,
false,
795 int_row_mapC, entriesC, valuesC,
796 omega, Dinv.getLocalViewDevice(Access::ReadOnly));
798 Kokkos::parallel_for(int_row_mapC.size(), KOKKOS_LAMBDA(
int i){ row_mapC(i) = int_row_mapC(i);});
799 kh.destroy_spgemm_handle();
802 kh.create_spgemm_handle(alg_enum);
803 kh.set_team_work_size(team_work_size);
805 KokkosSparse::Experimental::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
806 Amat.graph.row_map, Amat.graph.entries,
false,
807 Bmerged.graph.row_map, Bmerged.graph.entries,
false,
810 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
812 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
813 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
815 KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
816 Amat.graph.row_map, Amat.graph.entries, Amat.values,
false,
817 Bmerged.graph.row_map, Bmerged.graph.entries, Bmerged.values,
false,
818 row_mapC, entriesC, valuesC,
819 omega, Dinv.getLocalViewDevice(Access::ReadOnly));
820 kh.destroy_spgemm_handle();
823 #ifdef HAVE_TPETRA_MMM_TIMINGS
824 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix SYCLSort"))));
828 if (params.is_null() || params->get(
"sort entries",
true))
829 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
830 C.setAllValues(row_mapC,entriesC,valuesC);
832 #ifdef HAVE_TPETRA_MMM_TIMINGS
833 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix SYCLESFC"))));
837 Teuchos::RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
838 labelList->set(
"Timer Label",label);
839 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
840 Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > dummyExport;
841 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
static bool debug()
Whether Tpetra is in debug mode.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
A distributed dense vector.