42 #ifndef TPETRA_MATRIXMATRIX_HIP_DEF_HPP
43 #define TPETRA_MATRIXMATRIX_HIP_DEF_HPP
45 #ifdef HAVE_TPETRA_INST_HIP
51 template<
class Scalar,
54 class LocalOrdinalViewType>
55 struct KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType> {
56 static inline void mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
57 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
58 const LocalOrdinalViewType & Acol2Brow,
59 const LocalOrdinalViewType & Acol2Irow,
60 const LocalOrdinalViewType & Bcol2Ccol,
61 const LocalOrdinalViewType & Icol2Ccol,
62 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
63 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
64 const std::string& label = std::string(),
65 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
69 static inline void mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
70 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
71 const LocalOrdinalViewType & Acol2Brow,
72 const LocalOrdinalViewType & Acol2Irow,
73 const LocalOrdinalViewType & Bcol2Ccol,
74 const LocalOrdinalViewType & Icol2Ccol,
75 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
76 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
77 const std::string& label = std::string(),
78 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
83 template<
class Scalar,
85 class GlobalOrdinal,
class LocalOrdinalViewType>
86 struct KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType> {
87 static inline void jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
88 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> & Dinv,
89 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
90 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
91 const LocalOrdinalViewType & Acol2Brow,
92 const LocalOrdinalViewType & Acol2Irow,
93 const LocalOrdinalViewType & Bcol2Ccol,
94 const LocalOrdinalViewType & Icol2Ccol,
95 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
96 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
97 const std::string& label = std::string(),
98 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
100 static inline void jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
101 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> & Dinv,
102 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
103 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
104 const LocalOrdinalViewType & Acol2Brow,
105 const LocalOrdinalViewType & Acol2Irow,
106 const LocalOrdinalViewType & Bcol2Ccol,
107 const LocalOrdinalViewType & Icol2Ccol,
108 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
109 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
110 const std::string& label = std::string(),
111 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
113 static inline void jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
114 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> & Dinv,
115 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
116 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
117 const LocalOrdinalViewType & Acol2Brow,
118 const LocalOrdinalViewType & Acol2Irow,
119 const LocalOrdinalViewType & Bcol2Ccol,
120 const LocalOrdinalViewType & Icol2Ccol,
121 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
122 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
123 const std::string& label = std::string(),
124 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
130 template<
class Scalar,
133 class LocalOrdinalViewType>
134 void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
135 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
136 const LocalOrdinalViewType & Acol2Brow,
137 const LocalOrdinalViewType & Acol2Irow,
138 const LocalOrdinalViewType & Bcol2Ccol,
139 const LocalOrdinalViewType & Icol2Ccol,
140 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
141 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
142 const std::string& label,
143 const Teuchos::RCP<Teuchos::ParameterList>& params) {
146 #ifdef HAVE_TPETRA_MMM_TIMINGS
147 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
148 using Teuchos::TimeMonitor;
149 Teuchos::RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*(TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix HIPWrapper")))));
152 typedef Tpetra::KokkosCompat::KokkosHIPWrapperNode Node;
153 std::string nodename(
"HIP");
158 typedef typename KCRS::device_type device_t;
159 typedef typename KCRS::StaticCrsGraphType graph_t;
160 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
161 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
162 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
163 typedef typename KCRS::values_type::non_const_type scalar_view_t;
167 int team_work_size = 16;
168 std::string myalg(
"SPGEMM_KK_MEMORY");
169 if(!params.is_null()) {
170 if(params->isParameter(
"hip: algorithm"))
171 myalg = params->get(
"hip: algorithm",myalg);
172 if(params->isParameter(
"hip: team work size"))
173 team_work_size = params->get(
"hip: team work size",team_work_size);
177 typedef KokkosKernels::Experimental::KokkosKernelsHandle<
178 typename lno_view_t::const_value_type,
typename lno_nnz_view_t::const_value_type,
typename scalar_view_t::const_value_type,
179 typename device_t::execution_space,
typename device_t::memory_space,
typename device_t::memory_space > KernelHandle;
182 const KCRS & Amat = Aview.origMatrix->getLocalMatrixDevice();
183 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixDevice();
185 c_lno_view_t Arowptr = Amat.graph.row_map,
186 Browptr = Bmat.graph.row_map;
187 const lno_nnz_view_t Acolind = Amat.graph.entries,
188 Bcolind = Bmat.graph.entries;
189 const scalar_view_t Avals = Amat.values,
192 c_lno_view_t Irowptr;
193 lno_nnz_view_t Icolind;
195 if(!Bview.importMatrix.is_null()) {
196 auto lclB = Bview.importMatrix->getLocalMatrixDevice();
197 Irowptr = lclB.graph.row_map;
198 Icolind = lclB.graph.entries;
204 std::string alg = nodename+std::string(
" algorithm");
206 if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
207 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
210 const KCRS Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
212 #ifdef HAVE_TPETRA_MMM_TIMINGS
213 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix HIPCore"))));
217 typename KernelHandle::nnz_lno_t AnumRows = Amat.numRows();
218 typename KernelHandle::nnz_lno_t BnumRows = Bmerged.numRows();
219 typename KernelHandle::nnz_lno_t BnumCols = Bmerged.numCols();
221 lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing(
"non_const_lnow_row"), AnumRows + 1);
222 lno_nnz_view_t entriesC;
223 scalar_view_t valuesC;
225 kh.create_spgemm_handle(alg_enum);
226 kh.set_team_work_size(team_work_size);
228 KokkosSparse::Experimental::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Amat.graph.row_map,Amat.graph.entries,
false,Bmerged.graph.row_map,Bmerged.graph.entries,
false,row_mapC);
230 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
232 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
233 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
235 KokkosSparse::Experimental::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Amat.graph.row_map,Amat.graph.entries,Amat.values,
false,Bmerged.graph.row_map,Bmerged.graph.entries,Bmerged.values,
false,row_mapC,entriesC,valuesC);
236 kh.destroy_spgemm_handle();
238 #ifdef HAVE_TPETRA_MMM_TIMINGS
239 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix HIPSort"))));
243 if (params.is_null() || params->get(
"sort entries",
true))
244 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
245 C.setAllValues(row_mapC,entriesC,valuesC);
247 #ifdef HAVE_TPETRA_MMM_TIMINGS
248 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix HIPESFC"))));
252 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
253 labelList->set(
"Timer Label",label);
254 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
255 RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > dummyExport;
256 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
261 template<
class Scalar,
264 class LocalOrdinalViewType>
265 void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
266 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
267 const LocalOrdinalViewType & targetMapToOrigRow_dev,
268 const LocalOrdinalViewType & targetMapToImportRow_dev,
269 const LocalOrdinalViewType & Bcol2Ccol_dev,
270 const LocalOrdinalViewType & Icol2Ccol_dev,
271 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
272 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
273 const std::string& label,
274 const Teuchos::RCP<Teuchos::ParameterList>& params) {
277 typedef Tpetra::KokkosCompat::KokkosHIPWrapperNode Node;
279 #ifdef HAVE_TPETRA_MMM_TIMINGS
280 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
281 using Teuchos::TimeMonitor;
282 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Reuse SerialCore"))));
283 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
290 typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
291 typedef typename KCRS::StaticCrsGraphType graph_t;
292 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
293 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
294 typedef typename KCRS::values_type::non_const_type scalar_view_t;
297 typedef LocalOrdinal LO;
298 typedef GlobalOrdinal GO;
300 typedef Map<LO,GO,NO> map_type;
301 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
302 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
303 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
308 auto targetMapToOrigRow =
309 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
310 targetMapToOrigRow_dev);
311 auto targetMapToImportRow =
312 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
313 targetMapToImportRow_dev);
315 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
318 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
322 RCP<const map_type> Ccolmap = C.getColMap();
323 size_t m = Aview.origMatrix->getLocalNumRows();
324 size_t n = Ccolmap->getLocalNumElements();
327 const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
328 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
329 const KCRS & Cmat = C.getLocalMatrixHost();
331 c_lno_view_t Arowptr = Amat.graph.row_map,
332 Browptr = Bmat.graph.row_map,
333 Crowptr = Cmat.graph.row_map;
334 const lno_nnz_view_t Acolind = Amat.graph.entries,
335 Bcolind = Bmat.graph.entries,
336 Ccolind = Cmat.graph.entries;
337 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
338 scalar_view_t Cvals = Cmat.values;
340 c_lno_view_t Irowptr;
341 lno_nnz_view_t Icolind;
343 if(!Bview.importMatrix.is_null()) {
344 auto lclB = Bview.importMatrix->getLocalMatrixHost();
345 Irowptr = lclB.graph.row_map;
346 Icolind = lclB.graph.entries;
350 #ifdef HAVE_TPETRA_MMM_TIMINGS
351 MM2 = Teuchos::null; MM2 = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SerialCore - Compare"))));
363 std::vector<size_t> c_status(n, ST_INVALID);
366 size_t CSR_ip = 0, OLD_ip = 0;
367 for (
size_t i = 0; i < m; i++) {
371 CSR_ip = Crowptr[i+1];
372 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
373 c_status[Ccolind[k]] = k;
379 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
381 const SC Aval = Avals[k];
385 if (targetMapToOrigRow[Aik] != LO_INVALID) {
387 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
389 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
391 LO Cij = Bcol2Ccol[Bkj];
393 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
394 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph " <<
395 "(c_status = " << c_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
397 Cvals[c_status[Cij]] += Aval * Bvals[j];
402 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
403 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
405 LO Cij = Icol2Ccol[Ikj];
407 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
408 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph " <<
409 "(c_status = " << c_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
411 Cvals[c_status[Cij]] += Aval * Ivals[j];
417 C.fillComplete(C.getDomainMap(), C.getRangeMap());
421 template<
class Scalar,
424 class LocalOrdinalViewType>
425 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
426 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> & Dinv,
427 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
428 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
429 const LocalOrdinalViewType & Acol2Brow,
430 const LocalOrdinalViewType & Acol2Irow,
431 const LocalOrdinalViewType & Bcol2Ccol,
432 const LocalOrdinalViewType & Icol2Ccol,
433 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
434 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
435 const std::string& label,
436 const Teuchos::RCP<Teuchos::ParameterList>& params) {
438 #ifdef HAVE_TPETRA_MMM_TIMINGS
439 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
440 using Teuchos::TimeMonitor;
441 Teuchos::RCP<TimeMonitor> MM;
449 std::string myalg(
"KK");
450 if(!params.is_null()) {
451 if(params->isParameter(
"hip: jacobi algorithm"))
452 myalg = params->get(
"hip: jacobi algorithm",myalg);
455 if(myalg ==
"MSAK") {
456 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_MultiplyScaleAddKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
458 else if(myalg ==
"KK") {
459 jacobi_A_B_newmatrix_KokkosKernels(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
462 throw std::runtime_error(
"Tpetra::MatrixMatrix::Jacobi newmatrix unknown kernel");
465 #ifdef HAVE_TPETRA_MMM_TIMINGS
466 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix HIPESFC"))));
470 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
471 labelList->set(
"Timer Label",label);
472 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
475 if(!C.isFillComplete()) {
476 RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > dummyExport;
477 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
485 template<
class Scalar,
488 class LocalOrdinalViewType>
489 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
490 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> & Dinv,
491 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
492 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
493 const LocalOrdinalViewType & targetMapToOrigRow_dev,
494 const LocalOrdinalViewType & targetMapToImportRow_dev,
495 const LocalOrdinalViewType & Bcol2Ccol_dev,
496 const LocalOrdinalViewType & Icol2Ccol_dev,
497 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
498 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
499 const std::string& label,
500 const Teuchos::RCP<Teuchos::ParameterList>& params) {
503 typedef Tpetra::KokkosCompat::KokkosHIPWrapperNode Node;
505 #ifdef HAVE_TPETRA_MMM_TIMINGS
506 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
507 using Teuchos::TimeMonitor;
508 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse HIPCore"))));
509 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
515 typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
516 typedef typename KCRS::StaticCrsGraphType graph_t;
517 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
518 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
519 typedef typename KCRS::values_type::non_const_type scalar_view_t;
520 typedef typename scalar_view_t::memory_space scalar_memory_space;
523 typedef LocalOrdinal LO;
524 typedef GlobalOrdinal GO;
526 typedef Map<LO,GO,NO> map_type;
527 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
528 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
529 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
534 auto targetMapToOrigRow =
535 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
536 targetMapToOrigRow_dev);
537 auto targetMapToImportRow =
538 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
539 targetMapToImportRow_dev);
541 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
544 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
549 RCP<const map_type> Ccolmap = C.getColMap();
550 size_t m = Aview.origMatrix->getLocalNumRows();
551 size_t n = Ccolmap->getLocalNumElements();
554 const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
555 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
556 const KCRS & Cmat = C.getLocalMatrixHost();
558 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
559 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
560 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
561 scalar_view_t Cvals = Cmat.values;
563 c_lno_view_t Irowptr;
564 lno_nnz_view_t Icolind;
566 if(!Bview.importMatrix.is_null()) {
567 auto lclB = Bview.importMatrix->getLocalMatrixHost();
568 Irowptr = lclB.graph.row_map;
569 Icolind = lclB.graph.entries;
575 Dinv.template getLocalView<scalar_memory_space>(Access::ReadOnly);
577 #ifdef HAVE_TPETRA_MMM_TIMINGS
578 MM2 = Teuchos::null; MM2 = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse HIPCore - Compare"))));
586 std::vector<size_t> c_status(n, ST_INVALID);
589 size_t CSR_ip = 0, OLD_ip = 0;
590 for (
size_t i = 0; i < m; i++) {
595 CSR_ip = Crowptr[i+1];
596 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
597 c_status[Ccolind[k]] = k;
603 SC minusOmegaDval = -omega*Dvals(i,0);
606 for (
size_t j = Browptr[i]; j < Browptr[i+1]; j++) {
607 Scalar Bval = Bvals[j];
611 LO Cij = Bcol2Ccol[Bij];
613 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
614 std::runtime_error,
"Trying to insert a new entry into a static graph");
616 Cvals[c_status[Cij]] = Bvals[j];
620 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
622 const SC Aval = Avals[k];
626 if (targetMapToOrigRow[Aik] != LO_INVALID) {
628 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
630 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
632 LO Cij = Bcol2Ccol[Bkj];
634 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
635 std::runtime_error,
"Trying to insert a new entry into a static graph");
637 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Bvals[j];
642 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
643 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
645 LO Cij = Icol2Ccol[Ikj];
647 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
648 std::runtime_error,
"Trying to insert a new entry into a static graph");
650 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Ivals[j];
656 #ifdef HAVE_TPETRA_MMM_TIMINGS
658 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse ESFC"))));
661 C.fillComplete(C.getDomainMap(), C.getRangeMap());
666 template<
class Scalar,
669 class LocalOrdinalViewType>
670 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
671 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> & Dinv,
672 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
673 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
674 const LocalOrdinalViewType & Acol2Brow,
675 const LocalOrdinalViewType & Acol2Irow,
676 const LocalOrdinalViewType & Bcol2Ccol,
677 const LocalOrdinalViewType & Icol2Ccol,
678 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
679 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
680 const std::string& label,
681 const Teuchos::RCP<Teuchos::ParameterList>& params) {
683 #ifdef HAVE_TPETRA_MMM_TIMINGS
684 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
685 using Teuchos::TimeMonitor;
686 Teuchos::RCP<TimeMonitor> MM;
693 auto rowMap = Aview.origMatrix->getRowMap();
695 Aview.origMatrix->getLocalDiagCopy(diags);
696 size_t diagLength = rowMap->getLocalNumElements();
697 Teuchos::Array<Scalar> diagonal(diagLength);
698 diags.get1dCopy(diagonal());
700 for(
size_t i = 0; i < diagLength; ++i) {
701 TEUCHOS_TEST_FOR_EXCEPTION(diagonal[i] == Teuchos::ScalarTraits<Scalar>::zero(),
703 "Matrix A has a zero/missing diagonal: " << diagonal[i] << std::endl <<
704 "KokkosKernels Jacobi-fused SpGEMM requires nonzero diagonal entries in A" << std::endl);
709 using device_t =
typename Tpetra::KokkosCompat::KokkosHIPWrapperNode::device_type;
711 using graph_t =
typename matrix_t::StaticCrsGraphType;
712 using lno_view_t =
typename graph_t::row_map_type::non_const_type;
713 using c_lno_view_t =
typename graph_t::row_map_type::const_type;
714 using lno_nnz_view_t =
typename graph_t::entries_type::non_const_type;
715 using scalar_view_t =
typename matrix_t::values_type::non_const_type;
718 using handle_t =
typename KokkosKernels::Experimental::KokkosKernelsHandle<
719 typename lno_view_t::const_value_type,
typename lno_nnz_view_t::const_value_type,
typename scalar_view_t::const_value_type,
720 typename device_t::execution_space,
typename device_t::memory_space,
typename device_t::memory_space >;
723 c_lno_view_t Irowptr;
724 lno_nnz_view_t Icolind;
726 if(!Bview.importMatrix.is_null()) {
727 auto lclB = Bview.importMatrix->getLocalMatrixDevice();
728 Irowptr = lclB.graph.row_map;
729 Icolind = lclB.graph.entries;
734 const matrix_t Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
737 const matrix_t & Amat = Aview.origMatrix->getLocalMatrixDevice();
738 const matrix_t & Bmat = Bview.origMatrix->getLocalMatrixDevice();
740 typename handle_t::nnz_lno_t AnumRows = Amat.numRows();
741 typename handle_t::nnz_lno_t BnumRows = Bmerged.numRows();
742 typename handle_t::nnz_lno_t BnumCols = Bmerged.numCols();
744 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmerged.graph.row_map;
745 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmerged.graph.entries;
746 const scalar_view_t Avals = Amat.values, Bvals = Bmerged.values;
749 lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing(
"non_const_lnow_row"), AnumRows + 1);
750 lno_nnz_view_t entriesC;
751 scalar_view_t valuesC;
754 int team_work_size = 16;
755 std::string myalg(
"SPGEMM_KK_MEMORY");
756 if(!params.is_null()) {
757 if(params->isParameter(
"hip: algorithm"))
758 myalg = params->get(
"hip: algorithm",myalg);
759 if(params->isParameter(
"hip: team work size"))
760 team_work_size = params->get(
"hip: team work size",team_work_size);
764 std::string nodename(
"HIP");
765 std::string alg = nodename + std::string(
" algorithm");
766 if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
767 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
772 kh.create_spgemm_handle(alg_enum);
773 kh.set_team_work_size(team_work_size);
775 KokkosSparse::Experimental::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
776 Arowptr, Acolind,
false,
777 Browptr, Bcolind,
false,
780 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
782 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
783 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
786 KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
787 Arowptr, Acolind, Avals,
false,
788 Browptr, Bcolind, Bvals,
false,
789 row_mapC, entriesC, valuesC,
790 omega, Dinv.getLocalViewDevice(Access::ReadOnly));
791 kh.destroy_spgemm_handle();
793 #ifdef HAVE_TPETRA_MMM_TIMINGS
794 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix HIPSort"))));
798 if (params.is_null() || params->get(
"sort entries",
true))
799 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
800 C.setAllValues(row_mapC,entriesC,valuesC);
802 #ifdef HAVE_TPETRA_MMM_TIMINGS
803 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix HIPESFC"))));
807 Teuchos::RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
808 labelList->set(
"Timer Label",label);
809 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
810 Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > dummyExport;
811 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
static bool debug()
Whether Tpetra is in debug mode.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
A distributed dense vector.