10 #ifndef TPETRA_MATRIXMATRIX_HIP_DEF_HPP
11 #define TPETRA_MATRIXMATRIX_HIP_DEF_HPP
13 #include "Tpetra_Details_IntRowPtrHelper.hpp"
15 #ifdef HAVE_TPETRA_INST_HIP
21 template<
class Scalar,
24 class LocalOrdinalViewType>
25 struct KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType> {
26 static inline void mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
27 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
28 const LocalOrdinalViewType & Acol2Brow,
29 const LocalOrdinalViewType & Acol2Irow,
30 const LocalOrdinalViewType & Bcol2Ccol,
31 const LocalOrdinalViewType & Icol2Ccol,
32 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
33 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
34 const std::string& label = std::string(),
35 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
39 static inline void mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
40 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
41 const LocalOrdinalViewType & Acol2Brow,
42 const LocalOrdinalViewType & Acol2Irow,
43 const LocalOrdinalViewType & Bcol2Ccol,
44 const LocalOrdinalViewType & Icol2Ccol,
45 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
46 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
47 const std::string& label = std::string(),
48 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
53 template<
class Scalar,
55 class GlobalOrdinal,
class LocalOrdinalViewType>
56 struct KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType> {
57 static inline void jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
58 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> & Dinv,
59 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
60 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
61 const LocalOrdinalViewType & Acol2Brow,
62 const LocalOrdinalViewType & Acol2Irow,
63 const LocalOrdinalViewType & Bcol2Ccol,
64 const LocalOrdinalViewType & Icol2Ccol,
65 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
66 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
67 const std::string& label = std::string(),
68 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
70 static inline void jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
71 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> & Dinv,
72 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
73 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
74 const LocalOrdinalViewType & Acol2Brow,
75 const LocalOrdinalViewType & Acol2Irow,
76 const LocalOrdinalViewType & Bcol2Ccol,
77 const LocalOrdinalViewType & Icol2Ccol,
78 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
79 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
80 const std::string& label = std::string(),
81 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
83 static inline void jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
84 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> & Dinv,
85 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
86 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
87 const LocalOrdinalViewType & Acol2Brow,
88 const LocalOrdinalViewType & Acol2Irow,
89 const LocalOrdinalViewType & Bcol2Ccol,
90 const LocalOrdinalViewType & Icol2Ccol,
91 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
92 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
93 const std::string& label = std::string(),
94 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
100 template<
class Scalar,
103 class LocalOrdinalViewType>
104 void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
105 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
106 const LocalOrdinalViewType & Acol2Brow,
107 const LocalOrdinalViewType & Acol2Irow,
108 const LocalOrdinalViewType & Bcol2Ccol,
109 const LocalOrdinalViewType & Icol2Ccol,
110 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
111 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
112 const std::string& label,
113 const Teuchos::RCP<Teuchos::ParameterList>& params) {
115 #ifdef HAVE_TPETRA_MMM_TIMINGS
116 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
117 using Teuchos::TimeMonitor;
118 Teuchos::RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*(TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix HIPWrapper")))));
121 typedef Tpetra::KokkosCompat::KokkosHIPWrapperNode Node;
122 std::string nodename(
"HIP");
127 typedef typename KCRS::device_type device_t;
128 typedef typename KCRS::StaticCrsGraphType graph_t;
129 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
130 using int_view_t = Kokkos::View<int *, typename lno_view_t::array_layout, typename lno_view_t::memory_space, typename lno_view_t::memory_traits>;
131 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
132 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
133 typedef typename KCRS::values_type::non_const_type scalar_view_t;
137 int team_work_size = 16;
138 std::string myalg(
"SPGEMM_KK_MEMORY");
139 if(!params.is_null()) {
140 if(params->isParameter(
"hip: algorithm"))
141 myalg = params->get(
"hip: algorithm",myalg);
142 if(params->isParameter(
"hip: team work size"))
143 team_work_size = params->get(
"hip: team work size",team_work_size);
147 typedef KokkosKernels::Experimental::KokkosKernelsHandle<
148 typename lno_view_t::const_value_type,
typename lno_nnz_view_t::const_value_type,
typename scalar_view_t::const_value_type,
149 typename device_t::execution_space,
typename device_t::memory_space,
typename device_t::memory_space > KernelHandle;
150 using IntKernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle<
151 typename int_view_t::const_value_type,
typename lno_nnz_view_t::const_value_type,
typename scalar_view_t::const_value_type,
152 typename device_t::execution_space,
typename device_t::memory_space,
typename device_t::memory_space >;
155 const KCRS & Amat = Aview.origMatrix->getLocalMatrixDevice();
156 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixDevice();
158 c_lno_view_t Arowptr = Amat.graph.row_map,
159 Browptr = Bmat.graph.row_map;
160 const lno_nnz_view_t Acolind = Amat.graph.entries,
161 Bcolind = Bmat.graph.entries;
162 const scalar_view_t Avals = Amat.values,
166 std::string alg = nodename+std::string(
" algorithm");
168 if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
169 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
172 const KCRS Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
174 #ifdef HAVE_TPETRA_MMM_TIMINGS
175 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix HIPCore"))));
179 typename KernelHandle::nnz_lno_t AnumRows = Amat.numRows();
180 typename KernelHandle::nnz_lno_t BnumRows = Bmerged.numRows();
181 typename KernelHandle::nnz_lno_t BnumCols = Bmerged.numCols();
184 lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing(
"non_const_lno_row"), AnumRows + 1);
185 lno_nnz_view_t entriesC;
186 scalar_view_t valuesC;
189 const bool useIntRowptrs =
190 irph.shouldUseIntRowptrs() &&
191 Aview.origMatrix->getApplyHelper()->shouldUseIntRowptrs();
195 kh.create_spgemm_handle(alg_enum);
196 kh.set_team_work_size(team_work_size);
198 int_view_t int_row_mapC (Kokkos::ViewAllocateWithoutInitializing(
"non_const_int_row"), AnumRows + 1);
200 auto Aint = Aview.origMatrix->getApplyHelper()->getIntRowptrMatrix(Amat);
201 auto Bint = irph.getIntRowptrMatrix(Bmerged);
202 KokkosSparse::Experimental::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Aint.graph.row_map,Aint.graph.entries,
false,Bint.graph.row_map,Bint.graph.entries,
false, int_row_mapC);
204 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
206 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
207 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
209 KokkosSparse::Experimental::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Aint.graph.row_map,Aint.graph.entries,Aint.values,
false,Bint.graph.row_map,Bint.graph.entries,Bint.values,
false,int_row_mapC,entriesC,valuesC);
211 Kokkos::parallel_for(int_row_mapC.size(), KOKKOS_LAMBDA(
int i){ row_mapC(i) = int_row_mapC(i);});
212 kh.destroy_spgemm_handle();
215 kh.create_spgemm_handle(alg_enum);
216 kh.set_team_work_size(team_work_size);
218 KokkosSparse::Experimental::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Amat.graph.row_map,Amat.graph.entries,
false,Bmerged.graph.row_map,Bmerged.graph.entries,
false,row_mapC);
220 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
222 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
223 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
226 KokkosSparse::Experimental::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Amat.graph.row_map,Amat.graph.entries,Amat.values,
false,Bmerged.graph.row_map,Bmerged.graph.entries,Bmerged.values,
false,row_mapC,entriesC,valuesC);
227 kh.destroy_spgemm_handle();
230 #ifdef HAVE_TPETRA_MMM_TIMINGS
231 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix HIPSort"))));
235 if (params.is_null() || params->get(
"sort entries",
true))
236 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
237 C.setAllValues(row_mapC,entriesC,valuesC);
239 #ifdef HAVE_TPETRA_MMM_TIMINGS
240 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix HIPESFC"))));
244 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
245 labelList->set(
"Timer Label",label);
246 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
247 RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > dummyExport;
248 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
253 template<
class Scalar,
256 class LocalOrdinalViewType>
257 void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
258 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
259 const LocalOrdinalViewType & targetMapToOrigRow_dev,
260 const LocalOrdinalViewType & targetMapToImportRow_dev,
261 const LocalOrdinalViewType & Bcol2Ccol_dev,
262 const LocalOrdinalViewType & Icol2Ccol_dev,
263 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
264 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
265 const std::string& label,
266 const Teuchos::RCP<Teuchos::ParameterList>& params) {
269 typedef Tpetra::KokkosCompat::KokkosHIPWrapperNode Node;
271 #ifdef HAVE_TPETRA_MMM_TIMINGS
272 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
273 using Teuchos::TimeMonitor;
274 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Reuse SerialCore"))));
275 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
282 typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
283 typedef typename KCRS::StaticCrsGraphType graph_t;
284 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
285 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
286 typedef typename KCRS::values_type::non_const_type scalar_view_t;
289 typedef LocalOrdinal LO;
290 typedef GlobalOrdinal GO;
292 typedef Map<LO,GO,NO> map_type;
293 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
294 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
295 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
300 auto targetMapToOrigRow =
301 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
302 targetMapToOrigRow_dev);
303 auto targetMapToImportRow =
304 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
305 targetMapToImportRow_dev);
307 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
310 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
314 RCP<const map_type> Ccolmap = C.getColMap();
315 size_t m = Aview.origMatrix->getLocalNumRows();
316 size_t n = Ccolmap->getLocalNumElements();
319 const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
320 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
321 const KCRS & Cmat = C.getLocalMatrixHost();
323 c_lno_view_t Arowptr = Amat.graph.row_map,
324 Browptr = Bmat.graph.row_map,
325 Crowptr = Cmat.graph.row_map;
326 const lno_nnz_view_t Acolind = Amat.graph.entries,
327 Bcolind = Bmat.graph.entries,
328 Ccolind = Cmat.graph.entries;
329 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
330 scalar_view_t Cvals = Cmat.values;
332 c_lno_view_t Irowptr;
333 lno_nnz_view_t Icolind;
335 if(!Bview.importMatrix.is_null()) {
336 auto lclB = Bview.importMatrix->getLocalMatrixHost();
337 Irowptr = lclB.graph.row_map;
338 Icolind = lclB.graph.entries;
342 #ifdef HAVE_TPETRA_MMM_TIMINGS
343 MM2 = Teuchos::null; MM2 = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SerialCore - Compare"))));
355 std::vector<size_t> c_status(n, ST_INVALID);
358 size_t CSR_ip = 0, OLD_ip = 0;
359 for (
size_t i = 0; i < m; i++) {
363 CSR_ip = Crowptr[i+1];
364 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
365 c_status[Ccolind[k]] = k;
371 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
373 const SC Aval = Avals[k];
377 if (targetMapToOrigRow[Aik] != LO_INVALID) {
379 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
381 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
383 LO Cij = Bcol2Ccol[Bkj];
385 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
386 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph " <<
387 "(c_status = " << c_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
389 Cvals[c_status[Cij]] += Aval * Bvals[j];
394 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
395 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
397 LO Cij = Icol2Ccol[Ikj];
399 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
400 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph " <<
401 "(c_status = " << c_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
403 Cvals[c_status[Cij]] += Aval * Ivals[j];
409 C.fillComplete(C.getDomainMap(), C.getRangeMap());
413 template<
class Scalar,
416 class LocalOrdinalViewType>
417 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
418 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> & Dinv,
419 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
420 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
421 const LocalOrdinalViewType & Acol2Brow,
422 const LocalOrdinalViewType & Acol2Irow,
423 const LocalOrdinalViewType & Bcol2Ccol,
424 const LocalOrdinalViewType & Icol2Ccol,
425 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
426 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
427 const std::string& label,
428 const Teuchos::RCP<Teuchos::ParameterList>& params) {
430 #ifdef HAVE_TPETRA_MMM_TIMINGS
431 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
432 using Teuchos::TimeMonitor;
433 Teuchos::RCP<TimeMonitor> MM;
441 std::string myalg(
"KK");
442 if(!params.is_null()) {
443 if(params->isParameter(
"hip: jacobi algorithm"))
444 myalg = params->get(
"hip: jacobi algorithm",myalg);
447 if(myalg ==
"MSAK") {
448 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_MultiplyScaleAddKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
450 else if(myalg ==
"KK") {
451 jacobi_A_B_newmatrix_KokkosKernels(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
454 throw std::runtime_error(
"Tpetra::MatrixMatrix::Jacobi newmatrix unknown kernel");
457 #ifdef HAVE_TPETRA_MMM_TIMINGS
458 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix HIPESFC"))));
462 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
463 labelList->set(
"Timer Label",label);
464 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
467 if(!C.isFillComplete()) {
468 RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > dummyExport;
469 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
477 template<
class Scalar,
480 class LocalOrdinalViewType>
481 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
482 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> & Dinv,
483 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
484 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
485 const LocalOrdinalViewType & targetMapToOrigRow_dev,
486 const LocalOrdinalViewType & targetMapToImportRow_dev,
487 const LocalOrdinalViewType & Bcol2Ccol_dev,
488 const LocalOrdinalViewType & Icol2Ccol_dev,
489 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
490 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
491 const std::string& label,
492 const Teuchos::RCP<Teuchos::ParameterList>& params) {
495 typedef Tpetra::KokkosCompat::KokkosHIPWrapperNode Node;
497 #ifdef HAVE_TPETRA_MMM_TIMINGS
498 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
499 using Teuchos::TimeMonitor;
500 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse HIPCore"))));
501 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
507 typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
508 typedef typename KCRS::StaticCrsGraphType graph_t;
509 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
510 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
511 typedef typename KCRS::values_type::non_const_type scalar_view_t;
512 typedef typename scalar_view_t::memory_space scalar_memory_space;
515 typedef LocalOrdinal LO;
516 typedef GlobalOrdinal GO;
518 typedef Map<LO,GO,NO> map_type;
519 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
520 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
521 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
526 auto targetMapToOrigRow =
527 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
528 targetMapToOrigRow_dev);
529 auto targetMapToImportRow =
530 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
531 targetMapToImportRow_dev);
533 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
536 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
541 RCP<const map_type> Ccolmap = C.getColMap();
542 size_t m = Aview.origMatrix->getLocalNumRows();
543 size_t n = Ccolmap->getLocalNumElements();
546 const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
547 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
548 const KCRS & Cmat = C.getLocalMatrixHost();
550 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
551 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
552 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
553 scalar_view_t Cvals = Cmat.values;
555 c_lno_view_t Irowptr;
556 lno_nnz_view_t Icolind;
558 if(!Bview.importMatrix.is_null()) {
559 auto lclB = Bview.importMatrix->getLocalMatrixHost();
560 Irowptr = lclB.graph.row_map;
561 Icolind = lclB.graph.entries;
567 Dinv.template getLocalView<scalar_memory_space>(Access::ReadOnly);
569 #ifdef HAVE_TPETRA_MMM_TIMINGS
570 MM2 = Teuchos::null; MM2 = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse HIPCore - Compare"))));
578 std::vector<size_t> c_status(n, ST_INVALID);
581 size_t CSR_ip = 0, OLD_ip = 0;
582 for (
size_t i = 0; i < m; i++) {
587 CSR_ip = Crowptr[i+1];
588 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
589 c_status[Ccolind[k]] = k;
595 SC minusOmegaDval = -omega*Dvals(i,0);
598 for (
size_t j = Browptr[i]; j < Browptr[i+1]; j++) {
599 Scalar Bval = Bvals[j];
603 LO Cij = Bcol2Ccol[Bij];
605 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
606 std::runtime_error,
"Trying to insert a new entry into a static graph");
608 Cvals[c_status[Cij]] = Bvals[j];
612 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
614 const SC Aval = Avals[k];
618 if (targetMapToOrigRow[Aik] != LO_INVALID) {
620 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
622 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
624 LO Cij = Bcol2Ccol[Bkj];
626 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
627 std::runtime_error,
"Trying to insert a new entry into a static graph");
629 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Bvals[j];
634 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
635 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
637 LO Cij = Icol2Ccol[Ikj];
639 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
640 std::runtime_error,
"Trying to insert a new entry into a static graph");
642 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Ivals[j];
648 #ifdef HAVE_TPETRA_MMM_TIMINGS
650 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse ESFC"))));
653 C.fillComplete(C.getDomainMap(), C.getRangeMap());
658 template<
class Scalar,
661 class LocalOrdinalViewType>
662 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
663 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> & Dinv,
664 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
665 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
666 const LocalOrdinalViewType & Acol2Brow,
667 const LocalOrdinalViewType & Acol2Irow,
668 const LocalOrdinalViewType & Bcol2Ccol,
669 const LocalOrdinalViewType & Icol2Ccol,
670 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
671 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
672 const std::string& label,
673 const Teuchos::RCP<Teuchos::ParameterList>& params) {
675 #ifdef HAVE_TPETRA_MMM_TIMINGS
676 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
677 using Teuchos::TimeMonitor;
678 Teuchos::RCP<TimeMonitor> MM;
685 auto rowMap = Aview.origMatrix->getRowMap();
687 Aview.origMatrix->getLocalDiagCopy(diags);
688 size_t diagLength = rowMap->getLocalNumElements();
689 Teuchos::Array<Scalar> diagonal(diagLength);
690 diags.get1dCopy(diagonal());
692 for(
size_t i = 0; i < diagLength; ++i) {
693 TEUCHOS_TEST_FOR_EXCEPTION(diagonal[i] == Teuchos::ScalarTraits<Scalar>::zero(),
695 "Matrix A has a zero/missing diagonal: " << diagonal[i] << std::endl <<
696 "KokkosKernels Jacobi-fused SpGEMM requires nonzero diagonal entries in A" << std::endl);
701 using device_t =
typename Tpetra::KokkosCompat::KokkosHIPWrapperNode::device_type;
703 using graph_t =
typename matrix_t::StaticCrsGraphType;
704 using lno_view_t =
typename graph_t::row_map_type::non_const_type;
705 using int_view_t = Kokkos::View<
int *,
706 typename lno_view_t::array_layout,
707 typename lno_view_t::memory_space,
708 typename lno_view_t::memory_traits>;
709 using c_lno_view_t =
typename graph_t::row_map_type::const_type;
710 using lno_nnz_view_t =
typename graph_t::entries_type::non_const_type;
711 using scalar_view_t =
typename matrix_t::values_type::non_const_type;
714 using handle_t =
typename KokkosKernels::Experimental::KokkosKernelsHandle<
715 typename lno_view_t::const_value_type,
typename lno_nnz_view_t::const_value_type,
typename scalar_view_t::const_value_type,
716 typename device_t::execution_space,
typename device_t::memory_space,
typename device_t::memory_space >;
717 using int_handle_t =
typename KokkosKernels::Experimental::KokkosKernelsHandle<
718 typename int_view_t::const_value_type,
typename lno_nnz_view_t::const_value_type,
typename scalar_view_t::const_value_type,
719 typename device_t::execution_space,
typename device_t::memory_space,
typename device_t::memory_space >;
722 const matrix_t Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
725 const matrix_t & Amat = Aview.origMatrix->getLocalMatrixDevice();
726 const matrix_t & Bmat = Bview.origMatrix->getLocalMatrixDevice();
728 typename handle_t::nnz_lno_t AnumRows = Amat.numRows();
729 typename handle_t::nnz_lno_t BnumRows = Bmerged.numRows();
730 typename handle_t::nnz_lno_t BnumCols = Bmerged.numCols();
732 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmerged.graph.row_map;
733 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmerged.graph.entries;
734 const scalar_view_t Avals = Amat.values, Bvals = Bmerged.values;
737 lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing(
"row_mapC"), AnumRows + 1);
738 lno_nnz_view_t entriesC;
739 scalar_view_t valuesC;
742 int team_work_size = 16;
743 std::string myalg(
"SPGEMM_KK_MEMORY");
744 if(!params.is_null()) {
745 if(params->isParameter(
"hip: algorithm"))
746 myalg = params->get(
"hip: algorithm",myalg);
747 if(params->isParameter(
"hip: team work size"))
748 team_work_size = params->get(
"hip: team work size",team_work_size);
752 std::string nodename(
"HIP");
753 std::string alg = nodename + std::string(
" algorithm");
754 if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
755 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
758 const bool useIntRowptrs =
759 irph.shouldUseIntRowptrs() &&
760 Aview.origMatrix->getApplyHelper()->shouldUseIntRowptrs();
764 kh.create_spgemm_handle(alg_enum);
765 kh.set_team_work_size(team_work_size);
767 int_view_t int_row_mapC (Kokkos::ViewAllocateWithoutInitializing(
"int_row_mapC"), AnumRows + 1);
769 auto Aint = Aview.origMatrix->getApplyHelper()->getIntRowptrMatrix(Amat);
770 auto Bint = irph.getIntRowptrMatrix(Bmerged);
772 KokkosSparse::Experimental::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
773 Aint.graph.row_map, Aint.graph.entries,
false,
774 Bint.graph.row_map, Bint.graph.entries,
false,
777 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
779 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
780 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
785 KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
786 Aint.graph.row_map, Aint.graph.entries, Amat.values,
false,
787 Bint.graph.row_map, Bint.graph.entries, Bint.values,
false,
788 int_row_mapC, entriesC, valuesC,
789 omega, Dinv.getLocalViewDevice(Access::ReadOnly));
791 Kokkos::parallel_for(int_row_mapC.size(), KOKKOS_LAMBDA(
int i){ row_mapC(i) = int_row_mapC(i);});
792 kh.destroy_spgemm_handle();
795 kh.create_spgemm_handle(alg_enum);
796 kh.set_team_work_size(team_work_size);
798 KokkosSparse::Experimental::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
799 Amat.graph.row_map, Amat.graph.entries,
false,
800 Bmerged.graph.row_map, Bmerged.graph.entries,
false,
803 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
805 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
806 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
808 KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
809 Amat.graph.row_map, Amat.graph.entries, Amat.values,
false,
810 Bmerged.graph.row_map, Bmerged.graph.entries, Bmerged.values,
false,
811 row_mapC, entriesC, valuesC,
812 omega, Dinv.getLocalViewDevice(Access::ReadOnly));
813 kh.destroy_spgemm_handle();
816 #ifdef HAVE_TPETRA_MMM_TIMINGS
817 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix HIPSort"))));
821 if (params.is_null() || params->get(
"sort entries",
true))
822 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
823 C.setAllValues(row_mapC,entriesC,valuesC);
825 #ifdef HAVE_TPETRA_MMM_TIMINGS
826 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix HIPESFC"))));
830 Teuchos::RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
831 labelList->set(
"Timer Label",label);
832 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
833 Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > dummyExport;
834 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
static bool debug()
Whether Tpetra is in debug mode.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
A distributed dense vector.