42 #ifndef TPETRA_MATRIXMATRIX_CUDA_DEF_HPP
43 #define TPETRA_MATRIXMATRIX_CUDA_DEF_HPP
45 #ifdef HAVE_TPETRA_INST_CUDA
51 template<
class Scalar,
54 class LocalOrdinalViewType>
55 struct KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosCudaWrapperNode,LocalOrdinalViewType> {
56 static inline void mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosCudaWrapperNode>& Aview,
57 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosCudaWrapperNode>& Bview,
58 const LocalOrdinalViewType & Acol2Brow,
59 const LocalOrdinalViewType & Acol2Irow,
60 const LocalOrdinalViewType & Bcol2Ccol,
61 const LocalOrdinalViewType & Icol2Ccol,
62 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosCudaWrapperNode>& C,
63 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosCudaWrapperNode> > Cimport,
64 const std::string& label = std::string(),
65 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
69 static inline void mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosCudaWrapperNode>& Aview,
70 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosCudaWrapperNode>& Bview,
71 const LocalOrdinalViewType & Acol2Brow,
72 const LocalOrdinalViewType & Acol2Irow,
73 const LocalOrdinalViewType & Bcol2Ccol,
74 const LocalOrdinalViewType & Icol2Ccol,
75 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosCudaWrapperNode>& C,
76 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosCudaWrapperNode> > Cimport,
77 const std::string& label = std::string(),
78 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
83 template<
class Scalar,
85 class GlobalOrdinal,
class LocalOrdinalViewType>
86 struct KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosCudaWrapperNode,LocalOrdinalViewType> {
87 static inline void jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
88 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosCudaWrapperNode> & Dinv,
89 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosCudaWrapperNode>& Aview,
90 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosCudaWrapperNode>& Bview,
91 const LocalOrdinalViewType & Acol2Brow,
92 const LocalOrdinalViewType & Acol2Irow,
93 const LocalOrdinalViewType & Bcol2Ccol,
94 const LocalOrdinalViewType & Icol2Ccol,
95 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosCudaWrapperNode>& C,
96 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosCudaWrapperNode> > Cimport,
97 const std::string& label = std::string(),
98 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
100 static inline void jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
101 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosCudaWrapperNode> & Dinv,
102 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosCudaWrapperNode>& Aview,
103 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosCudaWrapperNode>& Bview,
104 const LocalOrdinalViewType & Acol2Brow,
105 const LocalOrdinalViewType & Acol2Irow,
106 const LocalOrdinalViewType & Bcol2Ccol,
107 const LocalOrdinalViewType & Icol2Ccol,
108 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosCudaWrapperNode>& C,
109 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosCudaWrapperNode> > Cimport,
110 const std::string& label = std::string(),
111 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
118 template<
class Scalar,
121 class LocalOrdinalViewType>
122 void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosCudaWrapperNode,LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosCudaWrapperNode>& Aview,
123 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosCudaWrapperNode>& Bview,
124 const LocalOrdinalViewType & Acol2Brow,
125 const LocalOrdinalViewType & Acol2Irow,
126 const LocalOrdinalViewType & Bcol2Ccol,
127 const LocalOrdinalViewType & Icol2Ccol,
128 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosCudaWrapperNode>& C,
129 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosCudaWrapperNode> > Cimport,
130 const std::string& label,
131 const Teuchos::RCP<Teuchos::ParameterList>& params) {
134 #ifdef HAVE_TPETRA_MMM_TIMINGS
135 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
136 using Teuchos::TimeMonitor;
137 Teuchos::RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*(TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix CudaWrapper")))));
140 typedef Kokkos::Compat::KokkosCudaWrapperNode Node;
141 std::string nodename(
"Cuda");
146 typedef typename KCRS::device_type device_t;
147 typedef typename KCRS::StaticCrsGraphType graph_t;
148 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
149 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
150 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
151 typedef typename KCRS::values_type::non_const_type scalar_view_t;
155 int team_work_size = 16;
156 std::string myalg(
"SPGEMM_KK_MEMORY");
157 if(!params.is_null()) {
158 if(params->isParameter(
"cuda: algorithm"))
159 myalg = params->get(
"cuda: algorithm",myalg);
160 if(params->isParameter(
"cuda: team work size"))
161 team_work_size = params->get(
"cuda: team work size",team_work_size);
165 typedef KokkosKernels::Experimental::KokkosKernelsHandle<
166 typename lno_view_t::const_value_type,
typename lno_nnz_view_t::const_value_type,
typename scalar_view_t::const_value_type,
167 typename device_t::execution_space,
typename device_t::memory_space,
typename device_t::memory_space > KernelHandle;
170 const KCRS & Amat = Aview.origMatrix->getLocalMatrix();
171 const KCRS & Bmat = Bview.origMatrix->getLocalMatrix();
173 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map;
174 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries;
175 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
177 c_lno_view_t Irowptr;
178 lno_nnz_view_t Icolind;
180 if(!Bview.importMatrix.is_null()) {
181 Irowptr = Bview.importMatrix->getLocalMatrix().graph.row_map;
182 Icolind = Bview.importMatrix->getLocalMatrix().graph.entries;
183 Ivals = Bview.importMatrix->getLocalMatrix().values;
188 std::string alg = nodename+std::string(
" algorithm");
190 if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
191 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
194 const KCRS Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getNodeNumElements());
196 #ifdef HAVE_TPETRA_MMM_TIMINGS
197 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix CudaCore"))));
201 typename KernelHandle::nnz_lno_t AnumRows = Amat.numRows();
202 typename KernelHandle::nnz_lno_t BnumRows = Bmerged.numRows();
203 typename KernelHandle::nnz_lno_t BnumCols = Bmerged.numCols();
205 lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing(
"non_const_lnow_row"), AnumRows + 1);
206 lno_nnz_view_t entriesC;
207 scalar_view_t valuesC;
209 kh.create_spgemm_handle(alg_enum);
210 kh.set_team_work_size(team_work_size);
212 KokkosSparse::Experimental::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Amat.graph.row_map,Amat.graph.entries,
false,Bmerged.graph.row_map,Bmerged.graph.entries,
false,row_mapC);
214 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
216 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
217 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
219 KokkosSparse::Experimental::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Amat.graph.row_map,Amat.graph.entries,Amat.values,
false,Bmerged.graph.row_map,Bmerged.graph.entries,Bmerged.values,
false,row_mapC,entriesC,valuesC);
220 kh.destroy_spgemm_handle();
222 #ifdef HAVE_TPETRA_MMM_TIMINGS
223 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix CudaSort"))));
227 if (params.is_null() || params->get(
"sort entries",
true))
228 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
229 C.setAllValues(row_mapC,entriesC,valuesC);
231 #ifdef HAVE_TPETRA_MMM_TIMINGS
232 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix CudaESFC"))));
236 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
237 labelList->set(
"Timer Label",label);
238 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
239 RCP<const Export<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosCudaWrapperNode> > dummyExport;
240 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
245 template<
class Scalar,
248 class LocalOrdinalViewType>
249 void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosCudaWrapperNode,LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosCudaWrapperNode>& Aview,
250 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosCudaWrapperNode>& Bview,
251 const LocalOrdinalViewType & targetMapToOrigRow,
252 const LocalOrdinalViewType & targetMapToImportRow,
253 const LocalOrdinalViewType & Bcol2Ccol,
254 const LocalOrdinalViewType & Icol2Ccol,
255 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosCudaWrapperNode>& C,
256 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosCudaWrapperNode> > Cimport,
257 const std::string& label,
258 const Teuchos::RCP<Teuchos::ParameterList>& params) {
261 typedef Kokkos::Compat::KokkosCudaWrapperNode Node;
263 #ifdef HAVE_TPETRA_MMM_TIMINGS
264 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
265 using Teuchos::TimeMonitor;
266 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Reuse SerialCore"))));
267 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
275 typedef typename KCRS::StaticCrsGraphType graph_t;
276 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
277 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
278 typedef typename KCRS::values_type::non_const_type scalar_view_t;
281 typedef LocalOrdinal LO;
282 typedef GlobalOrdinal GO;
284 typedef Map<LO,GO,NO> map_type;
285 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
286 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
287 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
290 RCP<const map_type> Ccolmap = C.getColMap();
291 size_t m = Aview.origMatrix->getNodeNumRows();
292 size_t n = Ccolmap->getNodeNumElements();
295 const KCRS & Amat = Aview.origMatrix->getLocalMatrix();
296 const KCRS & Bmat = Bview.origMatrix->getLocalMatrix();
297 const KCRS & Cmat = C.getLocalMatrix();
299 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
300 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
301 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
302 scalar_view_t Cvals = Cmat.values;
304 c_lno_view_t Irowptr;
305 lno_nnz_view_t Icolind;
307 if(!Bview.importMatrix.is_null()) {
308 Irowptr = Bview.importMatrix->getLocalMatrix().graph.row_map;
309 Icolind = Bview.importMatrix->getLocalMatrix().graph.entries;
310 Ivals = Bview.importMatrix->getLocalMatrix().values;
313 #ifdef HAVE_TPETRA_MMM_TIMINGS
314 MM2 = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SerialCore - Compare"))));
326 std::vector<size_t> c_status(n, ST_INVALID);
329 size_t CSR_ip = 0, OLD_ip = 0;
330 for (
size_t i = 0; i < m; i++) {
334 CSR_ip = Crowptr[i+1];
335 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
336 c_status[Ccolind[k]] = k;
342 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
344 const SC Aval = Avals[k];
348 if (targetMapToOrigRow[Aik] != LO_INVALID) {
350 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
352 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
354 LO Cij = Bcol2Ccol[Bkj];
356 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
357 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph " <<
358 "(c_status = " << c_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
360 Cvals[c_status[Cij]] += Aval * Bvals[j];
365 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
366 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
368 LO Cij = Icol2Ccol[Ikj];
370 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
371 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph " <<
372 "(c_status = " << c_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
374 Cvals[c_status[Cij]] += Aval * Ivals[j];
380 C.fillComplete(C.getDomainMap(), C.getRangeMap());
384 template<
class Scalar,
387 class LocalOrdinalViewType>
388 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosCudaWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
389 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosCudaWrapperNode> & Dinv,
390 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosCudaWrapperNode>& Aview,
391 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosCudaWrapperNode>& Bview,
392 const LocalOrdinalViewType & Acol2Brow,
393 const LocalOrdinalViewType & Acol2Irow,
394 const LocalOrdinalViewType & Bcol2Ccol,
395 const LocalOrdinalViewType & Icol2Ccol,
396 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosCudaWrapperNode>& C,
397 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosCudaWrapperNode> > Cimport,
398 const std::string& label,
399 const Teuchos::RCP<Teuchos::ParameterList>& params) {
401 #ifdef HAVE_TPETRA_MMM_TIMINGS
402 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
403 using Teuchos::TimeMonitor;
404 Teuchos::RCP<TimeMonitor> MM;
412 std::string myalg(
"MSAK");
413 if(!params.is_null()) {
414 if(params->isParameter(
"cuda: jacobi algorithm"))
415 myalg = params->get(
"cuda: jacobi algorithm",myalg);
418 if(myalg ==
"MSAK") {
419 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_MultiplyScaleAddKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
422 throw std::runtime_error(
"Tpetra::MatrixMatrix::Jacobi newmatrix unknown kernel");
425 #ifdef HAVE_TPETRA_MMM_TIMINGS
426 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix CudaESFC"))));
430 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
431 labelList->set(
"Timer Label",label);
432 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
435 if(!C.isFillComplete()) {
436 RCP<const Export<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosCudaWrapperNode> > dummyExport;
437 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
445 template<
class Scalar,
448 class LocalOrdinalViewType>
449 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosCudaWrapperNode,LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
450 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosCudaWrapperNode> & Dinv,
451 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosCudaWrapperNode>& Aview,
452 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosCudaWrapperNode>& Bview,
453 const LocalOrdinalViewType & targetMapToOrigRow,
454 const LocalOrdinalViewType & targetMapToImportRow,
455 const LocalOrdinalViewType & Bcol2Ccol,
456 const LocalOrdinalViewType & Icol2Ccol,
457 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosCudaWrapperNode>& C,
458 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosCudaWrapperNode> > Cimport,
459 const std::string& label,
460 const Teuchos::RCP<Teuchos::ParameterList>& params) {
463 typedef Kokkos::Compat::KokkosCudaWrapperNode Node;
465 #ifdef HAVE_TPETRA_MMM_TIMINGS
466 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
467 using Teuchos::TimeMonitor;
468 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse CudaCore"))));
469 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
476 typedef typename KCRS::StaticCrsGraphType graph_t;
477 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
478 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
479 typedef typename KCRS::values_type::non_const_type scalar_view_t;
480 typedef typename scalar_view_t::memory_space scalar_memory_space;
483 typedef LocalOrdinal LO;
484 typedef GlobalOrdinal GO;
486 typedef Map<LO,GO,NO> map_type;
487 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
488 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
489 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
492 RCP<const map_type> Ccolmap = C.getColMap();
493 size_t m = Aview.origMatrix->getNodeNumRows();
494 size_t n = Ccolmap->getNodeNumElements();
497 const KCRS & Amat = Aview.origMatrix->getLocalMatrix();
498 const KCRS & Bmat = Bview.origMatrix->getLocalMatrix();
499 const KCRS & Cmat = C.getLocalMatrix();
501 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
502 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
503 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
504 scalar_view_t Cvals = Cmat.values;
506 c_lno_view_t Irowptr;
507 lno_nnz_view_t Icolind;
509 if(!Bview.importMatrix.is_null()) {
510 Irowptr = Bview.importMatrix->getLocalMatrix().graph.row_map;
511 Icolind = Bview.importMatrix->getLocalMatrix().graph.entries;
512 Ivals = Bview.importMatrix->getLocalMatrix().values;
516 auto Dvals = Dinv.template getLocalView<scalar_memory_space>();
518 #ifdef HAVE_TPETRA_MMM_TIMINGS
519 MM2 = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse CudaCore - Compare"))));
527 std::vector<size_t> c_status(n, ST_INVALID);
530 size_t CSR_ip = 0, OLD_ip = 0;
531 for (
size_t i = 0; i < m; i++) {
536 CSR_ip = Crowptr[i+1];
537 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
538 c_status[Ccolind[k]] = k;
544 SC minusOmegaDval = -omega*Dvals(i,0);
547 for (
size_t j = Browptr[i]; j < Browptr[i+1]; j++) {
548 Scalar Bval = Bvals[j];
552 LO Cij = Bcol2Ccol[Bij];
554 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
555 std::runtime_error,
"Trying to insert a new entry into a static graph");
557 Cvals[c_status[Cij]] = Bvals[j];
561 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
563 const SC Aval = Avals[k];
567 if (targetMapToOrigRow[Aik] != LO_INVALID) {
569 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
571 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
573 LO Cij = Bcol2Ccol[Bkj];
575 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
576 std::runtime_error,
"Trying to insert a new entry into a static graph");
578 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Bvals[j];
583 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
584 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
586 LO Cij = Icol2Ccol[Ikj];
588 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
589 std::runtime_error,
"Trying to insert a new entry into a static graph");
591 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Ivals[j];
597 #ifdef HAVE_TPETRA_MMM_TIMINGS
599 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse ESFC"))));
602 C.fillComplete(C.getDomainMap(), C.getRangeMap());
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, execution_space, void, typename local_graph_type::size_type > local_matrix_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...