42 #ifndef TPETRA_MATRIXMATRIX_OPENMP_DEF_HPP
43 #define TPETRA_MATRIXMATRIX_OPENMP_DEF_HPP
45 #ifdef HAVE_TPETRA_INST_OPENMP
51 template<
class Scalar,
53 class GlobalOrdinal,
class LocalOrdinalViewType>
54 struct KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType> {
55 static inline void mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
56 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
57 const LocalOrdinalViewType & Acol2Brow,
58 const LocalOrdinalViewType & Acol2Irow,
59 const LocalOrdinalViewType & Bcol2Ccol,
60 const LocalOrdinalViewType & Icol2Ccol,
61 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
62 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
63 const std::string& label = std::string(),
64 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
66 static inline void mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
67 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
68 const LocalOrdinalViewType & Acol2Brow,
69 const LocalOrdinalViewType & Acol2Irow,
70 const LocalOrdinalViewType & Bcol2Ccol,
71 const LocalOrdinalViewType & Icol2Ccol,
72 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
73 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
74 const std::string& label = std::string(),
75 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
82 template<
class Scalar,
84 class GlobalOrdinal,
class LocalOrdinalViewType>
85 struct KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType> {
86 static inline void jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
87 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
88 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
89 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
90 const LocalOrdinalViewType & Acol2Brow,
91 const LocalOrdinalViewType & Acol2Irow,
92 const LocalOrdinalViewType & Bcol2Ccol,
93 const LocalOrdinalViewType & Icol2Ccol,
94 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
95 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
96 const std::string& label = std::string(),
97 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
99 static inline void jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
100 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
101 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
102 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
103 const LocalOrdinalViewType & Acol2Brow,
104 const LocalOrdinalViewType & Acol2Irow,
105 const LocalOrdinalViewType & Bcol2Ccol,
106 const LocalOrdinalViewType & Icol2Ccol,
107 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
108 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
109 const std::string& label = std::string(),
110 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
112 static inline void jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
113 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
114 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
115 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
116 const LocalOrdinalViewType & Acol2Brow,
117 const LocalOrdinalViewType & Acol2Irow,
118 const LocalOrdinalViewType & Bcol2Ccol,
119 const LocalOrdinalViewType & Icol2Ccol,
120 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
121 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
122 const std::string& label = std::string(),
123 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
129 template<
class Scalar,
131 class GlobalOrdinal,
class LocalOrdinalViewType>
132 struct KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType> {
133 static inline void mult_R_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Rview,
134 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
135 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
136 const LocalOrdinalViewType & Acol2Prow,
137 const LocalOrdinalViewType & Acol2PIrow,
138 const LocalOrdinalViewType & Pcol2Ccol,
139 const LocalOrdinalViewType & PIcol2Ccol,
140 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
141 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
142 const std::string& label = std::string(),
143 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
145 static inline void mult_R_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Rview,
146 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
147 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
148 const LocalOrdinalViewType & Acol2Prow,
149 const LocalOrdinalViewType & Acol2PIrow,
150 const LocalOrdinalViewType & Pcol2Ccol,
151 const LocalOrdinalViewType & PIcol2Ccol,
152 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
153 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
154 const std::string& label = std::string(),
155 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
157 static inline void mult_PT_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
158 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
159 const LocalOrdinalViewType & Acol2Prow,
160 const LocalOrdinalViewType & Acol2PIrow,
161 const LocalOrdinalViewType & Pcol2Ccol,
162 const LocalOrdinalViewType & PIcol2Ccol,
163 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
164 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
165 const std::string& label = std::string(),
166 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
168 static inline void mult_PT_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
169 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
170 const LocalOrdinalViewType & Acol2Prow,
171 const LocalOrdinalViewType & Acol2PIrow,
172 const LocalOrdinalViewType & Pcol2Ccol,
173 const LocalOrdinalViewType & PIcol2Ccol,
174 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
175 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
176 const std::string& label = std::string(),
177 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
182 template<
class Scalar,
185 class LocalOrdinalViewType>
186 void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
187 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
188 const LocalOrdinalViewType & Acol2Brow,
189 const LocalOrdinalViewType & Acol2Irow,
190 const LocalOrdinalViewType & Bcol2Ccol,
191 const LocalOrdinalViewType & Icol2Ccol,
192 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
193 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
194 const std::string& label,
195 const Teuchos::RCP<Teuchos::ParameterList>& params) {
197 #ifdef HAVE_TPETRA_MMM_TIMINGS
198 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
199 using Teuchos::TimeMonitor;
200 Teuchos::RCP<TimeMonitor> MM;
204 std::string nodename(
"OpenMP");
209 typedef typename KCRS::device_type device_t;
210 typedef typename KCRS::StaticCrsGraphType graph_t;
211 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
212 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
213 typedef typename KCRS::values_type::non_const_type scalar_view_t;
216 int team_work_size = 16;
217 std::string myalg(
"SPGEMM_KK_MEMORY");
220 if(!params.is_null()) {
221 if(params->isParameter(
"openmp: algorithm"))
222 myalg = params->get(
"openmp: algorithm",myalg);
223 if(params->isParameter(
"openmp: team work size"))
224 team_work_size = params->get(
"openmp: team work size",team_work_size);
229 ::Tpetra::MatrixMatrix::ExtraKernels::mult_A_B_newmatrix_LowThreadGustavsonKernel(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
233 #ifdef HAVE_TPETRA_MMM_TIMINGS
234 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix OpenMPWrapper"))));
237 typedef KokkosKernels::Experimental::KokkosKernelsHandle<
238 typename lno_view_t::const_value_type,
typename lno_nnz_view_t::const_value_type,
typename scalar_view_t::const_value_type,
239 typename device_t::execution_space,
typename device_t::memory_space,
typename device_t::memory_space > KernelHandle;
242 const KCRS & Ak = Aview.origMatrix->getLocalMatrix();
246 std::string alg = nodename+std::string(
" algorithm");
248 if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
249 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
252 const KCRS Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getNodeNumElements());
254 #ifdef HAVE_TPETRA_MMM_TIMINGS
255 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix OpenMPCore"))));
259 typename KernelHandle::nnz_lno_t AnumRows = Ak.numRows();
262 typename KernelHandle::nnz_lno_t BnumRows = Bmerged.numRows();
263 typename KernelHandle::nnz_lno_t BnumCols = Bmerged.numCols();
266 lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing(
"non_const_lnow_row"), AnumRows + 1);
267 lno_nnz_view_t entriesC;
268 scalar_view_t valuesC;
270 kh.create_spgemm_handle(alg_enum);
271 kh.set_team_work_size(team_work_size);
273 KokkosSparse::Experimental::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,
false,Bmerged.graph.row_map,Bmerged.graph.entries,
false,row_mapC);
275 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
277 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
278 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
281 KokkosSparse::Experimental::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,Ak.values,
false,Bmerged.graph.row_map,Bmerged.graph.entries,Bmerged.values,
false,row_mapC,entriesC,valuesC);
282 kh.destroy_spgemm_handle();
284 #ifdef HAVE_TPETRA_MMM_TIMINGS
285 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix OpenMPSort"))));
288 if (params.is_null() || params->get(
"sort entries",
true))
289 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
290 C.setAllValues(row_mapC,entriesC,valuesC);
294 #ifdef HAVE_TPETRA_MMM_TIMINGS
295 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix OpenMPESFC"))));
299 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
300 labelList->set(
"Timer Label",label);
301 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
302 RCP<const Export<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > dummyExport;
303 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
307 Teuchos::ArrayRCP< const size_t > Crowptr;
308 Teuchos::ArrayRCP< const LocalOrdinal > Ccolind;
309 Teuchos::ArrayRCP< const Scalar > Cvalues;
310 C.getAllValues(Crowptr,Ccolind,Cvalues);
313 int MyPID = C->getComm()->getRank();
314 printf(
"[%d] Crowptr = ",MyPID);
315 for(
size_t i=0; i<(size_t) Crowptr.size(); i++) {
316 printf(
"%3d ",(
int)Crowptr.getConst()[i]);
319 printf(
"[%d] Ccolind = ",MyPID);
320 for(
size_t i=0; i<(size_t)Ccolind.size(); i++) {
321 printf(
"%3d ",(
int)Ccolind.getConst()[i]);
332 template<
class Scalar,
335 class LocalOrdinalViewType>
336 void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
337 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
338 const LocalOrdinalViewType & Acol2Brow,
339 const LocalOrdinalViewType & Acol2Irow,
340 const LocalOrdinalViewType & Bcol2Ccol,
341 const LocalOrdinalViewType & Icol2Ccol,
342 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
343 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
344 const std::string& label,
345 const Teuchos::RCP<Teuchos::ParameterList>& params) {
346 #ifdef HAVE_TPETRA_MMM_TIMINGS
347 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
348 using Teuchos::TimeMonitor;
349 Teuchos::RCP<TimeMonitor> MM;
356 int team_work_size = 16;
357 std::string myalg(
"LTG");
358 if(!params.is_null()) {
359 if(params->isParameter(
"openmp: algorithm"))
360 myalg = params->get(
"openmp: algorithm",myalg);
361 if(params->isParameter(
"openmp: team work size"))
362 team_work_size = params->get(
"openmp: team work size",team_work_size);
367 ::Tpetra::MatrixMatrix::ExtraKernels::mult_A_B_reuse_LowThreadGustavsonKernel(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
370 throw std::runtime_error(
"Tpetra::MatrixMatrix::MMM reuse unknown kernel");
373 #ifdef HAVE_TPETRA_MMM_TIMINGS
374 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Reuse OpenMPESFC"))));
376 C.fillComplete(C.getDomainMap(), C.getRangeMap());
381 template<
class Scalar,
384 class LocalOrdinalViewType>
385 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
386 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
387 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
388 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
389 const LocalOrdinalViewType & Acol2Brow,
390 const LocalOrdinalViewType & Acol2Irow,
391 const LocalOrdinalViewType & Bcol2Ccol,
392 const LocalOrdinalViewType & Icol2Ccol,
393 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
394 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
395 const std::string& label,
396 const Teuchos::RCP<Teuchos::ParameterList>& params) {
398 #ifdef HAVE_TPETRA_MMM_TIMINGS
399 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
400 using Teuchos::TimeMonitor;
401 Teuchos::RCP<TimeMonitor> MM;
408 int team_work_size = 16;
409 std::string myalg(
"LTG");
410 if(!params.is_null()) {
411 if(params->isParameter(
"openmp: jacobi algorithm"))
412 myalg = params->get(
"openmp: jacobi algorithm",myalg);
413 if(params->isParameter(
"openmp: team work size"))
414 team_work_size = params->get(
"openmp: team work size",team_work_size);
419 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_LowThreadGustavsonKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
421 else if(myalg ==
"MSAK") {
422 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_MultiplyScaleAddKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
424 else if(myalg ==
"KK") {
425 jacobi_A_B_newmatrix_KokkosKernels(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
428 throw std::runtime_error(
"Tpetra::MatrixMatrix::Jacobi newmatrix unknown kernel");
431 #ifdef HAVE_TPETRA_MMM_TIMINGS
432 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix OpenMPESFC"))));
436 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
437 labelList->set(
"Timer Label",label);
438 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
441 if(!C.isFillComplete()) {
442 RCP<const Export<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > dummyExport;
443 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
451 template<
class Scalar,
454 class LocalOrdinalViewType>
455 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
456 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
457 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
458 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
459 const LocalOrdinalViewType & Acol2Brow,
460 const LocalOrdinalViewType & Acol2Irow,
461 const LocalOrdinalViewType & Bcol2Ccol,
462 const LocalOrdinalViewType & Icol2Ccol,
463 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
464 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
465 const std::string& label,
466 const Teuchos::RCP<Teuchos::ParameterList>& params) {
468 #ifdef HAVE_TPETRA_MMM_TIMINGS
469 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
470 using Teuchos::TimeMonitor;
471 Teuchos::RCP<TimeMonitor> MM;
478 int team_work_size = 16;
479 std::string myalg(
"LTG");
480 if(!params.is_null()) {
481 if(params->isParameter(
"openmp: jacobi algorithm"))
482 myalg = params->get(
"openmp: jacobi algorithm",myalg);
483 if(params->isParameter(
"openmp: team work size"))
484 team_work_size = params->get(
"openmp: team work size",team_work_size);
489 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_reuse_LowThreadGustavsonKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
492 throw std::runtime_error(
"Tpetra::MatrixMatrix::Jacobi reuse unknown kernel");
495 #ifdef HAVE_TPETRA_MMM_TIMINGS
496 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse OpenMPESFC"))));
498 C.fillComplete(C.getDomainMap(), C.getRangeMap());
503 template<
class Scalar,
506 class LocalOrdinalViewType>
507 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
508 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
509 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
510 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
511 const LocalOrdinalViewType & Acol2Brow,
512 const LocalOrdinalViewType & Acol2Irow,
513 const LocalOrdinalViewType & Bcol2Ccol,
514 const LocalOrdinalViewType & Icol2Ccol,
515 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
516 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
517 const std::string& label,
518 const Teuchos::RCP<Teuchos::ParameterList>& params) {
520 #ifdef HAVE_TPETRA_MMM_TIMINGS
521 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
522 using Teuchos::TimeMonitor;
523 Teuchos::RCP<TimeMonitor> MM;
530 auto rowMap = Aview.origMatrix->getRowMap();
532 Aview.origMatrix->getLocalDiagCopy(diags);
533 size_t diagLength = rowMap->getNodeNumElements();
534 Teuchos::Array<Scalar> diagonal(diagLength);
535 diags.get1dCopy(diagonal());
537 for(
size_t i = 0; i < diagLength; ++i) {
538 TEUCHOS_TEST_FOR_EXCEPTION(diagonal[i] == Teuchos::ScalarTraits<Scalar>::zero(),
540 "Matrix A has a zero/missing diagonal: " << diagonal[i] << std::endl <<
541 "KokkosKernels Jacobi-fused SpGEMM requires nonzero diagonal entries in A" << std::endl);
546 using device_t =
typename Kokkos::Compat::KokkosOpenMPWrapperNode::device_type;
548 using graph_t =
typename matrix_t::StaticCrsGraphType;
549 using lno_view_t =
typename graph_t::row_map_type::non_const_type;
550 using c_lno_view_t =
typename graph_t::row_map_type::const_type;
551 using lno_nnz_view_t =
typename graph_t::entries_type::non_const_type;
552 using scalar_view_t =
typename matrix_t::values_type::non_const_type;
555 using handle_t =
typename KokkosKernels::Experimental::KokkosKernelsHandle<
556 typename lno_view_t::const_value_type,
typename lno_nnz_view_t::const_value_type,
typename scalar_view_t::const_value_type,
557 typename device_t::execution_space,
typename device_t::memory_space,
typename device_t::memory_space >;
560 c_lno_view_t Irowptr;
561 lno_nnz_view_t Icolind;
563 if(!Bview.importMatrix.is_null()) {
564 Irowptr = Bview.importMatrix->getLocalMatrix().graph.row_map;
565 Icolind = Bview.importMatrix->getLocalMatrix().graph.entries;
566 Ivals = Bview.importMatrix->getLocalMatrix().values;
570 const matrix_t Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getNodeNumElements());
573 const matrix_t & Amat = Aview.origMatrix->getLocalMatrix();
574 const matrix_t & Bmat = Bview.origMatrix->getLocalMatrix();
576 typename handle_t::nnz_lno_t AnumRows = Amat.numRows();
577 typename handle_t::nnz_lno_t BnumRows = Bmerged.numRows();
578 typename handle_t::nnz_lno_t BnumCols = Bmerged.numCols();
580 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmerged.graph.row_map;
581 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmerged.graph.entries;
582 const scalar_view_t Avals = Amat.values, Bvals = Bmerged.values;
585 lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing(
"non_const_lnow_row"), AnumRows + 1);
586 lno_nnz_view_t entriesC;
587 scalar_view_t valuesC;
590 int team_work_size = 16;
591 std::string myalg(
"SPGEMM_KK_MEMORY");
592 if(!params.is_null()) {
593 if(params->isParameter(
"cuda: algorithm"))
594 myalg = params->get(
"cuda: algorithm",myalg);
595 if(params->isParameter(
"cuda: team work size"))
596 team_work_size = params->get(
"cuda: team work size",team_work_size);
600 std::string nodename(
"OpenMP");
601 std::string alg = nodename + std::string(
" algorithm");
602 if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
603 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
608 kh.create_spgemm_handle(alg_enum);
609 kh.set_team_work_size(team_work_size);
611 KokkosSparse::Experimental::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
612 Arowptr, Acolind,
false,
613 Browptr, Bcolind,
false,
616 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
618 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
619 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
622 KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
623 Arowptr, Acolind, Avals,
false,
624 Browptr, Bcolind, Bvals,
false,
625 row_mapC, entriesC, valuesC,
626 omega, Dinv.getLocalViewDevice());
627 kh.destroy_spgemm_handle();
629 #ifdef HAVE_TPETRA_MMM_TIMINGS
630 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix OpenMPSort"))));
634 if (params.is_null() || params->get(
"sort entries",
true))
635 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
636 C.setAllValues(row_mapC,entriesC,valuesC);
638 #ifdef HAVE_TPETRA_MMM_TIMINGS
639 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix OpenMPESFC"))));
643 Teuchos::RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
644 labelList->set(
"Timer Label",label);
645 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
646 Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > dummyExport;
647 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
652 template<
class Scalar,
655 class LocalOrdinalViewType>
656 void KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_R_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Rview,
657 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
658 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
659 const LocalOrdinalViewType & Acol2Prow,
660 const LocalOrdinalViewType & Acol2PIrow,
661 const LocalOrdinalViewType & Pcol2Accol,
662 const LocalOrdinalViewType & PIcol2Accol,
663 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
664 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
665 const std::string& label,
666 const Teuchos::RCP<Teuchos::ParameterList>& params) {
670 #ifdef HAVE_TPETRA_MMM_TIMINGS
671 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
672 using Teuchos::TimeMonitor;
673 Teuchos::RCP<TimeMonitor> MM;
677 std::string nodename(
"OpenMP");
680 std::string myalg(
"LTG");
682 if(!params.is_null()) {
683 if(params->isParameter(
"openmp: rap algorithm"))
684 myalg = params->get(
"openmp: rap algorithm",myalg);
689 ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_newmatrix_LowThreadGustavsonKernel(Rview,Aview,Pview,Acol2Prow,Acol2PIrow,Pcol2Accol,PIcol2Accol,Ac,Acimport,label,params);
692 throw std::runtime_error(
"Tpetra::MatrixMatrix::R_A_P newmatrix unknown kernel");
697 template<
class Scalar,
700 class LocalOrdinalViewType>
701 void KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_R_A_P_reuse_kernel_wrapper(
702 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Rview,
703 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
704 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
706 const LocalOrdinalViewType & Acol2Prow,
707 const LocalOrdinalViewType & Acol2Irow,
708 const LocalOrdinalViewType & Pcol2Ccol,
709 const LocalOrdinalViewType & Icol2Ccol,
710 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
711 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
712 const std::string& label,
713 const Teuchos::RCP<Teuchos::ParameterList>& params) {
715 #ifdef HAVE_TPETRA_MMM_TIMINGS
716 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
717 using Teuchos::TimeMonitor;
718 Teuchos::RCP<TimeMonitor> MM;
725 std::string myalg(
"LTG");
726 if(!params.is_null()) {
727 if(params->isParameter(
"openmp: rap algorithm"))
728 myalg = params->get(
"openmp: rap algorithm",myalg);
733 ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_reuse_LowThreadGustavsonKernel(Rview,Aview,Pview,Acol2Prow,Acol2Irow,Pcol2Ccol,Icol2Ccol,C,Cimport,label,params);
736 throw std::runtime_error(
"Tpetra::MatrixMatrix::R_A_P newmatrix unknown kernel");
739 #ifdef HAVE_TPETRA_MMM_TIMINGS
740 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"RAP Reuse OpenMPESFC"))));
742 C.fillComplete(C.getDomainMap(), C.getRangeMap());
749 template<
class Scalar,
752 class LocalOrdinalViewType>
753 void KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_PT_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
755 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
756 const LocalOrdinalViewType & Acol2Prow,
757 const LocalOrdinalViewType & Acol2PIrow,
758 const LocalOrdinalViewType & Pcol2Accol,
759 const LocalOrdinalViewType & PIcol2Accol,
760 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
761 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
762 const std::string& label,
763 const Teuchos::RCP<Teuchos::ParameterList>& params) {
766 #ifdef HAVE_TPETRA_MMM_TIMINGS
767 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
768 using Teuchos::TimeMonitor;
769 Teuchos::RCP<TimeMonitor> MM;
773 std::string nodename(
"OpenMP");
776 std::string myalg(
"LTG");
778 if(!params.is_null()) {
779 if(params->isParameter(
"openmp: ptap algorithm"))
780 myalg = params->get(
"openmp: ptap algorithm",myalg);
784 #ifdef HAVE_TPETRA_MMM_TIMINGS
785 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"PTAP local transpose"))));
788 using Teuchos::ParameterList;
790 using LO = LocalOrdinal;
791 using GO = GlobalOrdinal;
795 using transposer_type =
796 RowMatrixTransposer<SC, LO, GO, Kokkos::Compat::KokkosOpenMPWrapperNode>;
797 transposer_type transposer (Pview.origMatrix, label +
"XP: ");
798 RCP<ParameterList> transposeParams (
new ParameterList);
799 if (! params.is_null ()) {
800 transposeParams->set (
"compute global constants",
801 params->get (
"compute global constants: temporaries",
804 transposeParams->set (
"sort",
false);
805 RCP<CrsMatrix<SC, LO, GO, Kokkos::Compat::KokkosOpenMPWrapperNode> > Ptrans =
806 transposer.createTransposeLocal (transposeParams);
807 CrsMatrixStruct<SC, LO, GO, Kokkos::Compat::KokkosOpenMPWrapperNode> Rview;
808 Rview.origMatrix = Ptrans;
810 using ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_newmatrix_LowThreadGustavsonKernel;
811 mult_R_A_P_newmatrix_LowThreadGustavsonKernel
812 (Rview, Aview, Pview, Acol2Prow, Acol2PIrow, Pcol2Accol,
813 PIcol2Accol, Ac, Acimport, label, params);
816 throw std::runtime_error(
"Tpetra::MatrixMatrix::PT_A_P newmatrix unknown kernel");
821 template<
class Scalar,
824 class LocalOrdinalViewType>
825 void KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_PT_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
827 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
828 const LocalOrdinalViewType & Acol2Prow,
829 const LocalOrdinalViewType & Acol2PIrow,
830 const LocalOrdinalViewType & Pcol2Accol,
831 const LocalOrdinalViewType & PIcol2Accol,
832 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
833 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
834 const std::string& label,
835 const Teuchos::RCP<Teuchos::ParameterList>& params) {
838 #ifdef HAVE_TPETRA_MMM_TIMINGS
839 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
840 using Teuchos::TimeMonitor;
841 Teuchos::RCP<TimeMonitor> MM;
845 std::string nodename(
"OpenMP");
848 std::string myalg(
"LTG");
850 if(!params.is_null()) {
851 if(params->isParameter(
"openmp: ptap algorithm"))
852 myalg = params->get(
"openmp: ptap algorithm",myalg);
856 #ifdef HAVE_TPETRA_MMM_TIMINGS
857 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"PTAP local transpose"))));
860 using Teuchos::ParameterList;
862 using LO = LocalOrdinal;
863 using GO = GlobalOrdinal;
867 using transposer_type =
868 RowMatrixTransposer<SC, LO, GO, Kokkos::Compat::KokkosOpenMPWrapperNode>;
869 transposer_type transposer (Pview.origMatrix, label +
"XP: ");
870 RCP<ParameterList> transposeParams (
new ParameterList);
871 if (! params.is_null ()) {
872 transposeParams->set (
"compute global constants",
873 params->get (
"compute global constants: temporaries",
876 transposeParams->set (
"sort",
false);
877 RCP<CrsMatrix<SC, LO, GO, Kokkos::Compat::KokkosOpenMPWrapperNode> > Ptrans =
878 transposer.createTransposeLocal (transposeParams);
879 CrsMatrixStruct<SC, LO, GO, Kokkos::Compat::KokkosOpenMPWrapperNode> Rview;
880 Rview.origMatrix = Ptrans;
882 using ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_reuse_LowThreadGustavsonKernel;
883 mult_R_A_P_reuse_LowThreadGustavsonKernel
884 (Rview, Aview, Pview, Acol2Prow, Acol2PIrow, Pcol2Accol,
885 PIcol2Accol, Ac, Acimport, label, params);
888 throw std::runtime_error(
"Tpetra::MatrixMatrix::PT_A_P reuse unknown kernel");
890 Ac.fillComplete(Ac.getDomainMap(), Ac.getRangeMap());
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_type::size_type > local_matrix_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
static bool debug()
Whether Tpetra is in debug mode.
A distributed dense vector.