42 #ifndef TPETRA_MATRIXMATRIX_OPENMP_DEF_HPP
43 #define TPETRA_MATRIXMATRIX_OPENMP_DEF_HPP
45 #ifdef HAVE_TPETRA_INST_OPENMP
// Partial specialization of KernelWrappers for the
// Kokkos::Compat::KokkosOpenMPWrapperNode node type.
// It declares two static wrappers for the A*B product, one "newmatrix" and one
// "reuse" variant. Both take the same argument list: views of A and B, four
// index-mapping views (Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol), the output
// matrix C, an Import object for C, a timer label (default: empty string) and
// an optional parameter list (default: Teuchos::null).
// NOTE(review): LocalOrdinal is used in the specialization arguments but is not
// declared in the template parameter list shown here, and no closing "};" is
// visible - this listing appears to have dropped lines; confirm against the
// original header.
51 template<
class Scalar,
53 class GlobalOrdinal,
class LocalOrdinalViewType>
54 struct KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType> {
// Wrapper for the A*B product in the "newmatrix" case.
55 static inline void mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
56 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
57 const LocalOrdinalViewType & Acol2Brow,
58 const LocalOrdinalViewType & Acol2Irow,
59 const LocalOrdinalViewType & Bcol2Ccol,
60 const LocalOrdinalViewType & Icol2Ccol,
61 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
62 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
63 const std::string& label = std::string(),
64 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
// Wrapper for the A*B product in the "reuse" case (same argument list).
66 static inline void mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
67 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
68 const LocalOrdinalViewType & Acol2Brow,
69 const LocalOrdinalViewType & Acol2Irow,
70 const LocalOrdinalViewType & Bcol2Ccol,
71 const LocalOrdinalViewType & Icol2Ccol,
72 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
73 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
74 const std::string& label = std::string(),
75 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
// Partial specialization of KernelWrappers2 for the
// Kokkos::Compat::KokkosOpenMPWrapperNode node type.
// It declares the two static Jacobi wrappers ("newmatrix" and "reuse"). Compared
// with the plain A*B wrappers they take two extra leading arguments: the scalar
// omega and the vector Dinv. label and params are optional (defaults: empty
// string and Teuchos::null).
// NOTE(review): LocalOrdinal is used but not declared in the template parameter
// list shown here, and no closing "};" is visible - the listing appears to have
// dropped lines; confirm against the original header.
82 template<
class Scalar,
84 class GlobalOrdinal,
class LocalOrdinalViewType>
85 struct KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType> {
// Jacobi wrapper, "newmatrix" case.
86 static inline void jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
87 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
88 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
89 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
90 const LocalOrdinalViewType & Acol2Brow,
91 const LocalOrdinalViewType & Acol2Irow,
92 const LocalOrdinalViewType & Bcol2Ccol,
93 const LocalOrdinalViewType & Icol2Ccol,
94 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
95 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
96 const std::string& label = std::string(),
97 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
// Jacobi wrapper, "reuse" case (same argument list).
99 static inline void jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
100 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
101 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
102 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
103 const LocalOrdinalViewType & Acol2Brow,
104 const LocalOrdinalViewType & Acol2Irow,
105 const LocalOrdinalViewType & Bcol2Ccol,
106 const LocalOrdinalViewType & Icol2Ccol,
107 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
108 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
109 const std::string& label = std::string(),
110 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
// Partial specialization of KernelWrappers3 for the
// Kokkos::Compat::KokkosOpenMPWrapperNode node type.
// It declares four static wrappers for triple products:
//   - mult_R_A_P_{newmatrix,reuse}_kernel_wrapper take views of R, A and P;
//   - mult_PT_A_P_{newmatrix,reuse}_kernel_wrapper take views of A and P only.
// All four also take four index-mapping views, the output matrix Ac, an Import
// object for Ac, a timer label (default: empty string) and an optional
// parameter list (default: Teuchos::null).
// NOTE(review): LocalOrdinal is used but not declared in the template parameter
// list shown here, and no closing "};" is visible - the listing appears to have
// dropped lines; confirm against the original header.
116 template<
class Scalar,
118 class GlobalOrdinal,
class LocalOrdinalViewType>
119 struct KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType> {
// R*A*P wrapper, "newmatrix" case.
120 static inline void mult_R_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Rview,
121 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
122 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
123 const LocalOrdinalViewType & Acol2Prow,
124 const LocalOrdinalViewType & Acol2PIrow,
125 const LocalOrdinalViewType & Pcol2Ccol,
126 const LocalOrdinalViewType & PIcol2Ccol,
127 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
128 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
129 const std::string& label = std::string(),
130 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
// R*A*P wrapper, "reuse" case.
132 static inline void mult_R_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Rview,
133 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
134 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
135 const LocalOrdinalViewType & Acol2Prow,
136 const LocalOrdinalViewType & Acol2PIrow,
137 const LocalOrdinalViewType & Pcol2Ccol,
138 const LocalOrdinalViewType & PIcol2Ccol,
139 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
140 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
141 const std::string& label = std::string(),
142 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
// P^T*A*P wrapper, "newmatrix" case (no separate R view is passed in).
144 static inline void mult_PT_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
145 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
146 const LocalOrdinalViewType & Acol2Prow,
147 const LocalOrdinalViewType & Acol2PIrow,
148 const LocalOrdinalViewType & Pcol2Ccol,
149 const LocalOrdinalViewType & PIcol2Ccol,
150 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
151 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
152 const std::string& label = std::string(),
153 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
// P^T*A*P wrapper, "reuse" case.
155 static inline void mult_PT_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
156 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
157 const LocalOrdinalViewType & Acol2Prow,
158 const LocalOrdinalViewType & Acol2PIrow,
159 const LocalOrdinalViewType & Pcol2Ccol,
160 const LocalOrdinalViewType & PIcol2Ccol,
161 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
162 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
163 const std::string& label = std::string(),
164 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
// OpenMP implementation of the A*B "newmatrix" wrapper.
//
// Visible steps, in order:
//   1. read the algorithm name and team work size from params (if given);
//   2. call the low-thread Gustavson kernel;
//   3. a KokkosKernels SpGEMM path: merge the B-side matrices, run the symbolic
//      and numeric phases, optionally sort the entries, hand the three arrays
//      to C and call expertStaticFillComplete on C;
//   4. a printf dump of C's row pointers and column indices.
//
// NOTE(review): this listing has dropped lines (the conditions that choose
// between steps 2 and 3, closing braces, #endif lines, and the declarations of
// KCRS and kh are not visible). Which steps run for a given "openmp: algorithm"
// value therefore cannot be read off this text - confirm against the original.
169 template<
class Scalar,
172 class LocalOrdinalViewType>
173 void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
174 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
175 const LocalOrdinalViewType & Acol2Brow,
176 const LocalOrdinalViewType & Acol2Irow,
177 const LocalOrdinalViewType & Bcol2Ccol,
178 const LocalOrdinalViewType & Icol2Ccol,
179 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
180 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
181 const std::string& label,
182 const Teuchos::RCP<Teuchos::ParameterList>& params) {
// Timer bookkeeping: every timer name below is prefixed with
// "TpetraExt <label>: ".
184 #ifdef HAVE_TPETRA_MMM_TIMINGS
185 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
186 using Teuchos::TimeMonitor;
187 Teuchos::RCP<TimeMonitor> MM;
// Node name; used further down to build the "OpenMP algorithm" parameter key.
191 std::string nodename(
"OpenMP");
// View and graph types derived from KCRS (KCRS itself is declared on a line
// that is not visible in this listing).
196 typedef typename KCRS::device_type device_t;
197 typedef typename KCRS::StaticCrsGraphType graph_t;
198 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
199 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
200 typedef typename KCRS::values_type::non_const_type scalar_view_t;
// Defaults used when params is null or does not carry the corresponding key.
203 int team_work_size = 16;
204 std::string myalg(
"SPGEMM_KK_MEMORY");
// Optional overrides: "openmp: algorithm" and "openmp: team work size".
207 if(!params.is_null()) {
208 if(params->isParameter(
"openmp: algorithm"))
209 myalg = params->get(
"openmp: algorithm",myalg);
210 if(params->isParameter(
"openmp: team work size"))
211 team_work_size = params->get(
"openmp: team work size",team_work_size);
// Low-thread Gustavson kernel; all wrapper arguments are forwarded unchanged.
// NOTE(review): the condition guarding this call is not visible here.
216 ::Tpetra::MatrixMatrix::ExtraKernels::mult_A_B_newmatrix_LowThreadGustavsonKernel(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
220 #ifdef HAVE_TPETRA_MMM_TIMINGS
221 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix OpenMPWrapper"))));
// KokkosKernels handle type, parameterised on the const value types of the
// three views and on the device's execution and memory spaces.
224 typedef KokkosKernels::Experimental::KokkosKernelsHandle<
225 typename lno_view_t::const_value_type,
typename lno_nnz_view_t::const_value_type,
typename scalar_view_t::const_value_type,
226 typename device_t::execution_space,
typename device_t::memory_space,
typename device_t::memory_space > KernelHandle;
// Local matrix of A, taken from Aview.origMatrix.
229 const KCRS & Ak = Aview.origMatrix->getLocalMatrix();
// A parameter named "<nodename> algorithm" (i.e. "OpenMP algorithm"), when
// present, overrides myalg; the resulting name is then converted to the
// KokkosSparse algorithm enum.
233 std::string alg = nodename+std::string(
" algorithm");
235 if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
236 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
// Build the single right-hand matrix used by the SpGEMM calls below; the last
// argument is the number of local elements in C's column map.
239 const KCRS Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getNodeNumElements());
241 #ifdef HAVE_TPETRA_MMM_TIMINGS
242 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix OpenMPCore"))));
// Dimensions passed to the SpGEMM calls.
246 typename KernelHandle::nnz_lno_t AnumRows = Ak.numRows();
249 typename KernelHandle::nnz_lno_t BnumRows = Bmerged.numRows();
250 typename KernelHandle::nnz_lno_t BnumCols = Bmerged.numCols();
// Output arrays for C. The row map has AnumRows + 1 entries and is allocated
// without initialization; entries and values are allocated after the symbolic
// phase, once the number of nonzeros is known.
253 lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing(
"non_const_lnow_row"), AnumRows + 1);
254 lno_nnz_view_t entriesC;
255 scalar_view_t valuesC;
// Configure the handle. NOTE(review): the declaration of kh is on a line that
// is not visible in this listing.
257 kh.create_spgemm_handle(alg_enum);
258 kh.set_team_work_size(team_work_size);
// Symbolic phase: writes row_mapC. Both "false" arguments are passed as the
// transpose flags of the call - TODO confirm against the KokkosKernels API.
260 KokkosSparse::Experimental::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,
false,Bmerged.graph.row_map,Bmerged.graph.entries,
false,row_mapC);
// Size entriesC and valuesC from the nonzero count reported by the handle.
262 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
264 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
265 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
// Numeric phase: writes entriesC and valuesC; the handle is destroyed afterwards.
268 KokkosSparse::Experimental::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,Ak.values,
false,Bmerged.graph.row_map,Bmerged.graph.entries,Bmerged.values,
false,row_mapC,entriesC,valuesC);
269 kh.destroy_spgemm_handle();
271 #ifdef HAVE_TPETRA_MMM_TIMINGS
272 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix OpenMPSort"))));
// Sort the CRS entries unless params is non-null and its "sort entries" value
// is false, then install the three arrays in C.
275 if (params.is_null() || params->get(
"sort entries",
true))
276 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
277 C.setAllValues(row_mapC,entriesC,valuesC);
281 #ifdef HAVE_TPETRA_MMM_TIMINGS
282 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix OpenMPESFC"))));
// Parameter list for the fill-complete call: it carries the timer label and,
// when params is given, its "compute global constants" value (default true).
286 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
287 labelList->set(
"Timer Label",label);
288 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
// Finish C with B's domain map, A's range map, the supplied Import and a
// default-constructed (null) Export.
289 RCP<const Export<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > dummyExport;
290 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
// Debug dump of C's row pointers and column indices, prefixed with the rank.
// NOTE(review): C is a reference in the signature above, so "C->getComm()"
// would not compile if this code were active; presumably it sits inside a
// disabled preprocessor region that this listing does not show - confirm.
294 Teuchos::ArrayRCP< const size_t > Crowptr;
295 Teuchos::ArrayRCP< const LocalOrdinal > Ccolind;
296 Teuchos::ArrayRCP< const Scalar > Cvalues;
297 C.getAllValues(Crowptr,Ccolind,Cvalues);
300 int MyPID = C->getComm()->getRank();
301 printf(
"[%d] Crowptr = ",MyPID);
302 for(
size_t i=0; i<(size_t) Crowptr.size(); i++) {
303 printf(
"%3d ",(
int)Crowptr.getConst()[i]);
306 printf(
"[%d] Ccolind = ",MyPID);
307 for(
size_t i=0; i<(size_t)Ccolind.size(); i++) {
308 printf(
"%3d ",(
int)Ccolind.getConst()[i]);
// OpenMP implementation of the A*B "reuse" wrapper.
//
// Reads the algorithm name (default "LTG") and team work size (default 16)
// from params, calls the reuse low-thread Gustavson kernel, and finally calls
// C.fillComplete with C's own domain and range maps. A std::runtime_error with
// the message "Tpetra::MatrixMatrix::MMM reuse unknown kernel" is thrown on the
// alternative path.
// NOTE(review): the if/else lines that choose between the kernel call and the
// throw (and the closing braces / #endif lines) are not visible in this
// listing - confirm the exact condition against the original file.
319 template<
class Scalar,
322 class LocalOrdinalViewType>
323 void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
324 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
325 const LocalOrdinalViewType & Acol2Brow,
326 const LocalOrdinalViewType & Acol2Irow,
327 const LocalOrdinalViewType & Bcol2Ccol,
328 const LocalOrdinalViewType & Icol2Ccol,
329 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
330 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
331 const std::string& label,
332 const Teuchos::RCP<Teuchos::ParameterList>& params) {
// Timer bookkeeping: timer names are prefixed with "TpetraExt <label>: ".
333 #ifdef HAVE_TPETRA_MMM_TIMINGS
334 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
335 using Teuchos::TimeMonitor;
336 Teuchos::RCP<TimeMonitor> MM;
// Defaults, overridden by "openmp: algorithm" / "openmp: team work size" when
// params carries them. team_work_size is not used again in the lines visible
// in this block.
343 int team_work_size = 16;
344 std::string myalg(
"LTG");
345 if(!params.is_null()) {
346 if(params->isParameter(
"openmp: algorithm"))
347 myalg = params->get(
"openmp: algorithm",myalg);
348 if(params->isParameter(
"openmp: team work size"))
349 team_work_size = params->get(
"openmp: team work size",team_work_size);
// Reuse variant of the low-thread Gustavson kernel; arguments are forwarded
// unchanged.
354 ::Tpetra::MatrixMatrix::ExtraKernels::mult_A_B_reuse_LowThreadGustavsonKernel(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
// Error path for an algorithm name this wrapper does not handle.
357 throw std::runtime_error(
"Tpetra::MatrixMatrix::MMM reuse unknown kernel");
360 #ifdef HAVE_TPETRA_MMM_TIMINGS
361 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Reuse OpenMPESFC"))));
// Complete C using the domain and range maps it already holds.
363 C.fillComplete(C.getDomainMap(), C.getRangeMap());
// OpenMP implementation of the Jacobi "newmatrix" wrapper.
//
// Reads the algorithm name from "openmp: jacobi algorithm" (default "LTG") and
// the team work size (default 16), then dispatches:
//   - the low-thread Gustavson Jacobi kernel, or
//   - for "MSAK", the multiply-scale-add kernel,
// and throws std::runtime_error("... Jacobi newmatrix unknown kernel") on the
// remaining path. Afterwards, if C is not yet fill-complete, it calls
// expertStaticFillComplete on C.
// NOTE(review): the first "if" of the dispatch chain, the final "else", the
// closing braces and the #endif lines are not visible in this listing -
// confirm against the original file.
368 template<
class Scalar,
371 class LocalOrdinalViewType>
372 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
373 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
374 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
375 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
376 const LocalOrdinalViewType & Acol2Brow,
377 const LocalOrdinalViewType & Acol2Irow,
378 const LocalOrdinalViewType & Bcol2Ccol,
379 const LocalOrdinalViewType & Icol2Ccol,
380 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
381 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
382 const std::string& label,
383 const Teuchos::RCP<Teuchos::ParameterList>& params) {
// Timer bookkeeping: timer names are prefixed with "TpetraExt <label>: ".
385 #ifdef HAVE_TPETRA_MMM_TIMINGS
386 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
387 using Teuchos::TimeMonitor;
388 Teuchos::RCP<TimeMonitor> MM;
// Defaults, overridden by "openmp: jacobi algorithm" / "openmp: team work size"
// when params carries them. team_work_size is not used again in the lines
// visible in this block.
395 int team_work_size = 16;
396 std::string myalg(
"LTG");
397 if(!params.is_null()) {
398 if(params->isParameter(
"openmp: jacobi algorithm"))
399 myalg = params->get(
"openmp: jacobi algorithm",myalg);
400 if(params->isParameter(
"openmp: team work size"))
401 team_work_size = params->get(
"openmp: team work size",team_work_size);
// Low-thread Gustavson Jacobi kernel; arguments are forwarded unchanged.
406 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_LowThreadGustavsonKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
// "MSAK" selects the multiply-scale-add kernel instead.
408 else if(myalg ==
"MSAK") {
409 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_MultiplyScaleAddKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
// Error path for an algorithm name this wrapper does not handle.
412 throw std::runtime_error(
"Tpetra::MatrixMatrix::Jacobi newmatrix unknown kernel");
415 #ifdef HAVE_TPETRA_MMM_TIMINGS
416 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix OpenMPESFC"))));
// Parameter list for the fill-complete call: it carries the timer label and,
// when params is given, its "compute global constants" value (default true).
420 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
421 labelList->set(
"Timer Label",label);
422 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
// Only finish C here if it has not been completed already; uses B's domain
// map, A's range map, the supplied Import and a default-constructed (null)
// Export.
425 if(!C.isFillComplete()) {
426 RCP<const Export<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > dummyExport;
427 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
// OpenMP implementation of the Jacobi "reuse" wrapper.
//
// Reads the algorithm name from "openmp: jacobi algorithm" (default "LTG") and
// the team work size (default 16), calls the reuse low-thread Gustavson Jacobi
// kernel, and finally calls C.fillComplete with C's own domain and range maps.
// A std::runtime_error with the message
// "Tpetra::MatrixMatrix::Jacobi reuse unknown kernel" is thrown on the
// alternative path.
// NOTE(review): the if/else lines that choose between the kernel call and the
// throw (and the closing braces / #endif lines) are not visible in this
// listing - confirm against the original file.
435 template<
class Scalar,
438 class LocalOrdinalViewType>
439 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
440 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
441 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
442 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
443 const LocalOrdinalViewType & Acol2Brow,
444 const LocalOrdinalViewType & Acol2Irow,
445 const LocalOrdinalViewType & Bcol2Ccol,
446 const LocalOrdinalViewType & Icol2Ccol,
447 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
448 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
449 const std::string& label,
450 const Teuchos::RCP<Teuchos::ParameterList>& params) {
// Timer bookkeeping: timer names are prefixed with "TpetraExt <label>: ".
452 #ifdef HAVE_TPETRA_MMM_TIMINGS
453 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
454 using Teuchos::TimeMonitor;
455 Teuchos::RCP<TimeMonitor> MM;
// Defaults, overridden by "openmp: jacobi algorithm" / "openmp: team work size"
// when params carries them. team_work_size is not used again in the lines
// visible in this block.
462 int team_work_size = 16;
463 std::string myalg(
"LTG");
464 if(!params.is_null()) {
465 if(params->isParameter(
"openmp: jacobi algorithm"))
466 myalg = params->get(
"openmp: jacobi algorithm",myalg);
467 if(params->isParameter(
"openmp: team work size"))
468 team_work_size = params->get(
"openmp: team work size",team_work_size);
// Reuse variant of the low-thread Gustavson Jacobi kernel; arguments are
// forwarded unchanged.
473 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_reuse_LowThreadGustavsonKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
// Error path for an algorithm name this wrapper does not handle.
476 throw std::runtime_error(
"Tpetra::MatrixMatrix::Jacobi reuse unknown kernel");
479 #ifdef HAVE_TPETRA_MMM_TIMINGS
480 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse OpenMPESFC"))));
// Complete C using the domain and range maps it already holds.
482 C.fillComplete(C.getDomainMap(), C.getRangeMap());
// OpenMP implementation of the R*A*P "newmatrix" wrapper.
//
// Reads the algorithm name from "openmp: rap algorithm" (default "LTG") and
// calls the newmatrix low-thread Gustavson R*A*P kernel with all arguments
// forwarded unchanged. A std::runtime_error with the message
// "Tpetra::MatrixMatrix::R_A_P newmatrix unknown kernel" is thrown on the
// alternative path. No fill-complete call on Ac is visible in this wrapper.
// NOTE(review): the if/else lines that choose between the kernel call and the
// throw (and the closing braces / #endif lines) are not visible in this
// listing - confirm against the original file.
488 template<
class Scalar,
491 class LocalOrdinalViewType>
492 void KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_R_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Rview,
493 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
494 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
495 const LocalOrdinalViewType & Acol2Prow,
496 const LocalOrdinalViewType & Acol2PIrow,
497 const LocalOrdinalViewType & Pcol2Accol,
498 const LocalOrdinalViewType & PIcol2Accol,
499 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
500 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
501 const std::string& label,
502 const Teuchos::RCP<Teuchos::ParameterList>& params) {
// Timer bookkeeping: timer names are prefixed with "TpetraExt <label>: ".
506 #ifdef HAVE_TPETRA_MMM_TIMINGS
507 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
508 using Teuchos::TimeMonitor;
509 Teuchos::RCP<TimeMonitor> MM;
// nodename is not used again in the lines visible in this block.
513 std::string nodename(
"OpenMP");
// Default algorithm, overridden by "openmp: rap algorithm" when present.
516 std::string myalg(
"LTG");
518 if(!params.is_null()) {
519 if(params->isParameter(
"openmp: rap algorithm"))
520 myalg = params->get(
"openmp: rap algorithm",myalg);
// Newmatrix low-thread Gustavson R*A*P kernel.
525 ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_newmatrix_LowThreadGustavsonKernel(Rview,Aview,Pview,Acol2Prow,Acol2PIrow,Pcol2Accol,PIcol2Accol,Ac,Acimport,label,params);
// Error path for an algorithm name this wrapper does not handle.
528 throw std::runtime_error(
"Tpetra::MatrixMatrix::R_A_P newmatrix unknown kernel");
// OpenMP implementation of the R*A*P "reuse" wrapper.
//
// Reads the algorithm name from "openmp: rap algorithm" (default "LTG"), calls
// the reuse low-thread Gustavson R*A*P kernel with all arguments forwarded
// unchanged, and finally calls C.fillComplete with C's own domain and range
// maps. A std::runtime_error is thrown on the alternative path.
//
// Fix: the exception text used to read "R_A_P newmatrix unknown kernel", which
// pointed at the wrong wrapper; it now says "reuse", matching the messages of
// the other reuse wrappers in this file.
// NOTE(review): the if/else lines that choose between the kernel call and the
// throw (and the closing braces / #endif lines) are not visible in this
// listing - confirm against the original file.
533 template<
class Scalar,
536 class LocalOrdinalViewType>
537 void KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_R_A_P_reuse_kernel_wrapper(
538 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Rview,
539 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
540 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
542 const LocalOrdinalViewType & Acol2Prow,
543 const LocalOrdinalViewType & Acol2Irow,
544 const LocalOrdinalViewType & Pcol2Ccol,
545 const LocalOrdinalViewType & Icol2Ccol,
546 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
547 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
548 const std::string& label,
549 const Teuchos::RCP<Teuchos::ParameterList>& params) {
// Timer bookkeeping: timer names are prefixed with "TpetraExt <label>: ".
551 #ifdef HAVE_TPETRA_MMM_TIMINGS
552 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
553 using Teuchos::TimeMonitor;
554 Teuchos::RCP<TimeMonitor> MM;
// Default algorithm, overridden by "openmp: rap algorithm" when present.
561 std::string myalg(
"LTG");
562 if(!params.is_null()) {
563 if(params->isParameter(
"openmp: rap algorithm"))
564 myalg = params->get(
"openmp: rap algorithm",myalg);
// Reuse low-thread Gustavson R*A*P kernel.
569 ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_reuse_LowThreadGustavsonKernel(Rview,Aview,Pview,Acol2Prow,Acol2Irow,Pcol2Ccol,Icol2Ccol,C,Cimport,label,params);
// Error path for an algorithm name this wrapper does not handle.
572 throw std::runtime_error(
"Tpetra::MatrixMatrix::R_A_P reuse unknown kernel");
575 #ifdef HAVE_TPETRA_MMM_TIMINGS
576 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"RAP Reuse OpenMPESFC"))));
// Complete C using the domain and range maps it already holds.
578 C.fillComplete(C.getDomainMap(), C.getRangeMap());
// OpenMP implementation of the P^T*A*P "newmatrix" wrapper.
//
// Reads the algorithm name from "openmp: ptap algorithm" (default "LTG"),
// builds a local transpose of P with RowMatrixTransposer, wraps it in a
// CrsMatrixStruct named Rview, and then calls the newmatrix low-thread
// Gustavson R*A*P kernel with that Rview in place of R. A std::runtime_error
// with the message "Tpetra::MatrixMatrix::PT_A_P newmatrix unknown kernel" is
// thrown on the alternative path.
// NOTE(review): the if/else lines of the dispatch, the tail of the
// transposeParams->set(...) call, closing braces and #endif lines are not
// visible in this listing - confirm against the original file.
585 template<
class Scalar,
588 class LocalOrdinalViewType>
589 void KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_PT_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
591 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
592 const LocalOrdinalViewType & Acol2Prow,
593 const LocalOrdinalViewType & Acol2PIrow,
594 const LocalOrdinalViewType & Pcol2Accol,
595 const LocalOrdinalViewType & PIcol2Accol,
596 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
597 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
598 const std::string& label,
599 const Teuchos::RCP<Teuchos::ParameterList>& params) {
// Timer bookkeeping: timer names are prefixed with "TpetraExt <label>: ".
602 #ifdef HAVE_TPETRA_MMM_TIMINGS
603 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
604 using Teuchos::TimeMonitor;
605 Teuchos::RCP<TimeMonitor> MM;
// nodename is not used again in the lines visible in this block.
609 std::string nodename(
"OpenMP");
// Default algorithm, overridden by "openmp: ptap algorithm" when present.
612 std::string myalg(
"LTG");
614 if(!params.is_null()) {
615 if(params->isParameter(
"openmp: ptap algorithm"))
616 myalg = params->get(
"openmp: ptap algorithm",myalg);
620 #ifdef HAVE_TPETRA_MMM_TIMINGS
621 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"PTAP local transpose"))));
// Transposer for P; its label is the caller's label followed by "XP: ".
624 typedef RowMatrixTransposer<Scalar,LocalOrdinal,GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode> transposer_type;
625 transposer_type transposer (Pview.origMatrix,label+std::string(
"XP: "));
// The transpose's "compute global constants" option is taken from the caller's
// "compute global constants: temporaries" parameter.
// NOTE(review): the default value passed to params->get(...) and the end of
// this statement are on a line that is not visible here.
626 Teuchos::RCP<Teuchos::ParameterList> transposeParams = Teuchos::rcp(
new Teuchos::ParameterList);
627 if (!params.is_null())
628 transposeParams->set(
"compute global constants",
629 params->get(
"compute global constants: temporaries",
// Local transpose of P, wrapped so that it can be passed as the R operand.
// Only origMatrix is set on Rview in the visible lines.
631 Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode> > Ptrans = transposer.createTransposeLocal(transposeParams);
632 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode> Rview;
633 Rview.origMatrix = Ptrans;
634 ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_newmatrix_LowThreadGustavsonKernel(Rview,Aview,Pview,Acol2Prow,Acol2PIrow,Pcol2Accol,PIcol2Accol,Ac,Acimport,label,params);
// Error path for an algorithm name this wrapper does not handle.
637 throw std::runtime_error(
"Tpetra::MatrixMatrix::PT_A_P newmatrix unknown kernel");
// OpenMP implementation of the P^T*A*P "reuse" wrapper.
//
// Reads the algorithm name from "openmp: ptap algorithm" (default "LTG"),
// builds a local transpose of P with RowMatrixTransposer, wraps it in a
// CrsMatrixStruct named Rview, calls the reuse low-thread Gustavson R*A*P
// kernel with that Rview in place of R, and finally calls Ac.fillComplete with
// Ac's own domain and range maps. A std::runtime_error with the message
// "Tpetra::MatrixMatrix::PT_A_P reuse unknown kernel" is thrown on the
// alternative path.
// NOTE(review): the if/else lines of the dispatch, the tail of the
// transposeParams->set(...) call, closing braces and #endif lines are not
// visible in this listing - confirm against the original file.
642 template<
class Scalar,
645 class LocalOrdinalViewType>
646 void KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_PT_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
648 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
649 const LocalOrdinalViewType & Acol2Prow,
650 const LocalOrdinalViewType & Acol2PIrow,
651 const LocalOrdinalViewType & Pcol2Accol,
652 const LocalOrdinalViewType & PIcol2Accol,
653 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
654 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
655 const std::string& label,
656 const Teuchos::RCP<Teuchos::ParameterList>& params) {
// Timer bookkeeping: timer names are prefixed with "TpetraExt <label>: ".
659 #ifdef HAVE_TPETRA_MMM_TIMINGS
660 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
661 using Teuchos::TimeMonitor;
662 Teuchos::RCP<TimeMonitor> MM;
// nodename is not used again in the lines visible in this block.
666 std::string nodename(
"OpenMP");
// Default algorithm, overridden by "openmp: ptap algorithm" when present.
669 std::string myalg(
"LTG");
671 if(!params.is_null()) {
672 if(params->isParameter(
"openmp: ptap algorithm"))
673 myalg = params->get(
"openmp: ptap algorithm",myalg);
677 #ifdef HAVE_TPETRA_MMM_TIMINGS
678 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"PTAP local transpose"))));
// Transposer for P; its label is the caller's label followed by "XP: ".
681 typedef RowMatrixTransposer<Scalar,LocalOrdinal,GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode> transposer_type;
682 transposer_type transposer (Pview.origMatrix,label+std::string(
"XP: "));
// The transpose's "compute global constants" option is taken from the caller's
// "compute global constants: temporaries" parameter.
// NOTE(review): the default value passed to params->get(...) and the end of
// this statement are on a line that is not visible here.
683 Teuchos::RCP<Teuchos::ParameterList> transposeParams = Teuchos::rcp(
new Teuchos::ParameterList);
684 if (!params.is_null())
685 transposeParams->set(
"compute global constants",
686 params->get(
"compute global constants: temporaries",
// Local transpose of P, wrapped so that it can be passed as the R operand.
// Only origMatrix is set on Rview in the visible lines.
688 Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode> > Ptrans = transposer.createTransposeLocal(transposeParams);
689 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode> Rview;
690 Rview.origMatrix = Ptrans;
691 ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_reuse_LowThreadGustavsonKernel(Rview,Aview,Pview,Acol2Prow,Acol2PIrow,Pcol2Accol,PIcol2Accol,Ac,Acimport,label,params);
// Error path for an algorithm name this wrapper does not handle.
694 throw std::runtime_error(
"Tpetra::MatrixMatrix::PT_A_P reuse unknown kernel");
// Complete Ac using the domain and range maps it already holds.
696 Ac.fillComplete(Ac.getDomainMap(), Ac.getRangeMap());
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, execution_space, void, typename local_graph_type::size_type > local_matrix_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI process.