Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
TpetraExt_MatrixMatrix_OpenMP.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_MATRIXMATRIX_OPENMP_DEF_HPP
11 #define TPETRA_MATRIXMATRIX_OPENMP_DEF_HPP
12 
13 #ifdef HAVE_TPETRA_INST_OPENMP
14 namespace Tpetra {
15 namespace MMdetails {
16 
17 /*********************************************************************************************************/
18 // MMM KernelWrappers for Partial Specialization to OpenMP
19 template <class Scalar,
20  class LocalOrdinal,
21  class GlobalOrdinal, class LocalOrdinalViewType>
22 struct KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType> {
23  static inline void mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
24  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
25  const LocalOrdinalViewType& Acol2Brow,
26  const LocalOrdinalViewType& Acol2Irow,
27  const LocalOrdinalViewType& Bcol2Ccol,
28  const LocalOrdinalViewType& Icol2Ccol,
29  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
30  Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
31  const std::string& label = std::string(),
32  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
33 
34  static inline void mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
35  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
36  const LocalOrdinalViewType& Acol2Brow,
37  const LocalOrdinalViewType& Acol2Irow,
38  const LocalOrdinalViewType& Bcol2Ccol,
39  const LocalOrdinalViewType& Icol2Ccol,
40  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
41  Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
42  const std::string& label = std::string(),
43  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
44 };
45 
46 // Jacobi KernelWrappers for Partial Specialization to OpenMP
47 template <class Scalar,
48  class LocalOrdinal,
49  class GlobalOrdinal, class LocalOrdinalViewType>
50 struct KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType> {
51  static inline void jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
52  const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Dinv,
53  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
54  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
55  const LocalOrdinalViewType& Acol2Brow,
56  const LocalOrdinalViewType& Acol2Irow,
57  const LocalOrdinalViewType& Bcol2Ccol,
58  const LocalOrdinalViewType& Icol2Ccol,
59  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
60  Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
61  const std::string& label = std::string(),
62  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
63 
64  static inline void jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
65  const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Dinv,
66  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
67  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
68  const LocalOrdinalViewType& Acol2Brow,
69  const LocalOrdinalViewType& Acol2Irow,
70  const LocalOrdinalViewType& Bcol2Ccol,
71  const LocalOrdinalViewType& Icol2Ccol,
72  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
73  Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
74  const std::string& label = std::string(),
75  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
76 
77  static inline void jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
78  const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Dinv,
79  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
80  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
81  const LocalOrdinalViewType& Acol2Brow,
82  const LocalOrdinalViewType& Acol2Irow,
83  const LocalOrdinalViewType& Bcol2Ccol,
84  const LocalOrdinalViewType& Icol2Ccol,
85  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
86  Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
87  const std::string& label = std::string(),
88  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
89 };
90 
91 // Triple-Product KernelWrappers for Partial Specialization to OpenMP
92 template <class Scalar,
93  class LocalOrdinal,
94  class GlobalOrdinal, class LocalOrdinalViewType>
95 struct KernelWrappers3<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType> {
96  static inline void mult_R_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Rview,
97  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
98  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
99  const LocalOrdinalViewType& Acol2Prow,
100  const LocalOrdinalViewType& Acol2PIrow,
101  const LocalOrdinalViewType& Pcol2Ccol,
102  const LocalOrdinalViewType& PIcol2Ccol,
103  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
104  Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
105  const std::string& label = std::string(),
106  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
107 
108  static inline void mult_R_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Rview,
109  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
110  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
111  const LocalOrdinalViewType& Acol2Prow,
112  const LocalOrdinalViewType& Acol2PIrow,
113  const LocalOrdinalViewType& Pcol2Ccol,
114  const LocalOrdinalViewType& PIcol2Ccol,
115  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
116  Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
117  const std::string& label = std::string(),
118  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
119 
120  static inline void mult_PT_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
121  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
122  const LocalOrdinalViewType& Acol2Prow,
123  const LocalOrdinalViewType& Acol2PIrow,
124  const LocalOrdinalViewType& Pcol2Ccol,
125  const LocalOrdinalViewType& PIcol2Ccol,
126  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
127  Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
128  const std::string& label = std::string(),
129  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
130 
131  static inline void mult_PT_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
132  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
133  const LocalOrdinalViewType& Acol2Prow,
134  const LocalOrdinalViewType& Acol2PIrow,
135  const LocalOrdinalViewType& Pcol2Ccol,
136  const LocalOrdinalViewType& PIcol2Ccol,
137  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
138  Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
139  const std::string& label = std::string(),
140  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
141 };
142 
143 /*********************************************************************************************************/
144 template <class Scalar,
145  class LocalOrdinal,
146  class GlobalOrdinal,
147  class LocalOrdinalViewType>
148 void KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
149  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
150  const LocalOrdinalViewType& Acol2Brow,
151  const LocalOrdinalViewType& Acol2Irow,
152  const LocalOrdinalViewType& Bcol2Ccol,
153  const LocalOrdinalViewType& Icol2Ccol,
154  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
155  Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
156  const std::string& label,
157  const Teuchos::RCP<Teuchos::ParameterList>& params) {
158 #ifdef HAVE_TPETRA_MMM_TIMINGS
159  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
160  using Teuchos::TimeMonitor;
161  Teuchos::RCP<TimeMonitor> MM;
162 #endif
163 
164  // Node-specific code
165  std::string nodename("OpenMP");
166 
167  // Lots and lots of typedefs
168  using Teuchos::RCP;
170  typedef typename KCRS::device_type device_t;
171  typedef typename KCRS::StaticCrsGraphType graph_t;
172  typedef typename graph_t::row_map_type::non_const_type lno_view_t;
173  typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
174  typedef typename KCRS::values_type::non_const_type scalar_view_t;
175 
176  // Options
177  int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
178  std::string myalg("SPGEMM_KK_MEMORY");
179 
180  if (!params.is_null()) {
181  if (params->isParameter("openmp: algorithm"))
182  myalg = params->get("openmp: algorithm", myalg);
183  if (params->isParameter("openmp: team work size"))
184  team_work_size = params->get("openmp: team work size", team_work_size);
185  }
186 
187  if (myalg == "LTG") {
188  // Use the LTG kernel if requested
189  ::Tpetra::MatrixMatrix::ExtraKernels::mult_A_B_newmatrix_LowThreadGustavsonKernel(Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
190  } else {
191  // Use the Kokkos-Kernels OpenMP Kernel
192 #ifdef HAVE_TPETRA_MMM_TIMINGS
193  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPWrapper"))));
194 #endif
195  // KokkosKernelsHandle
196  typedef KokkosKernels::Experimental::KokkosKernelsHandle<
197  typename lno_view_t::const_value_type, typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
198  typename device_t::execution_space, typename device_t::memory_space, typename device_t::memory_space>
199  KernelHandle;
200 
201  // Grab the Kokkos::SparseCrsMatrices
202  const KCRS& Ak = Aview.origMatrix->getLocalMatrixDevice();
203  // const KCRS & Bk = Bview.origMatrix->getLocalMatrixDevice();
204 
205  // Get the algorithm mode
206  std::string alg = nodename + std::string(" algorithm");
207  // printf("DEBUG: Using kernel: %s\n",myalg.c_str());
208  if (!params.is_null() && params->isParameter(alg)) myalg = params->get(alg, myalg);
209  KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
210 
211  // Merge the B and Bimport matrices
212  const KCRS Bmerged = Tpetra::MMdetails::merge_matrices(Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C.getColMap()->getLocalNumElements());
213 
214 #ifdef HAVE_TPETRA_MMM_TIMINGS
215  MM = Teuchos::null;
216  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPCore"))));
217 #endif
218 
219  // Do the multiply on whatever we've got
220  typename KernelHandle::nnz_lno_t AnumRows = Ak.numRows();
221  // typename KernelHandle::nnz_lno_t BnumRows = Bmerged->numRows();
222  // typename KernelHandle::nnz_lno_t BnumCols = Bmerged->numCols();
223  typename KernelHandle::nnz_lno_t BnumRows = Bmerged.numRows();
224  typename KernelHandle::nnz_lno_t BnumCols = Bmerged.numCols();
225 
226  lno_view_t row_mapC(Kokkos::ViewAllocateWithoutInitializing("non_const_lnow_row"), AnumRows + 1);
227  lno_nnz_view_t entriesC;
228  scalar_view_t valuesC;
229  KernelHandle kh;
230  kh.create_spgemm_handle(alg_enum);
231  kh.set_team_work_size(team_work_size);
232  // KokkosSparse::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,false,Bmerged->graph.row_map,Bmerged->graph.entries,false,row_mapC);
233  KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols, Ak.graph.row_map, Ak.graph.entries, false, Bmerged.graph.row_map, Bmerged.graph.entries, false, row_mapC);
234 
235  size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
236  if (c_nnz_size) {
237  entriesC = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
238  valuesC = scalar_view_t(Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
239  }
240  // KokkosSparse::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,Ak.values,false,Bmerged->graph.row_map,Bmerged->graph.entries,Bmerged->values,false,row_mapC,entriesC,valuesC);
241  KokkosSparse::spgemm_numeric(&kh, AnumRows, BnumRows, BnumCols, Ak.graph.row_map, Ak.graph.entries, Ak.values, false, Bmerged.graph.row_map, Bmerged.graph.entries, Bmerged.values, false, row_mapC, entriesC, valuesC);
242  kh.destroy_spgemm_handle();
243 
244 #ifdef HAVE_TPETRA_MMM_TIMINGS
245  MM = Teuchos::null;
246  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPSort"))));
247 #endif
248  // Sort & set values
249  if (params.is_null() || params->get("sort entries", true))
250  Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
251  C.setAllValues(row_mapC, entriesC, valuesC);
252 
253  } // end OMP KokkosKernels loop
254 
255 #ifdef HAVE_TPETRA_MMM_TIMINGS
256  MM = Teuchos::null;
257  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPESFC"))));
258 #endif
259 
260  // Final Fillcomplete
261  RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
262  labelList->set("Timer Label", label);
263  if (!params.is_null()) labelList->set("compute global constants", params->get("compute global constants", true));
264  RCP<const Export<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > dummyExport;
265  C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
266 
267 #if 0
268  {
269  Teuchos::ArrayRCP< const size_t > Crowptr;
270  Teuchos::ArrayRCP< const LocalOrdinal > Ccolind;
271  Teuchos::ArrayRCP< const Scalar > Cvalues;
272  C.getAllValues(Crowptr,Ccolind,Cvalues);
273 
274  // DEBUG
275  int MyPID = C->getComm()->getRank();
276  printf("[%d] Crowptr = ",MyPID);
277  for(size_t i=0; i<(size_t) Crowptr.size(); i++) {
278  printf("%3d ",(int)Crowptr.getConst()[i]);
279  }
280  printf("\n");
281  printf("[%d] Ccolind = ",MyPID);
282  for(size_t i=0; i<(size_t)Ccolind.size(); i++) {
283  printf("%3d ",(int)Ccolind.getConst()[i]);
284  }
285  printf("\n");
286  fflush(stdout);
287  // END DEBUG
288  }
289 #endif
290 }
291 
292 /*********************************************************************************************************/
293 template <class Scalar,
294  class LocalOrdinal,
295  class GlobalOrdinal,
296  class LocalOrdinalViewType>
297 void KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
298  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
299  const LocalOrdinalViewType& Acol2Brow,
300  const LocalOrdinalViewType& Acol2Irow,
301  const LocalOrdinalViewType& Bcol2Ccol,
302  const LocalOrdinalViewType& Icol2Ccol,
303  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
304  Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
305  const std::string& label,
306  const Teuchos::RCP<Teuchos::ParameterList>& params) {
307 #ifdef HAVE_TPETRA_MMM_TIMINGS
308  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
309  using Teuchos::TimeMonitor;
310  Teuchos::RCP<TimeMonitor> MM;
311 #endif
312 
313  // Lots and lots of typedefs
314  using Teuchos::RCP;
315 
316  // Options
317  int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
318  std::string myalg("LTG");
319  if (!params.is_null()) {
320  if (params->isParameter("openmp: algorithm"))
321  myalg = params->get("openmp: algorithm", myalg);
322  if (params->isParameter("openmp: team work size"))
323  team_work_size = params->get("openmp: team work size", team_work_size);
324  }
325 
326  if (myalg == "LTG") {
327  // Use the LTG kernel if requested
328  ::Tpetra::MatrixMatrix::ExtraKernels::mult_A_B_reuse_LowThreadGustavsonKernel(Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
329  } else {
330  throw std::runtime_error("Tpetra::MatrixMatrix::MMM reuse unknown kernel");
331  }
332 
333 #ifdef HAVE_TPETRA_MMM_TIMINGS
334  MM = Teuchos::null;
335  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Reuse OpenMPESFC"))));
336 #endif
337  C.fillComplete(C.getDomainMap(), C.getRangeMap());
338 }
339 
340 /*********************************************************************************************************/
341 template <class Scalar,
342  class LocalOrdinal,
343  class GlobalOrdinal,
344  class LocalOrdinalViewType>
345 void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
346  const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Dinv,
347  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
348  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
349  const LocalOrdinalViewType& Acol2Brow,
350  const LocalOrdinalViewType& Acol2Irow,
351  const LocalOrdinalViewType& Bcol2Ccol,
352  const LocalOrdinalViewType& Icol2Ccol,
353  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
354  Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
355  const std::string& label,
356  const Teuchos::RCP<Teuchos::ParameterList>& params) {
357 #ifdef HAVE_TPETRA_MMM_TIMINGS
358  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
359  using Teuchos::TimeMonitor;
360  Teuchos::RCP<TimeMonitor> MM;
361 #endif
362 
363  // Node-specific code
364  using Teuchos::RCP;
365 
366  // Options
367  int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
368  std::string myalg("LTG");
369  if (!params.is_null()) {
370  if (params->isParameter("openmp: jacobi algorithm"))
371  myalg = params->get("openmp: jacobi algorithm", myalg);
372  if (params->isParameter("openmp: team work size"))
373  team_work_size = params->get("openmp: team work size", team_work_size);
374  }
375 
376  if (myalg == "LTG") {
377  // Use the LTG kernel if requested
378  ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_LowThreadGustavsonKernel(omega, Dinv, Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
379  } else if (myalg == "MSAK") {
380  ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_MultiplyScaleAddKernel(omega, Dinv, Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
381  } else if (myalg == "KK") {
382  jacobi_A_B_newmatrix_KokkosKernels(omega, Dinv, Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
383  } else {
384  throw std::runtime_error("Tpetra::MatrixMatrix::Jacobi newmatrix unknown kernel");
385  }
386 
387 #ifdef HAVE_TPETRA_MMM_TIMINGS
388  MM = Teuchos::null;
389  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix OpenMPESFC"))));
390 #endif
391 
392  // Final Fillcomplete
393  RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
394  labelList->set("Timer Label", label);
395  if (!params.is_null()) labelList->set("compute global constants", params->get("compute global constants", true));
396 
397  // NOTE: MSAK already fillCompletes, so we have to check here
398  if (!C.isFillComplete()) {
399  RCP<const Export<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > dummyExport;
400  C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
401  }
402 }
403 
404 /*********************************************************************************************************/
405 template <class Scalar,
406  class LocalOrdinal,
407  class GlobalOrdinal,
408  class LocalOrdinalViewType>
409 void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
410  const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Dinv,
411  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
412  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
413  const LocalOrdinalViewType& Acol2Brow,
414  const LocalOrdinalViewType& Acol2Irow,
415  const LocalOrdinalViewType& Bcol2Ccol,
416  const LocalOrdinalViewType& Icol2Ccol,
417  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
418  Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
419  const std::string& label,
420  const Teuchos::RCP<Teuchos::ParameterList>& params) {
421 #ifdef HAVE_TPETRA_MMM_TIMINGS
422  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
423  using Teuchos::TimeMonitor;
424  Teuchos::RCP<TimeMonitor> MM;
425 #endif
426 
427  // Lots and lots of typedefs
428  using Teuchos::RCP;
429 
430  // Options
431  int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
432  std::string myalg("LTG");
433  if (!params.is_null()) {
434  if (params->isParameter("openmp: jacobi algorithm"))
435  myalg = params->get("openmp: jacobi algorithm", myalg);
436  if (params->isParameter("openmp: team work size"))
437  team_work_size = params->get("openmp: team work size", team_work_size);
438  }
439 
440  if (myalg == "LTG") {
441  // Use the LTG kernel if requested
442  ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_reuse_LowThreadGustavsonKernel(omega, Dinv, Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
443  } else {
444  throw std::runtime_error("Tpetra::MatrixMatrix::Jacobi reuse unknown kernel");
445  }
446 
447 #ifdef HAVE_TPETRA_MMM_TIMINGS
448  MM = Teuchos::null;
449  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Reuse OpenMPESFC"))));
450 #endif
451  C.fillComplete(C.getDomainMap(), C.getRangeMap());
452 }
453 
454 /*********************************************************************************************************/
455 template <class Scalar,
456  class LocalOrdinal,
457  class GlobalOrdinal,
458  class LocalOrdinalViewType>
459 void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
460  const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Dinv,
461  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
462  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
463  const LocalOrdinalViewType& Acol2Brow,
464  const LocalOrdinalViewType& Acol2Irow,
465  const LocalOrdinalViewType& Bcol2Ccol,
466  const LocalOrdinalViewType& Icol2Ccol,
467  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
468  Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
469  const std::string& label,
470  const Teuchos::RCP<Teuchos::ParameterList>& params) {
471 #ifdef HAVE_TPETRA_MMM_TIMINGS
472  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
473  using Teuchos::TimeMonitor;
474  Teuchos::RCP<TimeMonitor> MM;
475 #endif
476 
477  // Check if the diagonal entries exist in debug mode
478  const bool debug = Tpetra::Details::Behavior::debug();
479  if (debug) {
480  auto rowMap = Aview.origMatrix->getRowMap();
482  Aview.origMatrix->getLocalDiagCopy(diags);
483  size_t diagLength = rowMap->getLocalNumElements();
484  Teuchos::Array<Scalar> diagonal(diagLength);
485  diags.get1dCopy(diagonal());
486 
487  for (size_t i = 0; i < diagLength; ++i) {
488  TEUCHOS_TEST_FOR_EXCEPTION(diagonal[i] == Teuchos::ScalarTraits<Scalar>::zero(),
489  std::runtime_error,
490  "Matrix A has a zero/missing diagonal: " << diagonal[i] << std::endl
491  << "KokkosKernels Jacobi-fused SpGEMM requires nonzero diagonal entries in A" << std::endl);
492  }
493  }
494 
495  // Usings
496  using device_t = typename Tpetra::KokkosCompat::KokkosOpenMPWrapperNode::device_type;
498  using graph_t = typename matrix_t::StaticCrsGraphType;
499  using lno_view_t = typename graph_t::row_map_type::non_const_type;
500  using c_lno_view_t = typename graph_t::row_map_type::const_type;
501  using lno_nnz_view_t = typename graph_t::entries_type::non_const_type;
502  using scalar_view_t = typename matrix_t::values_type::non_const_type;
503 
504  // KokkosKernels handle
505  using handle_t = typename KokkosKernels::Experimental::KokkosKernelsHandle<
506  typename lno_view_t::const_value_type, typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
507  typename device_t::execution_space, typename device_t::memory_space, typename device_t::memory_space>;
508 
509  // Get the rowPtr, colInd and vals of importMatrix
510  c_lno_view_t Irowptr;
511  lno_nnz_view_t Icolind;
512  scalar_view_t Ivals;
513  if (!Bview.importMatrix.is_null()) {
514  auto lclB = Bview.importMatrix->getLocalMatrixDevice();
515  Irowptr = lclB.graph.row_map;
516  Icolind = lclB.graph.entries;
517  Ivals = lclB.values;
518  }
519 
520  // Merge the B and Bimport matrices
521  const matrix_t Bmerged = Tpetra::MMdetails::merge_matrices(Aview, Bview, Acol2Brow, Acol2Irow, Bcol2Ccol, Icol2Ccol, C.getColMap()->getLocalNumElements());
522 
523  // Get the properties and arrays of input matrices
524  const matrix_t& Amat = Aview.origMatrix->getLocalMatrixDevice();
525  const matrix_t& Bmat = Bview.origMatrix->getLocalMatrixDevice();
526 
527  typename handle_t::nnz_lno_t AnumRows = Amat.numRows();
528  typename handle_t::nnz_lno_t BnumRows = Bmerged.numRows();
529  typename handle_t::nnz_lno_t BnumCols = Bmerged.numCols();
530 
531  c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmerged.graph.row_map;
532  const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmerged.graph.entries;
533  const scalar_view_t Avals = Amat.values, Bvals = Bmerged.values;
534 
535  // Arrays of the output matrix
536  lno_view_t row_mapC(Kokkos::ViewAllocateWithoutInitializing("non_const_lnow_row"), AnumRows + 1);
537  lno_nnz_view_t entriesC;
538  scalar_view_t valuesC;
539 
540  // Options
541  int team_work_size = 16;
542  std::string myalg("SPGEMM_KK_MEMORY");
543  if (!params.is_null()) {
544  if (params->isParameter("cuda: algorithm"))
545  myalg = params->get("cuda: algorithm", myalg);
546  if (params->isParameter("cuda: team work size"))
547  team_work_size = params->get("cuda: team work size", team_work_size);
548  }
549 
550  // Get the algorithm mode
551  std::string nodename("OpenMP");
552  std::string alg = nodename + std::string(" algorithm");
553  if (!params.is_null() && params->isParameter(alg)) myalg = params->get(alg, myalg);
554  KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
555 
556  // KokkosKernels call
557  handle_t kh;
558  kh.create_spgemm_handle(alg_enum);
559  kh.set_team_work_size(team_work_size);
560 
561  KokkosSparse::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
562  Arowptr, Acolind, false,
563  Browptr, Bcolind, false,
564  row_mapC);
565 
566  size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
567  if (c_nnz_size) {
568  entriesC = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
569  valuesC = scalar_view_t(Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
570  }
571 
572  KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
573  Arowptr, Acolind, Avals, false,
574  Browptr, Bcolind, Bvals, false,
575  row_mapC, entriesC, valuesC,
576  omega, Dinv.getLocalViewDevice(Tpetra::Access::ReadOnly));
577  kh.destroy_spgemm_handle();
578 
579 #ifdef HAVE_TPETRA_MMM_TIMINGS
580  MM = Teuchos::null;
581  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix OpenMPSort"))));
582 #endif
583 
584  // Sort & set values
585  if (params.is_null() || params->get("sort entries", true))
586  Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
587  C.setAllValues(row_mapC, entriesC, valuesC);
588 
589 #ifdef HAVE_TPETRA_MMM_TIMINGS
590  MM = Teuchos::null;
591  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix OpenMPESFC"))));
592 #endif
593 
594  // Final Fillcomplete
595  Teuchos::RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
596  labelList->set("Timer Label", label);
597  if (!params.is_null()) labelList->set("compute global constants", params->get("compute global constants", true));
598  Teuchos::RCP<const Export<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > dummyExport;
599  C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
600 }
601 
602 /*********************************************************************************************************/
603 template <class Scalar,
604  class LocalOrdinal,
605  class GlobalOrdinal,
606  class LocalOrdinalViewType>
607 void KernelWrappers3<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::mult_R_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Rview,
608  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
609  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
610  const LocalOrdinalViewType& Acol2Prow,
611  const LocalOrdinalViewType& Acol2PIrow,
612  const LocalOrdinalViewType& Pcol2Accol,
613  const LocalOrdinalViewType& PIcol2Accol,
614  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
615  Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
616  const std::string& label,
617  const Teuchos::RCP<Teuchos::ParameterList>& params) {
618 #ifdef HAVE_TPETRA_MMM_TIMINGS
619  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
620  using Teuchos::TimeMonitor;
621  Teuchos::RCP<TimeMonitor> MM;
622 #endif
623 
624  // Node-specific code
625  std::string nodename("OpenMP");
626 
627  // Options
628  std::string myalg("LTG");
629 
630  if (!params.is_null()) {
631  if (params->isParameter("openmp: rap algorithm"))
632  myalg = params->get("openmp: rap algorithm", myalg);
633  }
634 
635  if (myalg == "LTG") {
636  // Use the LTG kernel if requested
637  ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_newmatrix_LowThreadGustavsonKernel(Rview, Aview, Pview, Acol2Prow, Acol2PIrow, Pcol2Accol, PIcol2Accol, Ac, Acimport, label, params);
638  } else {
639  throw std::runtime_error("Tpetra::MatrixMatrix::R_A_P newmatrix unknown kernel");
640  }
641 }
642 
643 /*********************************************************************************************************/
644 template <class Scalar,
645  class LocalOrdinal,
646  class GlobalOrdinal,
647  class LocalOrdinalViewType>
648 void KernelWrappers3<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::mult_R_A_P_reuse_kernel_wrapper(
649  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Rview,
650  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
651  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
652 
653  const LocalOrdinalViewType& Acol2Prow,
654  const LocalOrdinalViewType& Acol2Irow,
655  const LocalOrdinalViewType& Pcol2Ccol,
656  const LocalOrdinalViewType& Icol2Ccol,
657  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
658  Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
659  const std::string& label,
660  const Teuchos::RCP<Teuchos::ParameterList>& params) {
661 #ifdef HAVE_TPETRA_MMM_TIMINGS
662  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
663  using Teuchos::TimeMonitor;
664  Teuchos::RCP<TimeMonitor> MM;
665 #endif
666 
667  // Lots and lots of typedefs
668  using Teuchos::RCP;
669 
670  // Options
671  std::string myalg("LTG");
672  if (!params.is_null()) {
673  if (params->isParameter("openmp: rap algorithm"))
674  myalg = params->get("openmp: rap algorithm", myalg);
675  }
676 
677  if (myalg == "LTG") {
678  // Use the LTG kernel if requested
679  ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_reuse_LowThreadGustavsonKernel(Rview, Aview, Pview, Acol2Prow, Acol2Irow, Pcol2Ccol, Icol2Ccol, C, Cimport, label, params);
680  } else {
681  throw std::runtime_error("Tpetra::MatrixMatrix::R_A_P newmatrix unknown kernel");
682  }
683 
684 #ifdef HAVE_TPETRA_MMM_TIMINGS
685  MM = Teuchos::null;
686  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("RAP Reuse OpenMPESFC"))));
687 #endif
688  C.fillComplete(C.getDomainMap(), C.getRangeMap());
689 }
690 
691 /*********************************************************************************************************/
692 template <class Scalar,
693  class LocalOrdinal,
694  class GlobalOrdinal,
695  class LocalOrdinalViewType>
696 void KernelWrappers3<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::mult_PT_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
697 
698  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
699  const LocalOrdinalViewType& Acol2Prow,
700  const LocalOrdinalViewType& Acol2PIrow,
701  const LocalOrdinalViewType& Pcol2Accol,
702  const LocalOrdinalViewType& PIcol2Accol,
703  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
704  Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
705  const std::string& label,
706  const Teuchos::RCP<Teuchos::ParameterList>& params) {
707 #ifdef HAVE_TPETRA_MMM_TIMINGS
708  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
709  using Teuchos::TimeMonitor;
710  Teuchos::RCP<TimeMonitor> MM;
711 #endif
712 
713  // Node-specific code
714  std::string nodename("OpenMP");
715 
716  // Options
717  std::string myalg("LTG");
718 
719  if (!params.is_null()) {
720  if (params->isParameter("openmp: ptap algorithm"))
721  myalg = params->get("openmp: ptap algorithm", myalg);
722  }
723 
724  if (myalg == "LTG") {
725 #ifdef HAVE_TPETRA_MMM_TIMINGS
726  MM = Teuchos::null;
727  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("PTAP local transpose"))));
728 #endif
729 
730  using Teuchos::ParameterList;
731  using Teuchos::RCP;
732  using LO = LocalOrdinal;
733  using GO = GlobalOrdinal;
734  using SC = Scalar;
735 
736  // We don't need a kernel-level PTAP, we just transpose here
737  using transposer_type =
738  RowMatrixTransposer<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>;
739  transposer_type transposer(Pview.origMatrix, label + "XP: ");
740  RCP<ParameterList> transposeParams(new ParameterList);
741  if (!params.is_null()) {
742  transposeParams->set("compute global constants",
743  params->get("compute global constants: temporaries",
744  false));
745  }
746  transposeParams->set("sort", false);
747  RCP<CrsMatrix<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Ptrans =
748  transposer.createTransposeLocal(transposeParams);
749  CrsMatrixStruct<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> Rview;
750  Rview.origMatrix = Ptrans;
751 
752  using ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_newmatrix_LowThreadGustavsonKernel;
753  mult_R_A_P_newmatrix_LowThreadGustavsonKernel(Rview, Aview, Pview, Acol2Prow, Acol2PIrow, Pcol2Accol,
754  PIcol2Accol, Ac, Acimport, label, params);
755  } else {
756  throw std::runtime_error("Tpetra::MatrixMatrix::PT_A_P newmatrix unknown kernel");
757  }
758 }
759 
760 /*********************************************************************************************************/
761 template <class Scalar,
762  class LocalOrdinal,
763  class GlobalOrdinal,
764  class LocalOrdinalViewType>
765 void KernelWrappers3<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode, LocalOrdinalViewType>::mult_PT_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
766 
767  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
768  const LocalOrdinalViewType& Acol2Prow,
769  const LocalOrdinalViewType& Acol2PIrow,
770  const LocalOrdinalViewType& Pcol2Accol,
771  const LocalOrdinalViewType& PIcol2Accol,
772  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
773  Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
774  const std::string& label,
775  const Teuchos::RCP<Teuchos::ParameterList>& params) {
776 #ifdef HAVE_TPETRA_MMM_TIMINGS
777  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
778  using Teuchos::TimeMonitor;
779  Teuchos::RCP<TimeMonitor> MM;
780 #endif
781 
782  // Node-specific code
783  std::string nodename("OpenMP");
784 
785  // Options
786  std::string myalg("LTG");
787 
788  if (!params.is_null()) {
789  if (params->isParameter("openmp: ptap algorithm"))
790  myalg = params->get("openmp: ptap algorithm", myalg);
791  }
792 
793  if (myalg == "LTG") {
794 #ifdef HAVE_TPETRA_MMM_TIMINGS
795  MM = Teuchos::null;
796  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("PTAP local transpose"))));
797 #endif
798 
799  using Teuchos::ParameterList;
800  using Teuchos::RCP;
801  using LO = LocalOrdinal;
802  using GO = GlobalOrdinal;
803  using SC = Scalar;
804 
805  // We don't need a kernel-level PTAP, we just transpose here
806  using transposer_type =
807  RowMatrixTransposer<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>;
808  transposer_type transposer(Pview.origMatrix, label + "XP: ");
809  RCP<ParameterList> transposeParams(new ParameterList);
810  if (!params.is_null()) {
811  transposeParams->set("compute global constants",
812  params->get("compute global constants: temporaries",
813  false));
814  }
815  transposeParams->set("sort", false);
816  RCP<CrsMatrix<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Ptrans =
817  transposer.createTransposeLocal(transposeParams);
818  CrsMatrixStruct<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> Rview;
819  Rview.origMatrix = Ptrans;
820 
821  using ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_reuse_LowThreadGustavsonKernel;
822  mult_R_A_P_reuse_LowThreadGustavsonKernel(Rview, Aview, Pview, Acol2Prow, Acol2PIrow, Pcol2Accol,
823  PIcol2Accol, Ac, Acimport, label, params);
824  } else {
825  throw std::runtime_error("Tpetra::MatrixMatrix::PT_A_P reuse unknown kernel");
826  }
827  Ac.fillComplete(Ac.getDomainMap(), Ac.getRangeMap());
828 }
829 
830 } // namespace MMdetails
831 } // namespace Tpetra
832 
833 #endif // OpenMP
834 
835 #endif
static bool debug()
Whether Tpetra is in debug mode.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
A distributed dense vector.