Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
TpetraExt_MatrixMatrix_OpenMP.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_MATRIXMATRIX_OPENMP_DEF_HPP
11 #define TPETRA_MATRIXMATRIX_OPENMP_DEF_HPP
12 
13 #ifdef HAVE_TPETRA_INST_OPENMP
14 namespace Tpetra {
15 namespace MMdetails {
16 
17 /*********************************************************************************************************/
18 // MMM KernelWrappers for Partial Specialization to OpenMP
19 template<class Scalar,
20  class LocalOrdinal,
21  class GlobalOrdinal, class LocalOrdinalViewType>
22 struct KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode,LocalOrdinalViewType> {
23  static inline void mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
24  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
25  const LocalOrdinalViewType & Acol2Brow,
26  const LocalOrdinalViewType & Acol2Irow,
27  const LocalOrdinalViewType & Bcol2Ccol,
28  const LocalOrdinalViewType & Icol2Ccol,
29  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
30  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
31  const std::string& label = std::string(),
32  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
33 
34  static inline void mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
35  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
36  const LocalOrdinalViewType & Acol2Brow,
37  const LocalOrdinalViewType & Acol2Irow,
38  const LocalOrdinalViewType & Bcol2Ccol,
39  const LocalOrdinalViewType & Icol2Ccol,
40  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
41  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
42  const std::string& label = std::string(),
43  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
44 
45 
46 };
47 
48 
49 // Jacobi KernelWrappers for Partial Specialization to OpenMP
50 template<class Scalar,
51  class LocalOrdinal,
52  class GlobalOrdinal, class LocalOrdinalViewType>
53 struct KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode,LocalOrdinalViewType> {
54  static inline void jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
55  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> & Dinv,
56  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
57  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
58  const LocalOrdinalViewType & Acol2Brow,
59  const LocalOrdinalViewType & Acol2Irow,
60  const LocalOrdinalViewType & Bcol2Ccol,
61  const LocalOrdinalViewType & Icol2Ccol,
62  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
63  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
64  const std::string& label = std::string(),
65  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
66 
67  static inline void jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
68  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> & Dinv,
69  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
70  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
71  const LocalOrdinalViewType & Acol2Brow,
72  const LocalOrdinalViewType & Acol2Irow,
73  const LocalOrdinalViewType & Bcol2Ccol,
74  const LocalOrdinalViewType & Icol2Ccol,
75  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
76  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
77  const std::string& label = std::string(),
78  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
79 
80  static inline void jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
81  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> & Dinv,
82  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
83  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
84  const LocalOrdinalViewType & Acol2Brow,
85  const LocalOrdinalViewType & Acol2Irow,
86  const LocalOrdinalViewType & Bcol2Ccol,
87  const LocalOrdinalViewType & Icol2Ccol,
88  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
89  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
90  const std::string& label = std::string(),
91  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
92 
93 };
94 
95 
96 // Triple-Product KernelWrappers for Partial Specialization to OpenMP
97 template<class Scalar,
98  class LocalOrdinal,
99  class GlobalOrdinal, class LocalOrdinalViewType>
100 struct KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode,LocalOrdinalViewType> {
101  static inline void mult_R_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Rview,
102  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
103  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
104  const LocalOrdinalViewType & Acol2Prow,
105  const LocalOrdinalViewType & Acol2PIrow,
106  const LocalOrdinalViewType & Pcol2Ccol,
107  const LocalOrdinalViewType & PIcol2Ccol,
108  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
109  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
110  const std::string& label = std::string(),
111  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
112 
113 static inline void mult_R_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Rview,
114  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
115  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
116  const LocalOrdinalViewType & Acol2Prow,
117  const LocalOrdinalViewType & Acol2PIrow,
118  const LocalOrdinalViewType & Pcol2Ccol,
119  const LocalOrdinalViewType & PIcol2Ccol,
120  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
121  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
122  const std::string& label = std::string(),
123  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
124 
125  static inline void mult_PT_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
126  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
127  const LocalOrdinalViewType & Acol2Prow,
128  const LocalOrdinalViewType & Acol2PIrow,
129  const LocalOrdinalViewType & Pcol2Ccol,
130  const LocalOrdinalViewType & PIcol2Ccol,
131  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
132  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
133  const std::string& label = std::string(),
134  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
135 
136  static inline void mult_PT_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
137  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
138  const LocalOrdinalViewType & Acol2Prow,
139  const LocalOrdinalViewType & Acol2PIrow,
140  const LocalOrdinalViewType & Pcol2Ccol,
141  const LocalOrdinalViewType & PIcol2Ccol,
142  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
143  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
144  const std::string& label = std::string(),
145  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
146 };
147 
148 
149 /*********************************************************************************************************/
150 template<class Scalar,
151  class LocalOrdinal,
152  class GlobalOrdinal,
153  class LocalOrdinalViewType>
154 void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
155  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
156  const LocalOrdinalViewType & Acol2Brow,
157  const LocalOrdinalViewType & Acol2Irow,
158  const LocalOrdinalViewType & Bcol2Ccol,
159  const LocalOrdinalViewType & Icol2Ccol,
160  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
161  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
162  const std::string& label,
163  const Teuchos::RCP<Teuchos::ParameterList>& params) {
164 
165 #ifdef HAVE_TPETRA_MMM_TIMINGS
166  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
167  using Teuchos::TimeMonitor;
168  Teuchos::RCP<TimeMonitor> MM;
169 #endif
170 
171  // Node-specific code
172  std::string nodename("OpenMP");
173 
174  // Lots and lots of typedefs
175  using Teuchos::RCP;
177  typedef typename KCRS::device_type device_t;
178  typedef typename KCRS::StaticCrsGraphType graph_t;
179  typedef typename graph_t::row_map_type::non_const_type lno_view_t;
180  typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
181  typedef typename KCRS::values_type::non_const_type scalar_view_t;
182 
183  // Options
184  int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
185  std::string myalg("SPGEMM_KK_MEMORY");
186 
187 
188  if(!params.is_null()) {
189  if(params->isParameter("openmp: algorithm"))
190  myalg = params->get("openmp: algorithm",myalg);
191  if(params->isParameter("openmp: team work size"))
192  team_work_size = params->get("openmp: team work size",team_work_size);
193  }
194 
195  if(myalg == "LTG") {
196  // Use the LTG kernel if requested
197  ::Tpetra::MatrixMatrix::ExtraKernels::mult_A_B_newmatrix_LowThreadGustavsonKernel(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
198  }
199  else {
200  // Use the Kokkos-Kernels OpenMP Kernel
201 #ifdef HAVE_TPETRA_MMM_TIMINGS
202  MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPWrapper"))));
203 #endif
204  // KokkosKernelsHandle
205  typedef KokkosKernels::Experimental::KokkosKernelsHandle<
206  typename lno_view_t::const_value_type,typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
207  typename device_t::execution_space, typename device_t::memory_space,typename device_t::memory_space > KernelHandle;
208 
209  // Grab the Kokkos::SparseCrsMatrices
210  const KCRS & Ak = Aview.origMatrix->getLocalMatrixDevice();
211  // const KCRS & Bk = Bview.origMatrix->getLocalMatrixDevice();
212 
213  // Get the algorithm mode
214  std::string alg = nodename+std::string(" algorithm");
215  // printf("DEBUG: Using kernel: %s\n",myalg.c_str());
216  if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
217  KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
218 
219  // Merge the B and Bimport matrices
220  const KCRS Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
221 
222 #ifdef HAVE_TPETRA_MMM_TIMINGS
223  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPCore"))));
224 #endif
225 
226  // Do the multiply on whatever we've got
227  typename KernelHandle::nnz_lno_t AnumRows = Ak.numRows();
228  // typename KernelHandle::nnz_lno_t BnumRows = Bmerged->numRows();
229  // typename KernelHandle::nnz_lno_t BnumCols = Bmerged->numCols();
230  typename KernelHandle::nnz_lno_t BnumRows = Bmerged.numRows();
231  typename KernelHandle::nnz_lno_t BnumCols = Bmerged.numCols();
232 
233 
234  lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing("non_const_lnow_row"), AnumRows + 1);
235  lno_nnz_view_t entriesC;
236  scalar_view_t valuesC;
237  KernelHandle kh;
238  kh.create_spgemm_handle(alg_enum);
239  kh.set_team_work_size(team_work_size);
240  // KokkosSparse::Experimental::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,false,Bmerged->graph.row_map,Bmerged->graph.entries,false,row_mapC);
241  KokkosSparse::Experimental::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,false,Bmerged.graph.row_map,Bmerged.graph.entries,false,row_mapC);
242 
243  size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
244  if (c_nnz_size){
245  entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
246  valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
247  }
248  // KokkosSparse::Experimental::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,Ak.values,false,Bmerged->graph.row_map,Bmerged->graph.entries,Bmerged->values,false,row_mapC,entriesC,valuesC);
249  KokkosSparse::Experimental::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,Ak.values,false,Bmerged.graph.row_map,Bmerged.graph.entries,Bmerged.values,false,row_mapC,entriesC,valuesC);
250  kh.destroy_spgemm_handle();
251 
252 #ifdef HAVE_TPETRA_MMM_TIMINGS
253  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPSort"))));
254 #endif
255  // Sort & set values
256  if (params.is_null() || params->get("sort entries",true))
257  Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
258  C.setAllValues(row_mapC,entriesC,valuesC);
259 
260  }// end OMP KokkosKernels loop
261 
262 #ifdef HAVE_TPETRA_MMM_TIMINGS
263  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPESFC"))));
264 #endif
265 
266  // Final Fillcomplete
267  RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
268  labelList->set("Timer Label",label);
269  if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
270  RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > dummyExport;
271  C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
272 
273 #if 0
274  {
275  Teuchos::ArrayRCP< const size_t > Crowptr;
276  Teuchos::ArrayRCP< const LocalOrdinal > Ccolind;
277  Teuchos::ArrayRCP< const Scalar > Cvalues;
278  C.getAllValues(Crowptr,Ccolind,Cvalues);
279 
280  // DEBUG
281  int MyPID = C->getComm()->getRank();
282  printf("[%d] Crowptr = ",MyPID);
283  for(size_t i=0; i<(size_t) Crowptr.size(); i++) {
284  printf("%3d ",(int)Crowptr.getConst()[i]);
285  }
286  printf("\n");
287  printf("[%d] Ccolind = ",MyPID);
288  for(size_t i=0; i<(size_t)Ccolind.size(); i++) {
289  printf("%3d ",(int)Ccolind.getConst()[i]);
290  }
291  printf("\n");
292  fflush(stdout);
293  // END DEBUG
294  }
295 #endif
296 }
297 
298 
299 /*********************************************************************************************************/
300 template<class Scalar,
301  class LocalOrdinal,
302  class GlobalOrdinal,
303  class LocalOrdinalViewType>
304 void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
305  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
306  const LocalOrdinalViewType & Acol2Brow,
307  const LocalOrdinalViewType & Acol2Irow,
308  const LocalOrdinalViewType & Bcol2Ccol,
309  const LocalOrdinalViewType & Icol2Ccol,
310  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
311  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
312  const std::string& label,
313  const Teuchos::RCP<Teuchos::ParameterList>& params) {
314 #ifdef HAVE_TPETRA_MMM_TIMINGS
315  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
316  using Teuchos::TimeMonitor;
317  Teuchos::RCP<TimeMonitor> MM;
318 #endif
319 
320  // Lots and lots of typedefs
321  using Teuchos::RCP;
322 
323  // Options
324  int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
325  std::string myalg("LTG");
326  if(!params.is_null()) {
327  if(params->isParameter("openmp: algorithm"))
328  myalg = params->get("openmp: algorithm",myalg);
329  if(params->isParameter("openmp: team work size"))
330  team_work_size = params->get("openmp: team work size",team_work_size);
331  }
332 
333  if(myalg == "LTG") {
334  // Use the LTG kernel if requested
335  ::Tpetra::MatrixMatrix::ExtraKernels::mult_A_B_reuse_LowThreadGustavsonKernel(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
336  }
337  else {
338  throw std::runtime_error("Tpetra::MatrixMatrix::MMM reuse unknown kernel");
339  }
340 
341 #ifdef HAVE_TPETRA_MMM_TIMINGS
342  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Reuse OpenMPESFC"))));
343 #endif
344  C.fillComplete(C.getDomainMap(), C.getRangeMap());
345 }
346 
347 
348 /*********************************************************************************************************/
349 template<class Scalar,
350  class LocalOrdinal,
351  class GlobalOrdinal,
352  class LocalOrdinalViewType>
353 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
354  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> & Dinv,
355  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
356  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
357  const LocalOrdinalViewType & Acol2Brow,
358  const LocalOrdinalViewType & Acol2Irow,
359  const LocalOrdinalViewType & Bcol2Ccol,
360  const LocalOrdinalViewType & Icol2Ccol,
361  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
362  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
363  const std::string& label,
364  const Teuchos::RCP<Teuchos::ParameterList>& params) {
365 
366 #ifdef HAVE_TPETRA_MMM_TIMINGS
367  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
368  using Teuchos::TimeMonitor;
369  Teuchos::RCP<TimeMonitor> MM;
370 #endif
371 
372  // Node-specific code
373  using Teuchos::RCP;
374 
375  // Options
376  int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
377  std::string myalg("LTG");
378  if(!params.is_null()) {
379  if(params->isParameter("openmp: jacobi algorithm"))
380  myalg = params->get("openmp: jacobi algorithm",myalg);
381  if(params->isParameter("openmp: team work size"))
382  team_work_size = params->get("openmp: team work size",team_work_size);
383  }
384 
385  if(myalg == "LTG") {
386  // Use the LTG kernel if requested
387  ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_LowThreadGustavsonKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
388  }
389  else if(myalg == "MSAK") {
390  ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_MultiplyScaleAddKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
391  }
392  else if(myalg == "KK") {
393  jacobi_A_B_newmatrix_KokkosKernels(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
394  }
395  else {
396  throw std::runtime_error("Tpetra::MatrixMatrix::Jacobi newmatrix unknown kernel");
397  }
398 
399 #ifdef HAVE_TPETRA_MMM_TIMINGS
400  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix OpenMPESFC"))));
401 #endif
402 
403  // Final Fillcomplete
404  RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
405  labelList->set("Timer Label",label);
406  if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
407 
408  // NOTE: MSAK already fillCompletes, so we have to check here
409  if(!C.isFillComplete()) {
410  RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > dummyExport;
411  C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
412  }
413 
414 }
415 
416 
417 
418 /*********************************************************************************************************/
419 template<class Scalar,
420  class LocalOrdinal,
421  class GlobalOrdinal,
422  class LocalOrdinalViewType>
423 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
424  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> & Dinv,
425  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
426  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
427  const LocalOrdinalViewType & Acol2Brow,
428  const LocalOrdinalViewType & Acol2Irow,
429  const LocalOrdinalViewType & Bcol2Ccol,
430  const LocalOrdinalViewType & Icol2Ccol,
431  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
432  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
433  const std::string& label,
434  const Teuchos::RCP<Teuchos::ParameterList>& params) {
435 
436 #ifdef HAVE_TPETRA_MMM_TIMINGS
437  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
438  using Teuchos::TimeMonitor;
439  Teuchos::RCP<TimeMonitor> MM;
440 #endif
441 
442  // Lots and lots of typedefs
443  using Teuchos::RCP;
444 
445  // Options
446  int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
447  std::string myalg("LTG");
448  if(!params.is_null()) {
449  if(params->isParameter("openmp: jacobi algorithm"))
450  myalg = params->get("openmp: jacobi algorithm",myalg);
451  if(params->isParameter("openmp: team work size"))
452  team_work_size = params->get("openmp: team work size",team_work_size);
453  }
454 
455  if(myalg == "LTG") {
456  // Use the LTG kernel if requested
457  ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_reuse_LowThreadGustavsonKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
458  }
459  else {
460  throw std::runtime_error("Tpetra::MatrixMatrix::Jacobi reuse unknown kernel");
461  }
462 
463 #ifdef HAVE_TPETRA_MMM_TIMINGS
464  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Reuse OpenMPESFC"))));
465 #endif
466  C.fillComplete(C.getDomainMap(), C.getRangeMap());
467 
468 }
469 
470 /*********************************************************************************************************/
471 template<class Scalar,
472  class LocalOrdinal,
473  class GlobalOrdinal,
474  class LocalOrdinalViewType>
475 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
476  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> & Dinv,
477  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
478  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
479  const LocalOrdinalViewType & Acol2Brow,
480  const LocalOrdinalViewType & Acol2Irow,
481  const LocalOrdinalViewType & Bcol2Ccol,
482  const LocalOrdinalViewType & Icol2Ccol,
483  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
484  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
485  const std::string& label,
486  const Teuchos::RCP<Teuchos::ParameterList>& params) {
487 
488 #ifdef HAVE_TPETRA_MMM_TIMINGS
489  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
490  using Teuchos::TimeMonitor;
491  Teuchos::RCP<TimeMonitor> MM;
492 #endif
493 
494  // Check if the diagonal entries exist in debug mode
495  const bool debug = Tpetra::Details::Behavior::debug();
496  if(debug) {
497 
498  auto rowMap = Aview.origMatrix->getRowMap();
500  Aview.origMatrix->getLocalDiagCopy(diags);
501  size_t diagLength = rowMap->getLocalNumElements();
502  Teuchos::Array<Scalar> diagonal(diagLength);
503  diags.get1dCopy(diagonal());
504 
505  for(size_t i = 0; i < diagLength; ++i) {
506  TEUCHOS_TEST_FOR_EXCEPTION(diagonal[i] == Teuchos::ScalarTraits<Scalar>::zero(),
507  std::runtime_error,
508  "Matrix A has a zero/missing diagonal: " << diagonal[i] << std::endl <<
509  "KokkosKernels Jacobi-fused SpGEMM requires nonzero diagonal entries in A" << std::endl);
510  }
511  }
512 
513  // Usings
514  using device_t = typename Tpetra::KokkosCompat::KokkosOpenMPWrapperNode::device_type;
516  using graph_t = typename matrix_t::StaticCrsGraphType;
517  using lno_view_t = typename graph_t::row_map_type::non_const_type;
518  using c_lno_view_t = typename graph_t::row_map_type::const_type;
519  using lno_nnz_view_t = typename graph_t::entries_type::non_const_type;
520  using scalar_view_t = typename matrix_t::values_type::non_const_type;
521 
522  // KokkosKernels handle
523  using handle_t = typename KokkosKernels::Experimental::KokkosKernelsHandle<
524  typename lno_view_t::const_value_type,typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
525  typename device_t::execution_space, typename device_t::memory_space,typename device_t::memory_space >;
526 
527  // Get the rowPtr, colInd and vals of importMatrix
528  c_lno_view_t Irowptr;
529  lno_nnz_view_t Icolind;
530  scalar_view_t Ivals;
531  if(!Bview.importMatrix.is_null()) {
532  auto lclB = Bview.importMatrix->getLocalMatrixDevice();
533  Irowptr = lclB.graph.row_map;
534  Icolind = lclB.graph.entries;
535  Ivals = lclB.values;
536  }
537 
538  // Merge the B and Bimport matrices
539  const matrix_t Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
540 
541  // Get the properties and arrays of input matrices
542  const matrix_t & Amat = Aview.origMatrix->getLocalMatrixDevice();
543  const matrix_t & Bmat = Bview.origMatrix->getLocalMatrixDevice();
544 
545  typename handle_t::nnz_lno_t AnumRows = Amat.numRows();
546  typename handle_t::nnz_lno_t BnumRows = Bmerged.numRows();
547  typename handle_t::nnz_lno_t BnumCols = Bmerged.numCols();
548 
549  c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmerged.graph.row_map;
550  const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmerged.graph.entries;
551  const scalar_view_t Avals = Amat.values, Bvals = Bmerged.values;
552 
553  // Arrays of the output matrix
554  lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing("non_const_lnow_row"), AnumRows + 1);
555  lno_nnz_view_t entriesC;
556  scalar_view_t valuesC;
557 
558  // Options
559  int team_work_size = 16;
560  std::string myalg("SPGEMM_KK_MEMORY");
561  if(!params.is_null()) {
562  if(params->isParameter("cuda: algorithm"))
563  myalg = params->get("cuda: algorithm",myalg);
564  if(params->isParameter("cuda: team work size"))
565  team_work_size = params->get("cuda: team work size",team_work_size);
566  }
567 
568  // Get the algorithm mode
569  std::string nodename("OpenMP");
570  std::string alg = nodename + std::string(" algorithm");
571  if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
572  KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
573 
574 
575  // KokkosKernels call
576  handle_t kh;
577  kh.create_spgemm_handle(alg_enum);
578  kh.set_team_work_size(team_work_size);
579 
580  KokkosSparse::Experimental::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
581  Arowptr, Acolind, false,
582  Browptr, Bcolind, false,
583  row_mapC);
584 
585  size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
586  if (c_nnz_size){
587  entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
588  valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
589  }
590 
591  KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
592  Arowptr, Acolind, Avals, false,
593  Browptr, Bcolind, Bvals, false,
594  row_mapC, entriesC, valuesC,
595  omega, Dinv.getLocalViewDevice(Tpetra::Access::ReadOnly));
596  kh.destroy_spgemm_handle();
597 
598 #ifdef HAVE_TPETRA_MMM_TIMINGS
599  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix OpenMPSort"))));
600 #endif
601 
602  // Sort & set values
603  if (params.is_null() || params->get("sort entries",true))
604  Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
605  C.setAllValues(row_mapC,entriesC,valuesC);
606 
607 #ifdef HAVE_TPETRA_MMM_TIMINGS
608  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix OpenMPESFC"))));
609 #endif
610 
611  // Final Fillcomplete
612  Teuchos::RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
613  labelList->set("Timer Label",label);
614  if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
615  Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > dummyExport;
616  C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
617 }
618 
619 
620 /*********************************************************************************************************/
621 template<class Scalar,
622  class LocalOrdinal,
623  class GlobalOrdinal,
624  class LocalOrdinalViewType>
625 void KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_R_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Rview,
626  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
627  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
628  const LocalOrdinalViewType & Acol2Prow,
629  const LocalOrdinalViewType & Acol2PIrow,
630  const LocalOrdinalViewType & Pcol2Accol,
631  const LocalOrdinalViewType & PIcol2Accol,
632  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
633  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
634  const std::string& label,
635  const Teuchos::RCP<Teuchos::ParameterList>& params) {
636 
637 
638 
639 #ifdef HAVE_TPETRA_MMM_TIMINGS
640  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
641  using Teuchos::TimeMonitor;
642  Teuchos::RCP<TimeMonitor> MM;
643 #endif
644 
645  // Node-specific code
646  std::string nodename("OpenMP");
647 
648  // Options
649  std::string myalg("LTG");
650 
651  if(!params.is_null()) {
652  if(params->isParameter("openmp: rap algorithm"))
653  myalg = params->get("openmp: rap algorithm",myalg);
654  }
655 
656  if(myalg == "LTG") {
657  // Use the LTG kernel if requested
658  ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_newmatrix_LowThreadGustavsonKernel(Rview,Aview,Pview,Acol2Prow,Acol2PIrow,Pcol2Accol,PIcol2Accol,Ac,Acimport,label,params);
659  }
660  else {
661  throw std::runtime_error("Tpetra::MatrixMatrix::R_A_P newmatrix unknown kernel");
662  }
663 }
664 
665 /*********************************************************************************************************/
666 template<class Scalar,
667  class LocalOrdinal,
668  class GlobalOrdinal,
669  class LocalOrdinalViewType>
670 void KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_R_A_P_reuse_kernel_wrapper(
671  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Rview,
672  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
673  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
674 
675  const LocalOrdinalViewType & Acol2Prow,
676  const LocalOrdinalViewType & Acol2Irow,
677  const LocalOrdinalViewType & Pcol2Ccol,
678  const LocalOrdinalViewType & Icol2Ccol,
679  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
680  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
681  const std::string& label,
682  const Teuchos::RCP<Teuchos::ParameterList>& params) {
683 
684 #ifdef HAVE_TPETRA_MMM_TIMINGS
685  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
686  using Teuchos::TimeMonitor;
687  Teuchos::RCP<TimeMonitor> MM;
688 #endif
689 
690  // Lots and lots of typedefs
691  using Teuchos::RCP;
692 
693  // Options
694  std::string myalg("LTG");
695  if(!params.is_null()) {
696  if(params->isParameter("openmp: rap algorithm"))
697  myalg = params->get("openmp: rap algorithm",myalg);
698  }
699 
700  if(myalg == "LTG") {
701  // Use the LTG kernel if requested
702  ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_reuse_LowThreadGustavsonKernel(Rview,Aview,Pview,Acol2Prow,Acol2Irow,Pcol2Ccol,Icol2Ccol,C,Cimport,label,params);
703  }
704  else {
705  throw std::runtime_error("Tpetra::MatrixMatrix::R_A_P newmatrix unknown kernel");
706  }
707 
708 #ifdef HAVE_TPETRA_MMM_TIMINGS
709  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("RAP Reuse OpenMPESFC"))));
710 #endif
711  C.fillComplete(C.getDomainMap(), C.getRangeMap());
712 
713 }
714 
715 
716 
717 /*********************************************************************************************************/
718 template<class Scalar,
719  class LocalOrdinal,
720  class GlobalOrdinal,
721  class LocalOrdinalViewType>
722 void KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_PT_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
723 
724  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
725  const LocalOrdinalViewType & Acol2Prow,
726  const LocalOrdinalViewType & Acol2PIrow,
727  const LocalOrdinalViewType & Pcol2Accol,
728  const LocalOrdinalViewType & PIcol2Accol,
729  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
730  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
731  const std::string& label,
732  const Teuchos::RCP<Teuchos::ParameterList>& params) {
733 
734 
735 #ifdef HAVE_TPETRA_MMM_TIMINGS
736  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
737  using Teuchos::TimeMonitor;
738  Teuchos::RCP<TimeMonitor> MM;
739 #endif
740 
741  // Node-specific code
742  std::string nodename("OpenMP");
743 
744  // Options
745  std::string myalg("LTG");
746 
747  if(!params.is_null()) {
748  if(params->isParameter("openmp: ptap algorithm"))
749  myalg = params->get("openmp: ptap algorithm",myalg);
750  }
751 
752  if(myalg == "LTG") {
753 #ifdef HAVE_TPETRA_MMM_TIMINGS
754  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("PTAP local transpose"))));
755 #endif
756 
757  using Teuchos::ParameterList;
758  using Teuchos::RCP;
759  using LO = LocalOrdinal;
760  using GO = GlobalOrdinal;
761  using SC = Scalar;
762 
763  // We don't need a kernel-level PTAP, we just transpose here
764  using transposer_type =
765  RowMatrixTransposer<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>;
766  transposer_type transposer (Pview.origMatrix, label + "XP: ");
767  RCP<ParameterList> transposeParams (new ParameterList);
768  if (! params.is_null ()) {
769  transposeParams->set ("compute global constants",
770  params->get ("compute global constants: temporaries",
771  false));
772  }
773  transposeParams->set ("sort", false);
774  RCP<CrsMatrix<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Ptrans =
775  transposer.createTransposeLocal (transposeParams);
776  CrsMatrixStruct<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> Rview;
777  Rview.origMatrix = Ptrans;
778 
779  using ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_newmatrix_LowThreadGustavsonKernel;
780  mult_R_A_P_newmatrix_LowThreadGustavsonKernel
781  (Rview, Aview, Pview, Acol2Prow, Acol2PIrow, Pcol2Accol,
782  PIcol2Accol, Ac, Acimport, label, params);
783  }
784  else {
785  throw std::runtime_error("Tpetra::MatrixMatrix::PT_A_P newmatrix unknown kernel");
786  }
787 }
788 
789 /*********************************************************************************************************/
790 template<class Scalar,
791  class LocalOrdinal,
792  class GlobalOrdinal,
793  class LocalOrdinalViewType>
794 void KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_PT_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
795 
796  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
797  const LocalOrdinalViewType & Acol2Prow,
798  const LocalOrdinalViewType & Acol2PIrow,
799  const LocalOrdinalViewType & Pcol2Accol,
800  const LocalOrdinalViewType & PIcol2Accol,
801  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
802  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
803  const std::string& label,
804  const Teuchos::RCP<Teuchos::ParameterList>& params) {
805 
806 
807 #ifdef HAVE_TPETRA_MMM_TIMINGS
808  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
809  using Teuchos::TimeMonitor;
810  Teuchos::RCP<TimeMonitor> MM;
811 #endif
812 
813  // Node-specific code
814  std::string nodename("OpenMP");
815 
816  // Options
817  std::string myalg("LTG");
818 
819  if(!params.is_null()) {
820  if(params->isParameter("openmp: ptap algorithm"))
821  myalg = params->get("openmp: ptap algorithm",myalg);
822  }
823 
824  if(myalg == "LTG") {
825 #ifdef HAVE_TPETRA_MMM_TIMINGS
826  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("PTAP local transpose"))));
827 #endif
828 
829  using Teuchos::ParameterList;
830  using Teuchos::RCP;
831  using LO = LocalOrdinal;
832  using GO = GlobalOrdinal;
833  using SC = Scalar;
834 
835  // We don't need a kernel-level PTAP, we just transpose here
836  using transposer_type =
837  RowMatrixTransposer<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>;
838  transposer_type transposer (Pview.origMatrix, label + "XP: ");
839  RCP<ParameterList> transposeParams (new ParameterList);
840  if (! params.is_null ()) {
841  transposeParams->set ("compute global constants",
842  params->get ("compute global constants: temporaries",
843  false));
844  }
845  transposeParams->set ("sort", false);
846  RCP<CrsMatrix<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Ptrans =
847  transposer.createTransposeLocal (transposeParams);
848  CrsMatrixStruct<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> Rview;
849  Rview.origMatrix = Ptrans;
850 
851  using ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_reuse_LowThreadGustavsonKernel;
852  mult_R_A_P_reuse_LowThreadGustavsonKernel
853  (Rview, Aview, Pview, Acol2Prow, Acol2PIrow, Pcol2Accol,
854  PIcol2Accol, Ac, Acimport, label, params);
855  }
856  else {
857  throw std::runtime_error("Tpetra::MatrixMatrix::PT_A_P reuse unknown kernel");
858  }
859  Ac.fillComplete(Ac.getDomainMap(), Ac.getRangeMap());
860 }
861 
862 
863 }//MMdetails
864 }//Tpetra
865 
866 #endif//OpenMP
867 
868 #endif
static bool debug()
Whether Tpetra is in debug mode.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
A distributed dense vector.