Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
TpetraExt_MatrixMatrix_OpenMP.hpp
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_MATRIXMATRIX_OPENMP_DEF_HPP
43 #define TPETRA_MATRIXMATRIX_OPENMP_DEF_HPP
44 
45 #ifdef HAVE_TPETRA_INST_OPENMP
46 namespace Tpetra {
47 namespace MMdetails {
48 
49 /*********************************************************************************************************/
50 // MMM KernelWrappers for Partial Specialization to OpenMP
51 template<class Scalar,
52  class LocalOrdinal,
53  class GlobalOrdinal, class LocalOrdinalViewType>
54 struct KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode,LocalOrdinalViewType> {
55  static inline void mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
56  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
57  const LocalOrdinalViewType & Acol2Brow,
58  const LocalOrdinalViewType & Acol2Irow,
59  const LocalOrdinalViewType & Bcol2Ccol,
60  const LocalOrdinalViewType & Icol2Ccol,
61  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
62  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
63  const std::string& label = std::string(),
64  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
65 
66  static inline void mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
67  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
68  const LocalOrdinalViewType & Acol2Brow,
69  const LocalOrdinalViewType & Acol2Irow,
70  const LocalOrdinalViewType & Bcol2Ccol,
71  const LocalOrdinalViewType & Icol2Ccol,
72  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
73  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
74  const std::string& label = std::string(),
75  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
76 
77 
78 };
79 
80 
81 // Jacobi KernelWrappers for Partial Specialization to OpenMP
82 template<class Scalar,
83  class LocalOrdinal,
84  class GlobalOrdinal, class LocalOrdinalViewType>
85 struct KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode,LocalOrdinalViewType> {
86  static inline void jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
87  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> & Dinv,
88  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
89  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
90  const LocalOrdinalViewType & Acol2Brow,
91  const LocalOrdinalViewType & Acol2Irow,
92  const LocalOrdinalViewType & Bcol2Ccol,
93  const LocalOrdinalViewType & Icol2Ccol,
94  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
95  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
96  const std::string& label = std::string(),
97  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
98 
99  static inline void jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
100  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> & Dinv,
101  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
102  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
103  const LocalOrdinalViewType & Acol2Brow,
104  const LocalOrdinalViewType & Acol2Irow,
105  const LocalOrdinalViewType & Bcol2Ccol,
106  const LocalOrdinalViewType & Icol2Ccol,
107  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
108  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
109  const std::string& label = std::string(),
110  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
111 
112  static inline void jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
113  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> & Dinv,
114  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
115  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
116  const LocalOrdinalViewType & Acol2Brow,
117  const LocalOrdinalViewType & Acol2Irow,
118  const LocalOrdinalViewType & Bcol2Ccol,
119  const LocalOrdinalViewType & Icol2Ccol,
120  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
121  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
122  const std::string& label = std::string(),
123  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
124 
125 };
126 
127 
128 // Triple-Product KernelWrappers for Partial Specialization to OpenMP
129 template<class Scalar,
130  class LocalOrdinal,
131  class GlobalOrdinal, class LocalOrdinalViewType>
132 struct KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode,LocalOrdinalViewType> {
133  static inline void mult_R_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Rview,
134  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
135  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
136  const LocalOrdinalViewType & Acol2Prow,
137  const LocalOrdinalViewType & Acol2PIrow,
138  const LocalOrdinalViewType & Pcol2Ccol,
139  const LocalOrdinalViewType & PIcol2Ccol,
140  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
141  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
142  const std::string& label = std::string(),
143  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
144 
145 static inline void mult_R_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Rview,
146  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
147  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
148  const LocalOrdinalViewType & Acol2Prow,
149  const LocalOrdinalViewType & Acol2PIrow,
150  const LocalOrdinalViewType & Pcol2Ccol,
151  const LocalOrdinalViewType & PIcol2Ccol,
152  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
153  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
154  const std::string& label = std::string(),
155  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
156 
157  static inline void mult_PT_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
158  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
159  const LocalOrdinalViewType & Acol2Prow,
160  const LocalOrdinalViewType & Acol2PIrow,
161  const LocalOrdinalViewType & Pcol2Ccol,
162  const LocalOrdinalViewType & PIcol2Ccol,
163  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
164  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
165  const std::string& label = std::string(),
166  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
167 
168  static inline void mult_PT_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
169  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
170  const LocalOrdinalViewType & Acol2Prow,
171  const LocalOrdinalViewType & Acol2PIrow,
172  const LocalOrdinalViewType & Pcol2Ccol,
173  const LocalOrdinalViewType & PIcol2Ccol,
174  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
175  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
176  const std::string& label = std::string(),
177  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
178 };
179 
180 
181 /*********************************************************************************************************/
182 template<class Scalar,
183  class LocalOrdinal,
184  class GlobalOrdinal,
185  class LocalOrdinalViewType>
186 void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
187  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
188  const LocalOrdinalViewType & Acol2Brow,
189  const LocalOrdinalViewType & Acol2Irow,
190  const LocalOrdinalViewType & Bcol2Ccol,
191  const LocalOrdinalViewType & Icol2Ccol,
192  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
193  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
194  const std::string& label,
195  const Teuchos::RCP<Teuchos::ParameterList>& params) {
196 
197 #ifdef HAVE_TPETRA_MMM_TIMINGS
198  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
199  using Teuchos::TimeMonitor;
200  Teuchos::RCP<TimeMonitor> MM;
201 #endif
202 
203  // Node-specific code
204  std::string nodename("OpenMP");
205 
206  // Lots and lots of typedefs
207  using Teuchos::RCP;
209  typedef typename KCRS::device_type device_t;
210  typedef typename KCRS::StaticCrsGraphType graph_t;
211  typedef typename graph_t::row_map_type::non_const_type lno_view_t;
212  typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
213  typedef typename KCRS::values_type::non_const_type scalar_view_t;
214 
215  // Options
216  int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
217  std::string myalg("SPGEMM_KK_MEMORY");
218 
219 
220  if(!params.is_null()) {
221  if(params->isParameter("openmp: algorithm"))
222  myalg = params->get("openmp: algorithm",myalg);
223  if(params->isParameter("openmp: team work size"))
224  team_work_size = params->get("openmp: team work size",team_work_size);
225  }
226 
227  if(myalg == "LTG") {
228  // Use the LTG kernel if requested
229  ::Tpetra::MatrixMatrix::ExtraKernels::mult_A_B_newmatrix_LowThreadGustavsonKernel(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
230  }
231  else {
232  // Use the Kokkos-Kernels OpenMP Kernel
233 #ifdef HAVE_TPETRA_MMM_TIMINGS
234  MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPWrapper"))));
235 #endif
236  // KokkosKernelsHandle
237  typedef KokkosKernels::Experimental::KokkosKernelsHandle<
238  typename lno_view_t::const_value_type,typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
239  typename device_t::execution_space, typename device_t::memory_space,typename device_t::memory_space > KernelHandle;
240 
241  // Grab the Kokkos::SparseCrsMatrices
242  const KCRS & Ak = Aview.origMatrix->getLocalMatrixDevice();
243  // const KCRS & Bk = Bview.origMatrix->getLocalMatrixDevice();
244 
245  // Get the algorithm mode
246  std::string alg = nodename+std::string(" algorithm");
247  // printf("DEBUG: Using kernel: %s\n",myalg.c_str());
248  if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
249  KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
250 
251  // Merge the B and Bimport matrices
252  const KCRS Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
253 
254 #ifdef HAVE_TPETRA_MMM_TIMINGS
255  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPCore"))));
256 #endif
257 
258  // Do the multiply on whatever we've got
259  typename KernelHandle::nnz_lno_t AnumRows = Ak.numRows();
260  // typename KernelHandle::nnz_lno_t BnumRows = Bmerged->numRows();
261  // typename KernelHandle::nnz_lno_t BnumCols = Bmerged->numCols();
262  typename KernelHandle::nnz_lno_t BnumRows = Bmerged.numRows();
263  typename KernelHandle::nnz_lno_t BnumCols = Bmerged.numCols();
264 
265 
266  lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing("non_const_lnow_row"), AnumRows + 1);
267  lno_nnz_view_t entriesC;
268  scalar_view_t valuesC;
269  KernelHandle kh;
270  kh.create_spgemm_handle(alg_enum);
271  kh.set_team_work_size(team_work_size);
272  // KokkosSparse::Experimental::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,false,Bmerged->graph.row_map,Bmerged->graph.entries,false,row_mapC);
273  KokkosSparse::Experimental::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,false,Bmerged.graph.row_map,Bmerged.graph.entries,false,row_mapC);
274 
275  size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
276  if (c_nnz_size){
277  entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
278  valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
279  }
280  // KokkosSparse::Experimental::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,Ak.values,false,Bmerged->graph.row_map,Bmerged->graph.entries,Bmerged->values,false,row_mapC,entriesC,valuesC);
281  KokkosSparse::Experimental::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,Ak.values,false,Bmerged.graph.row_map,Bmerged.graph.entries,Bmerged.values,false,row_mapC,entriesC,valuesC);
282  kh.destroy_spgemm_handle();
283 
284 #ifdef HAVE_TPETRA_MMM_TIMINGS
285  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPSort"))));
286 #endif
287  // Sort & set values
288  if (params.is_null() || params->get("sort entries",true))
289  Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
290  C.setAllValues(row_mapC,entriesC,valuesC);
291 
292  }// end OMP KokkosKernels loop
293 
294 #ifdef HAVE_TPETRA_MMM_TIMINGS
295  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPESFC"))));
296 #endif
297 
298  // Final Fillcomplete
299  RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
300  labelList->set("Timer Label",label);
301  if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
302  RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > dummyExport;
303  C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
304 
305 #if 0
306  {
307  Teuchos::ArrayRCP< const size_t > Crowptr;
308  Teuchos::ArrayRCP< const LocalOrdinal > Ccolind;
309  Teuchos::ArrayRCP< const Scalar > Cvalues;
310  C.getAllValues(Crowptr,Ccolind,Cvalues);
311 
312  // DEBUG
313  int MyPID = C->getComm()->getRank();
314  printf("[%d] Crowptr = ",MyPID);
315  for(size_t i=0; i<(size_t) Crowptr.size(); i++) {
316  printf("%3d ",(int)Crowptr.getConst()[i]);
317  }
318  printf("\n");
319  printf("[%d] Ccolind = ",MyPID);
320  for(size_t i=0; i<(size_t)Ccolind.size(); i++) {
321  printf("%3d ",(int)Ccolind.getConst()[i]);
322  }
323  printf("\n");
324  fflush(stdout);
325  // END DEBUG
326  }
327 #endif
328 }
329 
330 
331 /*********************************************************************************************************/
332 template<class Scalar,
333  class LocalOrdinal,
334  class GlobalOrdinal,
335  class LocalOrdinalViewType>
336 void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
337  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
338  const LocalOrdinalViewType & Acol2Brow,
339  const LocalOrdinalViewType & Acol2Irow,
340  const LocalOrdinalViewType & Bcol2Ccol,
341  const LocalOrdinalViewType & Icol2Ccol,
342  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
343  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
344  const std::string& label,
345  const Teuchos::RCP<Teuchos::ParameterList>& params) {
346 #ifdef HAVE_TPETRA_MMM_TIMINGS
347  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
348  using Teuchos::TimeMonitor;
349  Teuchos::RCP<TimeMonitor> MM;
350 #endif
351 
352  // Lots and lots of typedefs
353  using Teuchos::RCP;
354 
355  // Options
356  int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
357  std::string myalg("LTG");
358  if(!params.is_null()) {
359  if(params->isParameter("openmp: algorithm"))
360  myalg = params->get("openmp: algorithm",myalg);
361  if(params->isParameter("openmp: team work size"))
362  team_work_size = params->get("openmp: team work size",team_work_size);
363  }
364 
365  if(myalg == "LTG") {
366  // Use the LTG kernel if requested
367  ::Tpetra::MatrixMatrix::ExtraKernels::mult_A_B_reuse_LowThreadGustavsonKernel(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
368  }
369  else {
370  throw std::runtime_error("Tpetra::MatrixMatrix::MMM reuse unknown kernel");
371  }
372 
373 #ifdef HAVE_TPETRA_MMM_TIMINGS
374  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Reuse OpenMPESFC"))));
375 #endif
376  C.fillComplete(C.getDomainMap(), C.getRangeMap());
377 }
378 
379 
380 /*********************************************************************************************************/
381 template<class Scalar,
382  class LocalOrdinal,
383  class GlobalOrdinal,
384  class LocalOrdinalViewType>
385 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
386  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> & Dinv,
387  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
388  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
389  const LocalOrdinalViewType & Acol2Brow,
390  const LocalOrdinalViewType & Acol2Irow,
391  const LocalOrdinalViewType & Bcol2Ccol,
392  const LocalOrdinalViewType & Icol2Ccol,
393  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
394  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
395  const std::string& label,
396  const Teuchos::RCP<Teuchos::ParameterList>& params) {
397 
398 #ifdef HAVE_TPETRA_MMM_TIMINGS
399  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
400  using Teuchos::TimeMonitor;
401  Teuchos::RCP<TimeMonitor> MM;
402 #endif
403 
404  // Node-specific code
405  using Teuchos::RCP;
406 
407  // Options
408  int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
409  std::string myalg("LTG");
410  if(!params.is_null()) {
411  if(params->isParameter("openmp: jacobi algorithm"))
412  myalg = params->get("openmp: jacobi algorithm",myalg);
413  if(params->isParameter("openmp: team work size"))
414  team_work_size = params->get("openmp: team work size",team_work_size);
415  }
416 
417  if(myalg == "LTG") {
418  // Use the LTG kernel if requested
419  ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_LowThreadGustavsonKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
420  }
421  else if(myalg == "MSAK") {
422  ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_MultiplyScaleAddKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
423  }
424  else if(myalg == "KK") {
425  jacobi_A_B_newmatrix_KokkosKernels(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
426  }
427  else {
428  throw std::runtime_error("Tpetra::MatrixMatrix::Jacobi newmatrix unknown kernel");
429  }
430 
431 #ifdef HAVE_TPETRA_MMM_TIMINGS
432  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix OpenMPESFC"))));
433 #endif
434 
435  // Final Fillcomplete
436  RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
437  labelList->set("Timer Label",label);
438  if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
439 
440  // NOTE: MSAK already fillCompletes, so we have to check here
441  if(!C.isFillComplete()) {
442  RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > dummyExport;
443  C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
444  }
445 
446 }
447 
448 
449 
450 /*********************************************************************************************************/
451 template<class Scalar,
452  class LocalOrdinal,
453  class GlobalOrdinal,
454  class LocalOrdinalViewType>
455 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
456  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> & Dinv,
457  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
458  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
459  const LocalOrdinalViewType & Acol2Brow,
460  const LocalOrdinalViewType & Acol2Irow,
461  const LocalOrdinalViewType & Bcol2Ccol,
462  const LocalOrdinalViewType & Icol2Ccol,
463  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
464  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
465  const std::string& label,
466  const Teuchos::RCP<Teuchos::ParameterList>& params) {
467 
468 #ifdef HAVE_TPETRA_MMM_TIMINGS
469  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
470  using Teuchos::TimeMonitor;
471  Teuchos::RCP<TimeMonitor> MM;
472 #endif
473 
474  // Lots and lots of typedefs
475  using Teuchos::RCP;
476 
477  // Options
478  int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
479  std::string myalg("LTG");
480  if(!params.is_null()) {
481  if(params->isParameter("openmp: jacobi algorithm"))
482  myalg = params->get("openmp: jacobi algorithm",myalg);
483  if(params->isParameter("openmp: team work size"))
484  team_work_size = params->get("openmp: team work size",team_work_size);
485  }
486 
487  if(myalg == "LTG") {
488  // Use the LTG kernel if requested
489  ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_reuse_LowThreadGustavsonKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
490  }
491  else {
492  throw std::runtime_error("Tpetra::MatrixMatrix::Jacobi reuse unknown kernel");
493  }
494 
495 #ifdef HAVE_TPETRA_MMM_TIMINGS
496  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Reuse OpenMPESFC"))));
497 #endif
498  C.fillComplete(C.getDomainMap(), C.getRangeMap());
499 
500 }
501 
502 /*********************************************************************************************************/
503 template<class Scalar,
504  class LocalOrdinal,
505  class GlobalOrdinal,
506  class LocalOrdinalViewType>
507 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
508  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> & Dinv,
509  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
510  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
511  const LocalOrdinalViewType & Acol2Brow,
512  const LocalOrdinalViewType & Acol2Irow,
513  const LocalOrdinalViewType & Bcol2Ccol,
514  const LocalOrdinalViewType & Icol2Ccol,
515  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
516  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
517  const std::string& label,
518  const Teuchos::RCP<Teuchos::ParameterList>& params) {
519 
520 #ifdef HAVE_TPETRA_MMM_TIMINGS
521  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
522  using Teuchos::TimeMonitor;
523  Teuchos::RCP<TimeMonitor> MM;
524 #endif
525 
526  // Check if the diagonal entries exist in debug mode
527  const bool debug = Tpetra::Details::Behavior::debug();
528  if(debug) {
529 
530  auto rowMap = Aview.origMatrix->getRowMap();
532  Aview.origMatrix->getLocalDiagCopy(diags);
533  size_t diagLength = rowMap->getLocalNumElements();
534  Teuchos::Array<Scalar> diagonal(diagLength);
535  diags.get1dCopy(diagonal());
536 
537  for(size_t i = 0; i < diagLength; ++i) {
538  TEUCHOS_TEST_FOR_EXCEPTION(diagonal[i] == Teuchos::ScalarTraits<Scalar>::zero(),
539  std::runtime_error,
540  "Matrix A has a zero/missing diagonal: " << diagonal[i] << std::endl <<
541  "KokkosKernels Jacobi-fused SpGEMM requires nonzero diagonal entries in A" << std::endl);
542  }
543  }
544 
545  // Usings
546  using device_t = typename Tpetra::KokkosCompat::KokkosOpenMPWrapperNode::device_type;
548  using graph_t = typename matrix_t::StaticCrsGraphType;
549  using lno_view_t = typename graph_t::row_map_type::non_const_type;
550  using c_lno_view_t = typename graph_t::row_map_type::const_type;
551  using lno_nnz_view_t = typename graph_t::entries_type::non_const_type;
552  using scalar_view_t = typename matrix_t::values_type::non_const_type;
553 
554  // KokkosKernels handle
555  using handle_t = typename KokkosKernels::Experimental::KokkosKernelsHandle<
556  typename lno_view_t::const_value_type,typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
557  typename device_t::execution_space, typename device_t::memory_space,typename device_t::memory_space >;
558 
559  // Get the rowPtr, colInd and vals of importMatrix
560  c_lno_view_t Irowptr;
561  lno_nnz_view_t Icolind;
562  scalar_view_t Ivals;
563  if(!Bview.importMatrix.is_null()) {
564  auto lclB = Bview.importMatrix->getLocalMatrixDevice();
565  Irowptr = lclB.graph.row_map;
566  Icolind = lclB.graph.entries;
567  Ivals = lclB.values;
568  }
569 
570  // Merge the B and Bimport matrices
571  const matrix_t Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
572 
573  // Get the properties and arrays of input matrices
574  const matrix_t & Amat = Aview.origMatrix->getLocalMatrixDevice();
575  const matrix_t & Bmat = Bview.origMatrix->getLocalMatrixDevice();
576 
577  typename handle_t::nnz_lno_t AnumRows = Amat.numRows();
578  typename handle_t::nnz_lno_t BnumRows = Bmerged.numRows();
579  typename handle_t::nnz_lno_t BnumCols = Bmerged.numCols();
580 
581  c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmerged.graph.row_map;
582  const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmerged.graph.entries;
583  const scalar_view_t Avals = Amat.values, Bvals = Bmerged.values;
584 
585  // Arrays of the output matrix
586  lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing("non_const_lnow_row"), AnumRows + 1);
587  lno_nnz_view_t entriesC;
588  scalar_view_t valuesC;
589 
590  // Options
591  int team_work_size = 16;
592  std::string myalg("SPGEMM_KK_MEMORY");
593  if(!params.is_null()) {
594  if(params->isParameter("cuda: algorithm"))
595  myalg = params->get("cuda: algorithm",myalg);
596  if(params->isParameter("cuda: team work size"))
597  team_work_size = params->get("cuda: team work size",team_work_size);
598  }
599 
600  // Get the algorithm mode
601  std::string nodename("OpenMP");
602  std::string alg = nodename + std::string(" algorithm");
603  if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
604  KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
605 
606 
607  // KokkosKernels call
608  handle_t kh;
609  kh.create_spgemm_handle(alg_enum);
610  kh.set_team_work_size(team_work_size);
611 
612  KokkosSparse::Experimental::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
613  Arowptr, Acolind, false,
614  Browptr, Bcolind, false,
615  row_mapC);
616 
617  size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
618  if (c_nnz_size){
619  entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
620  valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
621  }
622 
623  KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
624  Arowptr, Acolind, Avals, false,
625  Browptr, Bcolind, Bvals, false,
626  row_mapC, entriesC, valuesC,
627  omega, Dinv.getLocalViewDevice(Tpetra::Access::ReadOnly));
628  kh.destroy_spgemm_handle();
629 
630 #ifdef HAVE_TPETRA_MMM_TIMINGS
631  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix OpenMPSort"))));
632 #endif
633 
634  // Sort & set values
635  if (params.is_null() || params->get("sort entries",true))
636  Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
637  C.setAllValues(row_mapC,entriesC,valuesC);
638 
639 #ifdef HAVE_TPETRA_MMM_TIMINGS
640  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix OpenMPESFC"))));
641 #endif
642 
643  // Final Fillcomplete
644  Teuchos::RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
645  labelList->set("Timer Label",label);
646  if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
647  Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > dummyExport;
648  C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
649 }
650 
651 
652 /*********************************************************************************************************/
653 template<class Scalar,
654  class LocalOrdinal,
655  class GlobalOrdinal,
656  class LocalOrdinalViewType>
657 void KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_R_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Rview,
658  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
659  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
660  const LocalOrdinalViewType & Acol2Prow,
661  const LocalOrdinalViewType & Acol2PIrow,
662  const LocalOrdinalViewType & Pcol2Accol,
663  const LocalOrdinalViewType & PIcol2Accol,
664  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
665  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
666  const std::string& label,
667  const Teuchos::RCP<Teuchos::ParameterList>& params) {
668 
669 
670 
671 #ifdef HAVE_TPETRA_MMM_TIMINGS
672  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
673  using Teuchos::TimeMonitor;
674  Teuchos::RCP<TimeMonitor> MM;
675 #endif
676 
677  // Node-specific code
678  std::string nodename("OpenMP");
679 
680  // Options
681  std::string myalg("LTG");
682 
683  if(!params.is_null()) {
684  if(params->isParameter("openmp: rap algorithm"))
685  myalg = params->get("openmp: rap algorithm",myalg);
686  }
687 
688  if(myalg == "LTG") {
689  // Use the LTG kernel if requested
690  ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_newmatrix_LowThreadGustavsonKernel(Rview,Aview,Pview,Acol2Prow,Acol2PIrow,Pcol2Accol,PIcol2Accol,Ac,Acimport,label,params);
691  }
692  else {
693  throw std::runtime_error("Tpetra::MatrixMatrix::R_A_P newmatrix unknown kernel");
694  }
695 }
696 
697 /*********************************************************************************************************/
698 template<class Scalar,
699  class LocalOrdinal,
700  class GlobalOrdinal,
701  class LocalOrdinalViewType>
702 void KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_R_A_P_reuse_kernel_wrapper(
703  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Rview,
704  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
705  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
706 
707  const LocalOrdinalViewType & Acol2Prow,
708  const LocalOrdinalViewType & Acol2Irow,
709  const LocalOrdinalViewType & Pcol2Ccol,
710  const LocalOrdinalViewType & Icol2Ccol,
711  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
712  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
713  const std::string& label,
714  const Teuchos::RCP<Teuchos::ParameterList>& params) {
715 
716 #ifdef HAVE_TPETRA_MMM_TIMINGS
717  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
718  using Teuchos::TimeMonitor;
719  Teuchos::RCP<TimeMonitor> MM;
720 #endif
721 
722  // Lots and lots of typedefs
723  using Teuchos::RCP;
724 
725  // Options
726  std::string myalg("LTG");
727  if(!params.is_null()) {
728  if(params->isParameter("openmp: rap algorithm"))
729  myalg = params->get("openmp: rap algorithm",myalg);
730  }
731 
732  if(myalg == "LTG") {
733  // Use the LTG kernel if requested
734  ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_reuse_LowThreadGustavsonKernel(Rview,Aview,Pview,Acol2Prow,Acol2Irow,Pcol2Ccol,Icol2Ccol,C,Cimport,label,params);
735  }
736  else {
737  throw std::runtime_error("Tpetra::MatrixMatrix::R_A_P newmatrix unknown kernel");
738  }
739 
740 #ifdef HAVE_TPETRA_MMM_TIMINGS
741  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("RAP Reuse OpenMPESFC"))));
742 #endif
743  C.fillComplete(C.getDomainMap(), C.getRangeMap());
744 
745 }
746 
747 
748 
749 /*********************************************************************************************************/
750 template<class Scalar,
751  class LocalOrdinal,
752  class GlobalOrdinal,
753  class LocalOrdinalViewType>
754 void KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_PT_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
755 
756  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
757  const LocalOrdinalViewType & Acol2Prow,
758  const LocalOrdinalViewType & Acol2PIrow,
759  const LocalOrdinalViewType & Pcol2Accol,
760  const LocalOrdinalViewType & PIcol2Accol,
761  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
762  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
763  const std::string& label,
764  const Teuchos::RCP<Teuchos::ParameterList>& params) {
765 
766 
767 #ifdef HAVE_TPETRA_MMM_TIMINGS
768  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
769  using Teuchos::TimeMonitor;
770  Teuchos::RCP<TimeMonitor> MM;
771 #endif
772 
773  // Node-specific code
774  std::string nodename("OpenMP");
775 
776  // Options
777  std::string myalg("LTG");
778 
779  if(!params.is_null()) {
780  if(params->isParameter("openmp: ptap algorithm"))
781  myalg = params->get("openmp: ptap algorithm",myalg);
782  }
783 
784  if(myalg == "LTG") {
785 #ifdef HAVE_TPETRA_MMM_TIMINGS
786  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("PTAP local transpose"))));
787 #endif
788 
789  using Teuchos::ParameterList;
790  using Teuchos::RCP;
791  using LO = LocalOrdinal;
792  using GO = GlobalOrdinal;
793  using SC = Scalar;
794 
795  // We don't need a kernel-level PTAP, we just transpose here
796  using transposer_type =
797  RowMatrixTransposer<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>;
798  transposer_type transposer (Pview.origMatrix, label + "XP: ");
799  RCP<ParameterList> transposeParams (new ParameterList);
800  if (! params.is_null ()) {
801  transposeParams->set ("compute global constants",
802  params->get ("compute global constants: temporaries",
803  false));
804  }
805  transposeParams->set ("sort", false);
806  RCP<CrsMatrix<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Ptrans =
807  transposer.createTransposeLocal (transposeParams);
808  CrsMatrixStruct<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> Rview;
809  Rview.origMatrix = Ptrans;
810 
811  using ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_newmatrix_LowThreadGustavsonKernel;
812  mult_R_A_P_newmatrix_LowThreadGustavsonKernel
813  (Rview, Aview, Pview, Acol2Prow, Acol2PIrow, Pcol2Accol,
814  PIcol2Accol, Ac, Acimport, label, params);
815  }
816  else {
817  throw std::runtime_error("Tpetra::MatrixMatrix::PT_A_P newmatrix unknown kernel");
818  }
819 }
820 
821 /*********************************************************************************************************/
822 template<class Scalar,
823  class LocalOrdinal,
824  class GlobalOrdinal,
825  class LocalOrdinalViewType>
826 void KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_PT_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
827 
828  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
829  const LocalOrdinalViewType & Acol2Prow,
830  const LocalOrdinalViewType & Acol2PIrow,
831  const LocalOrdinalViewType & Pcol2Accol,
832  const LocalOrdinalViewType & PIcol2Accol,
833  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
834  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
835  const std::string& label,
836  const Teuchos::RCP<Teuchos::ParameterList>& params) {
837 
838 
839 #ifdef HAVE_TPETRA_MMM_TIMINGS
840  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
841  using Teuchos::TimeMonitor;
842  Teuchos::RCP<TimeMonitor> MM;
843 #endif
844 
845  // Node-specific code
846  std::string nodename("OpenMP");
847 
848  // Options
849  std::string myalg("LTG");
850 
851  if(!params.is_null()) {
852  if(params->isParameter("openmp: ptap algorithm"))
853  myalg = params->get("openmp: ptap algorithm",myalg);
854  }
855 
856  if(myalg == "LTG") {
857 #ifdef HAVE_TPETRA_MMM_TIMINGS
858  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("PTAP local transpose"))));
859 #endif
860 
861  using Teuchos::ParameterList;
862  using Teuchos::RCP;
863  using LO = LocalOrdinal;
864  using GO = GlobalOrdinal;
865  using SC = Scalar;
866 
867  // We don't need a kernel-level PTAP, we just transpose here
868  using transposer_type =
869  RowMatrixTransposer<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>;
870  transposer_type transposer (Pview.origMatrix, label + "XP: ");
871  RCP<ParameterList> transposeParams (new ParameterList);
872  if (! params.is_null ()) {
873  transposeParams->set ("compute global constants",
874  params->get ("compute global constants: temporaries",
875  false));
876  }
877  transposeParams->set ("sort", false);
878  RCP<CrsMatrix<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Ptrans =
879  transposer.createTransposeLocal (transposeParams);
880  CrsMatrixStruct<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> Rview;
881  Rview.origMatrix = Ptrans;
882 
883  using ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_reuse_LowThreadGustavsonKernel;
884  mult_R_A_P_reuse_LowThreadGustavsonKernel
885  (Rview, Aview, Pview, Acol2Prow, Acol2PIrow, Pcol2Accol,
886  PIcol2Accol, Ac, Acimport, label, params);
887  }
888  else {
889  throw std::runtime_error("Tpetra::MatrixMatrix::PT_A_P reuse unknown kernel");
890  }
891  Ac.fillComplete(Ac.getDomainMap(), Ac.getRangeMap());
892 }
893 
894 
895 }//MMdetails
896 }//Tpetra
897 
898 #endif//OpenMP
899 
900 #endif
static bool debug()
Whether Tpetra is in debug mode.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
A distributed dense vector.