Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
TpetraExt_MatrixMatrix_OpenMP.hpp
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_MATRIXMATRIX_OPENMP_DEF_HPP
43 #define TPETRA_MATRIXMATRIX_OPENMP_DEF_HPP
44 
45 #ifdef HAVE_TPETRA_INST_OPENMP
46 namespace Tpetra {
47 namespace MMdetails {
48 
49 /*********************************************************************************************************/
50 // MMM KernelWrappers for Partial Specialization to OpenMP
51 template<class Scalar,
52  class LocalOrdinal,
53  class GlobalOrdinal, class LocalOrdinalViewType>
54 struct KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType> {
55  static inline void mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
56  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
57  const LocalOrdinalViewType & Acol2Brow,
58  const LocalOrdinalViewType & Acol2Irow,
59  const LocalOrdinalViewType & Bcol2Ccol,
60  const LocalOrdinalViewType & Icol2Ccol,
61  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
62  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
63  const std::string& label = std::string(),
64  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
65 
66  static inline void mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
67  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
68  const LocalOrdinalViewType & Acol2Brow,
69  const LocalOrdinalViewType & Acol2Irow,
70  const LocalOrdinalViewType & Bcol2Ccol,
71  const LocalOrdinalViewType & Icol2Ccol,
72  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
73  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
74  const std::string& label = std::string(),
75  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
76 
77 
78 };
79 
80 
81 // Jacobi KernelWrappers for Partial Specialization to OpenMP
82 template<class Scalar,
83  class LocalOrdinal,
84  class GlobalOrdinal, class LocalOrdinalViewType>
85 struct KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType> {
86  static inline void jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
87  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
88  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
89  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
90  const LocalOrdinalViewType & Acol2Brow,
91  const LocalOrdinalViewType & Acol2Irow,
92  const LocalOrdinalViewType & Bcol2Ccol,
93  const LocalOrdinalViewType & Icol2Ccol,
94  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
95  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
96  const std::string& label = std::string(),
97  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
98 
99  static inline void jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
100  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
101  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
102  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
103  const LocalOrdinalViewType & Acol2Brow,
104  const LocalOrdinalViewType & Acol2Irow,
105  const LocalOrdinalViewType & Bcol2Ccol,
106  const LocalOrdinalViewType & Icol2Ccol,
107  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
108  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
109  const std::string& label = std::string(),
110  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
111 
112  static inline void jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
113  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
114  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
115  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
116  const LocalOrdinalViewType & Acol2Brow,
117  const LocalOrdinalViewType & Acol2Irow,
118  const LocalOrdinalViewType & Bcol2Ccol,
119  const LocalOrdinalViewType & Icol2Ccol,
120  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
121  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
122  const std::string& label = std::string(),
123  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
124 
125 };
126 
127 
128 // Triple-Product KernelWrappers for Partial Specialization to OpenMP
129 template<class Scalar,
130  class LocalOrdinal,
131  class GlobalOrdinal, class LocalOrdinalViewType>
132 struct KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType> {
133  static inline void mult_R_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Rview,
134  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
135  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
136  const LocalOrdinalViewType & Acol2Prow,
137  const LocalOrdinalViewType & Acol2PIrow,
138  const LocalOrdinalViewType & Pcol2Ccol,
139  const LocalOrdinalViewType & PIcol2Ccol,
140  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
141  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
142  const std::string& label = std::string(),
143  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
144 
145 static inline void mult_R_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Rview,
146  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
147  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
148  const LocalOrdinalViewType & Acol2Prow,
149  const LocalOrdinalViewType & Acol2PIrow,
150  const LocalOrdinalViewType & Pcol2Ccol,
151  const LocalOrdinalViewType & PIcol2Ccol,
152  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
153  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
154  const std::string& label = std::string(),
155  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
156 
157  static inline void mult_PT_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
158  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
159  const LocalOrdinalViewType & Acol2Prow,
160  const LocalOrdinalViewType & Acol2PIrow,
161  const LocalOrdinalViewType & Pcol2Ccol,
162  const LocalOrdinalViewType & PIcol2Ccol,
163  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
164  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
165  const std::string& label = std::string(),
166  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
167 
168  static inline void mult_PT_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
169  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
170  const LocalOrdinalViewType & Acol2Prow,
171  const LocalOrdinalViewType & Acol2PIrow,
172  const LocalOrdinalViewType & Pcol2Ccol,
173  const LocalOrdinalViewType & PIcol2Ccol,
174  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
175  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
176  const std::string& label = std::string(),
177  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
178 };
179 
180 
181 /*********************************************************************************************************/
182 template<class Scalar,
183  class LocalOrdinal,
184  class GlobalOrdinal,
185  class LocalOrdinalViewType>
186 void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
187  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
188  const LocalOrdinalViewType & Acol2Brow,
189  const LocalOrdinalViewType & Acol2Irow,
190  const LocalOrdinalViewType & Bcol2Ccol,
191  const LocalOrdinalViewType & Icol2Ccol,
192  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
193  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
194  const std::string& label,
195  const Teuchos::RCP<Teuchos::ParameterList>& params) {
196 
197 #ifdef HAVE_TPETRA_MMM_TIMINGS
198  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
199  using Teuchos::TimeMonitor;
200  Teuchos::RCP<TimeMonitor> MM;
201 #endif
202 
203  // Node-specific code
204  std::string nodename("OpenMP");
205 
206  // Lots and lots of typedefs
207  using Teuchos::RCP;
209  typedef typename KCRS::device_type device_t;
210  typedef typename KCRS::StaticCrsGraphType graph_t;
211  typedef typename graph_t::row_map_type::non_const_type lno_view_t;
212  typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
213  typedef typename KCRS::values_type::non_const_type scalar_view_t;
214 
215  // Options
216  int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
217  std::string myalg("SPGEMM_KK_MEMORY");
218 
219 
220  if(!params.is_null()) {
221  if(params->isParameter("openmp: algorithm"))
222  myalg = params->get("openmp: algorithm",myalg);
223  if(params->isParameter("openmp: team work size"))
224  team_work_size = params->get("openmp: team work size",team_work_size);
225  }
226 
227  if(myalg == "LTG") {
228  // Use the LTG kernel if requested
229  ::Tpetra::MatrixMatrix::ExtraKernels::mult_A_B_newmatrix_LowThreadGustavsonKernel(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
230  }
231  else {
232  // Use the Kokkos-Kernels OpenMP Kernel
233 #ifdef HAVE_TPETRA_MMM_TIMINGS
234  MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPWrapper"))));
235 #endif
236  // KokkosKernelsHandle
237  typedef KokkosKernels::Experimental::KokkosKernelsHandle<
238  typename lno_view_t::const_value_type,typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
239  typename device_t::execution_space, typename device_t::memory_space,typename device_t::memory_space > KernelHandle;
240 
241  // Grab the Kokkos::SparseCrsMatrices
242  const KCRS & Ak = Aview.origMatrix->getLocalMatrix();
243  // const KCRS & Bk = Bview.origMatrix->getLocalMatrix();
244 
245  // Get the algorithm mode
246  std::string alg = nodename+std::string(" algorithm");
247  // printf("DEBUG: Using kernel: %s\n",myalg.c_str());
248  if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
249  KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
250 
251  // Merge the B and Bimport matrices
252  const KCRS Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getNodeNumElements());
253 
254 #ifdef HAVE_TPETRA_MMM_TIMINGS
255  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPCore"))));
256 #endif
257 
258  // Do the multiply on whatever we've got
259  typename KernelHandle::nnz_lno_t AnumRows = Ak.numRows();
260  // typename KernelHandle::nnz_lno_t BnumRows = Bmerged->numRows();
261  // typename KernelHandle::nnz_lno_t BnumCols = Bmerged->numCols();
262  typename KernelHandle::nnz_lno_t BnumRows = Bmerged.numRows();
263  typename KernelHandle::nnz_lno_t BnumCols = Bmerged.numCols();
264 
265 
266  lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing("non_const_lnow_row"), AnumRows + 1);
267  lno_nnz_view_t entriesC;
268  scalar_view_t valuesC;
269  KernelHandle kh;
270  kh.create_spgemm_handle(alg_enum);
271  kh.set_team_work_size(team_work_size);
272  // KokkosSparse::Experimental::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,false,Bmerged->graph.row_map,Bmerged->graph.entries,false,row_mapC);
273  KokkosSparse::Experimental::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,false,Bmerged.graph.row_map,Bmerged.graph.entries,false,row_mapC);
274 
275  size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
276  if (c_nnz_size){
277  entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
278  valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
279  }
280  // KokkosSparse::Experimental::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,Ak.values,false,Bmerged->graph.row_map,Bmerged->graph.entries,Bmerged->values,false,row_mapC,entriesC,valuesC);
281  KokkosSparse::Experimental::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,Ak.values,false,Bmerged.graph.row_map,Bmerged.graph.entries,Bmerged.values,false,row_mapC,entriesC,valuesC);
282  kh.destroy_spgemm_handle();
283 
284 #ifdef HAVE_TPETRA_MMM_TIMINGS
285  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPSort"))));
286 #endif
287  // Sort & set values
288  if (params.is_null() || params->get("sort entries",true))
289  Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
290  C.setAllValues(row_mapC,entriesC,valuesC);
291 
292  }// end OMP KokkosKernels loop
293 
294 #ifdef HAVE_TPETRA_MMM_TIMINGS
295  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPESFC"))));
296 #endif
297 
298  // Final Fillcomplete
299  RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
300  labelList->set("Timer Label",label);
301  if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
302  RCP<const Export<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > dummyExport;
303  C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
304 
305 #if 0
306  {
307  Teuchos::ArrayRCP< const size_t > Crowptr;
308  Teuchos::ArrayRCP< const LocalOrdinal > Ccolind;
309  Teuchos::ArrayRCP< const Scalar > Cvalues;
310  C.getAllValues(Crowptr,Ccolind,Cvalues);
311 
312  // DEBUG
313  int MyPID = C->getComm()->getRank();
314  printf("[%d] Crowptr = ",MyPID);
315  for(size_t i=0; i<(size_t) Crowptr.size(); i++) {
316  printf("%3d ",(int)Crowptr.getConst()[i]);
317  }
318  printf("\n");
319  printf("[%d] Ccolind = ",MyPID);
320  for(size_t i=0; i<(size_t)Ccolind.size(); i++) {
321  printf("%3d ",(int)Ccolind.getConst()[i]);
322  }
323  printf("\n");
324  fflush(stdout);
325  // END DEBUG
326  }
327 #endif
328 }
329 
330 
331 /*********************************************************************************************************/
332 template<class Scalar,
333  class LocalOrdinal,
334  class GlobalOrdinal,
335  class LocalOrdinalViewType>
336 void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
337  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
338  const LocalOrdinalViewType & Acol2Brow,
339  const LocalOrdinalViewType & Acol2Irow,
340  const LocalOrdinalViewType & Bcol2Ccol,
341  const LocalOrdinalViewType & Icol2Ccol,
342  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
343  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
344  const std::string& label,
345  const Teuchos::RCP<Teuchos::ParameterList>& params) {
346 #ifdef HAVE_TPETRA_MMM_TIMINGS
347  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
348  using Teuchos::TimeMonitor;
349  Teuchos::RCP<TimeMonitor> MM;
350 #endif
351 
352  // Lots and lots of typedefs
353  using Teuchos::RCP;
354 
355  // Options
356  int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
357  std::string myalg("LTG");
358  if(!params.is_null()) {
359  if(params->isParameter("openmp: algorithm"))
360  myalg = params->get("openmp: algorithm",myalg);
361  if(params->isParameter("openmp: team work size"))
362  team_work_size = params->get("openmp: team work size",team_work_size);
363  }
364 
365  if(myalg == "LTG") {
366  // Use the LTG kernel if requested
367  ::Tpetra::MatrixMatrix::ExtraKernels::mult_A_B_reuse_LowThreadGustavsonKernel(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
368  }
369  else {
370  throw std::runtime_error("Tpetra::MatrixMatrix::MMM reuse unknown kernel");
371  }
372 
373 #ifdef HAVE_TPETRA_MMM_TIMINGS
374  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Reuse OpenMPESFC"))));
375 #endif
376  C.fillComplete(C.getDomainMap(), C.getRangeMap());
377 }
378 
379 
380 /*********************************************************************************************************/
381 template<class Scalar,
382  class LocalOrdinal,
383  class GlobalOrdinal,
384  class LocalOrdinalViewType>
385 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
386  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
387  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
388  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
389  const LocalOrdinalViewType & Acol2Brow,
390  const LocalOrdinalViewType & Acol2Irow,
391  const LocalOrdinalViewType & Bcol2Ccol,
392  const LocalOrdinalViewType & Icol2Ccol,
393  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
394  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
395  const std::string& label,
396  const Teuchos::RCP<Teuchos::ParameterList>& params) {
397 
398 #ifdef HAVE_TPETRA_MMM_TIMINGS
399  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
400  using Teuchos::TimeMonitor;
401  Teuchos::RCP<TimeMonitor> MM;
402 #endif
403 
404  // Node-specific code
405  using Teuchos::RCP;
406 
407  // Options
408  int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
409  std::string myalg("LTG");
410  if(!params.is_null()) {
411  if(params->isParameter("openmp: jacobi algorithm"))
412  myalg = params->get("openmp: jacobi algorithm",myalg);
413  if(params->isParameter("openmp: team work size"))
414  team_work_size = params->get("openmp: team work size",team_work_size);
415  }
416 
417  if(myalg == "LTG") {
418  // Use the LTG kernel if requested
419  ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_LowThreadGustavsonKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
420  }
421  else if(myalg == "MSAK") {
422  ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_MultiplyScaleAddKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
423  }
424  else if(myalg == "KK") {
425  jacobi_A_B_newmatrix_KokkosKernels(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
426  }
427  else {
428  throw std::runtime_error("Tpetra::MatrixMatrix::Jacobi newmatrix unknown kernel");
429  }
430 
431 #ifdef HAVE_TPETRA_MMM_TIMINGS
432  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix OpenMPESFC"))));
433 #endif
434 
435  // Final Fillcomplete
436  RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
437  labelList->set("Timer Label",label);
438  if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
439 
440  // NOTE: MSAK already fillCompletes, so we have to check here
441  if(!C.isFillComplete()) {
442  RCP<const Export<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > dummyExport;
443  C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
444  }
445 
446 }
447 
448 
449 
450 /*********************************************************************************************************/
451 template<class Scalar,
452  class LocalOrdinal,
453  class GlobalOrdinal,
454  class LocalOrdinalViewType>
455 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
456  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
457  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
458  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
459  const LocalOrdinalViewType & Acol2Brow,
460  const LocalOrdinalViewType & Acol2Irow,
461  const LocalOrdinalViewType & Bcol2Ccol,
462  const LocalOrdinalViewType & Icol2Ccol,
463  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
464  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
465  const std::string& label,
466  const Teuchos::RCP<Teuchos::ParameterList>& params) {
467 
468 #ifdef HAVE_TPETRA_MMM_TIMINGS
469  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
470  using Teuchos::TimeMonitor;
471  Teuchos::RCP<TimeMonitor> MM;
472 #endif
473 
474  // Lots and lots of typedefs
475  using Teuchos::RCP;
476 
477  // Options
478  int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
479  std::string myalg("LTG");
480  if(!params.is_null()) {
481  if(params->isParameter("openmp: jacobi algorithm"))
482  myalg = params->get("openmp: jacobi algorithm",myalg);
483  if(params->isParameter("openmp: team work size"))
484  team_work_size = params->get("openmp: team work size",team_work_size);
485  }
486 
487  if(myalg == "LTG") {
488  // Use the LTG kernel if requested
489  ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_reuse_LowThreadGustavsonKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
490  }
491  else {
492  throw std::runtime_error("Tpetra::MatrixMatrix::Jacobi reuse unknown kernel");
493  }
494 
495 #ifdef HAVE_TPETRA_MMM_TIMINGS
496  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Reuse OpenMPESFC"))));
497 #endif
498  C.fillComplete(C.getDomainMap(), C.getRangeMap());
499 
500 }
501 
502 /*********************************************************************************************************/
503 template<class Scalar,
504  class LocalOrdinal,
505  class GlobalOrdinal,
506  class LocalOrdinalViewType>
507 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
508  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
509  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
510  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
511  const LocalOrdinalViewType & Acol2Brow,
512  const LocalOrdinalViewType & Acol2Irow,
513  const LocalOrdinalViewType & Bcol2Ccol,
514  const LocalOrdinalViewType & Icol2Ccol,
515  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
516  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
517  const std::string& label,
518  const Teuchos::RCP<Teuchos::ParameterList>& params) {
519 
520 #ifdef HAVE_TPETRA_MMM_TIMINGS
521  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
522  using Teuchos::TimeMonitor;
523  Teuchos::RCP<TimeMonitor> MM;
524 #endif
525 
526  // Check if the diagonal entries exist in debug mode
527  const bool debug = Tpetra::Details::Behavior::debug();
528  if(debug) {
529 
530  auto rowMap = Aview.origMatrix->getRowMap();
532  Aview.origMatrix->getLocalDiagCopy(diags);
533  size_t diagLength = rowMap->getNodeNumElements();
534  Teuchos::Array<Scalar> diagonal(diagLength);
535  diags.get1dCopy(diagonal());
536 
537  for(size_t i = 0; i < diagLength; ++i) {
538  TEUCHOS_TEST_FOR_EXCEPTION(diagonal[i] == Teuchos::ScalarTraits<Scalar>::zero(),
539  std::runtime_error,
540  "Matrix A has a zero/missing diagonal: " << diagonal[i] << std::endl <<
541  "KokkosKernels Jacobi-fused SpGEMM requires nonzero diagonal entries in A" << std::endl);
542  }
543  }
544 
545  // Usings
546  using device_t = typename Kokkos::Compat::KokkosOpenMPWrapperNode::device_type;
548  using graph_t = typename matrix_t::StaticCrsGraphType;
549  using lno_view_t = typename graph_t::row_map_type::non_const_type;
550  using c_lno_view_t = typename graph_t::row_map_type::const_type;
551  using lno_nnz_view_t = typename graph_t::entries_type::non_const_type;
552  using scalar_view_t = typename matrix_t::values_type::non_const_type;
553 
554  // KokkosKernels handle
555  using handle_t = typename KokkosKernels::Experimental::KokkosKernelsHandle<
556  typename lno_view_t::const_value_type,typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
557  typename device_t::execution_space, typename device_t::memory_space,typename device_t::memory_space >;
558 
559  // Get the rowPtr, colInd and vals of importMatrix
560  c_lno_view_t Irowptr;
561  lno_nnz_view_t Icolind;
562  scalar_view_t Ivals;
563  if(!Bview.importMatrix.is_null()) {
564  Irowptr = Bview.importMatrix->getLocalMatrix().graph.row_map;
565  Icolind = Bview.importMatrix->getLocalMatrix().graph.entries;
566  Ivals = Bview.importMatrix->getLocalMatrix().values;
567  }
568 
569  // Merge the B and Bimport matrices
570  const matrix_t Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getNodeNumElements());
571 
572  // Get the properties and arrays of input matrices
573  const matrix_t & Amat = Aview.origMatrix->getLocalMatrix();
574  const matrix_t & Bmat = Bview.origMatrix->getLocalMatrix();
575 
576  typename handle_t::nnz_lno_t AnumRows = Amat.numRows();
577  typename handle_t::nnz_lno_t BnumRows = Bmerged.numRows();
578  typename handle_t::nnz_lno_t BnumCols = Bmerged.numCols();
579 
580  c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmerged.graph.row_map;
581  const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmerged.graph.entries;
582  const scalar_view_t Avals = Amat.values, Bvals = Bmerged.values;
583 
584  // Arrays of the output matrix
585  lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing("non_const_lnow_row"), AnumRows + 1);
586  lno_nnz_view_t entriesC;
587  scalar_view_t valuesC;
588 
589  // Options
590  int team_work_size = 16;
591  std::string myalg("SPGEMM_KK_MEMORY");
592  if(!params.is_null()) {
593  if(params->isParameter("cuda: algorithm"))
594  myalg = params->get("cuda: algorithm",myalg);
595  if(params->isParameter("cuda: team work size"))
596  team_work_size = params->get("cuda: team work size",team_work_size);
597  }
598 
599  // Get the algorithm mode
600  std::string nodename("OpenMP");
601  std::string alg = nodename + std::string(" algorithm");
602  if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
603  KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
604 
605 
606  // KokkosKernels call
607  handle_t kh;
608  kh.create_spgemm_handle(alg_enum);
609  kh.set_team_work_size(team_work_size);
610 
611  KokkosSparse::Experimental::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
612  Arowptr, Acolind, false,
613  Browptr, Bcolind, false,
614  row_mapC);
615 
616  size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
617  if (c_nnz_size){
618  entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
619  valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
620  }
621 
622  KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
623  Arowptr, Acolind, Avals, false,
624  Browptr, Bcolind, Bvals, false,
625  row_mapC, entriesC, valuesC,
626  omega, Dinv.getLocalViewDevice());
627  kh.destroy_spgemm_handle();
628 
629 #ifdef HAVE_TPETRA_MMM_TIMINGS
630  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix OpenMPSort"))));
631 #endif
632 
633  // Sort & set values
634  if (params.is_null() || params->get("sort entries",true))
635  Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
636  C.setAllValues(row_mapC,entriesC,valuesC);
637 
638 #ifdef HAVE_TPETRA_MMM_TIMINGS
639  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix OpenMPESFC"))));
640 #endif
641 
642  // Final Fillcomplete
643  Teuchos::RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
644  labelList->set("Timer Label",label);
645  if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
646  Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > dummyExport;
647  C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
648 }
649 
650 
651 /*********************************************************************************************************/
652 template<class Scalar,
653  class LocalOrdinal,
654  class GlobalOrdinal,
655  class LocalOrdinalViewType>
656 void KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_R_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Rview,
657  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
658  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
659  const LocalOrdinalViewType & Acol2Prow,
660  const LocalOrdinalViewType & Acol2PIrow,
661  const LocalOrdinalViewType & Pcol2Accol,
662  const LocalOrdinalViewType & PIcol2Accol,
663  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
664  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
665  const std::string& label,
666  const Teuchos::RCP<Teuchos::ParameterList>& params) {
667 
668 
669 
670 #ifdef HAVE_TPETRA_MMM_TIMINGS
671  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
672  using Teuchos::TimeMonitor;
673  Teuchos::RCP<TimeMonitor> MM;
674 #endif
675 
676  // Node-specific code
677  std::string nodename("OpenMP");
678 
679  // Options
680  std::string myalg("LTG");
681 
682  if(!params.is_null()) {
683  if(params->isParameter("openmp: rap algorithm"))
684  myalg = params->get("openmp: rap algorithm",myalg);
685  }
686 
687  if(myalg == "LTG") {
688  // Use the LTG kernel if requested
689  ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_newmatrix_LowThreadGustavsonKernel(Rview,Aview,Pview,Acol2Prow,Acol2PIrow,Pcol2Accol,PIcol2Accol,Ac,Acimport,label,params);
690  }
691  else {
692  throw std::runtime_error("Tpetra::MatrixMatrix::R_A_P newmatrix unknown kernel");
693  }
694 }
695 
696 /*********************************************************************************************************/
697 template<class Scalar,
698  class LocalOrdinal,
699  class GlobalOrdinal,
700  class LocalOrdinalViewType>
701 void KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_R_A_P_reuse_kernel_wrapper(
702  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Rview,
703  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
704  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
705 
706  const LocalOrdinalViewType & Acol2Prow,
707  const LocalOrdinalViewType & Acol2Irow,
708  const LocalOrdinalViewType & Pcol2Ccol,
709  const LocalOrdinalViewType & Icol2Ccol,
710  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
711  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
712  const std::string& label,
713  const Teuchos::RCP<Teuchos::ParameterList>& params) {
714 
715 #ifdef HAVE_TPETRA_MMM_TIMINGS
716  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
717  using Teuchos::TimeMonitor;
718  Teuchos::RCP<TimeMonitor> MM;
719 #endif
720 
721  // Lots and lots of typedefs
722  using Teuchos::RCP;
723 
724  // Options
725  std::string myalg("LTG");
726  if(!params.is_null()) {
727  if(params->isParameter("openmp: rap algorithm"))
728  myalg = params->get("openmp: rap algorithm",myalg);
729  }
730 
731  if(myalg == "LTG") {
732  // Use the LTG kernel if requested
733  ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_reuse_LowThreadGustavsonKernel(Rview,Aview,Pview,Acol2Prow,Acol2Irow,Pcol2Ccol,Icol2Ccol,C,Cimport,label,params);
734  }
735  else {
736  throw std::runtime_error("Tpetra::MatrixMatrix::R_A_P newmatrix unknown kernel");
737  }
738 
739 #ifdef HAVE_TPETRA_MMM_TIMINGS
740  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("RAP Reuse OpenMPESFC"))));
741 #endif
742  C.fillComplete(C.getDomainMap(), C.getRangeMap());
743 
744 }
745 
746 
747 
748 /*********************************************************************************************************/
749 template<class Scalar,
750  class LocalOrdinal,
751  class GlobalOrdinal,
752  class LocalOrdinalViewType>
753 void KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_PT_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
754 
755  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
756  const LocalOrdinalViewType & Acol2Prow,
757  const LocalOrdinalViewType & Acol2PIrow,
758  const LocalOrdinalViewType & Pcol2Accol,
759  const LocalOrdinalViewType & PIcol2Accol,
760  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
761  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
762  const std::string& label,
763  const Teuchos::RCP<Teuchos::ParameterList>& params) {
764 
765 
766 #ifdef HAVE_TPETRA_MMM_TIMINGS
767  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
768  using Teuchos::TimeMonitor;
769  Teuchos::RCP<TimeMonitor> MM;
770 #endif
771 
772  // Node-specific code
773  std::string nodename("OpenMP");
774 
775  // Options
776  std::string myalg("LTG");
777 
778  if(!params.is_null()) {
779  if(params->isParameter("openmp: ptap algorithm"))
780  myalg = params->get("openmp: ptap algorithm",myalg);
781  }
782 
783  if(myalg == "LTG") {
784 #ifdef HAVE_TPETRA_MMM_TIMINGS
785  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("PTAP local transpose"))));
786 #endif
787 
788  using Teuchos::ParameterList;
789  using Teuchos::RCP;
790  using LO = LocalOrdinal;
791  using GO = GlobalOrdinal;
792  using SC = Scalar;
793 
794  // We don't need a kernel-level PTAP, we just transpose here
795  using transposer_type =
796  RowMatrixTransposer<SC, LO, GO, Kokkos::Compat::KokkosOpenMPWrapperNode>;
797  transposer_type transposer (Pview.origMatrix, label + "XP: ");
798  RCP<ParameterList> transposeParams (new ParameterList);
799  if (! params.is_null ()) {
800  transposeParams->set ("compute global constants",
801  params->get ("compute global constants: temporaries",
802  false));
803  }
804  transposeParams->set ("sort", false);
805  RCP<CrsMatrix<SC, LO, GO, Kokkos::Compat::KokkosOpenMPWrapperNode> > Ptrans =
806  transposer.createTransposeLocal (transposeParams);
807  CrsMatrixStruct<SC, LO, GO, Kokkos::Compat::KokkosOpenMPWrapperNode> Rview;
808  Rview.origMatrix = Ptrans;
809 
810  using ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_newmatrix_LowThreadGustavsonKernel;
811  mult_R_A_P_newmatrix_LowThreadGustavsonKernel
812  (Rview, Aview, Pview, Acol2Prow, Acol2PIrow, Pcol2Accol,
813  PIcol2Accol, Ac, Acimport, label, params);
814  }
815  else {
816  throw std::runtime_error("Tpetra::MatrixMatrix::PT_A_P newmatrix unknown kernel");
817  }
818 }
819 
820 /*********************************************************************************************************/
821 template<class Scalar,
822  class LocalOrdinal,
823  class GlobalOrdinal,
824  class LocalOrdinalViewType>
825 void KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_PT_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
826 
827  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
828  const LocalOrdinalViewType & Acol2Prow,
829  const LocalOrdinalViewType & Acol2PIrow,
830  const LocalOrdinalViewType & Pcol2Accol,
831  const LocalOrdinalViewType & PIcol2Accol,
832  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
833  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
834  const std::string& label,
835  const Teuchos::RCP<Teuchos::ParameterList>& params) {
836 
837 
838 #ifdef HAVE_TPETRA_MMM_TIMINGS
839  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
840  using Teuchos::TimeMonitor;
841  Teuchos::RCP<TimeMonitor> MM;
842 #endif
843 
844  // Node-specific code
845  std::string nodename("OpenMP");
846 
847  // Options
848  std::string myalg("LTG");
849 
850  if(!params.is_null()) {
851  if(params->isParameter("openmp: ptap algorithm"))
852  myalg = params->get("openmp: ptap algorithm",myalg);
853  }
854 
855  if(myalg == "LTG") {
856 #ifdef HAVE_TPETRA_MMM_TIMINGS
857  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("PTAP local transpose"))));
858 #endif
859 
860  using Teuchos::ParameterList;
861  using Teuchos::RCP;
862  using LO = LocalOrdinal;
863  using GO = GlobalOrdinal;
864  using SC = Scalar;
865 
866  // We don't need a kernel-level PTAP, we just transpose here
867  using transposer_type =
868  RowMatrixTransposer<SC, LO, GO, Kokkos::Compat::KokkosOpenMPWrapperNode>;
869  transposer_type transposer (Pview.origMatrix, label + "XP: ");
870  RCP<ParameterList> transposeParams (new ParameterList);
871  if (! params.is_null ()) {
872  transposeParams->set ("compute global constants",
873  params->get ("compute global constants: temporaries",
874  false));
875  }
876  transposeParams->set ("sort", false);
877  RCP<CrsMatrix<SC, LO, GO, Kokkos::Compat::KokkosOpenMPWrapperNode> > Ptrans =
878  transposer.createTransposeLocal (transposeParams);
879  CrsMatrixStruct<SC, LO, GO, Kokkos::Compat::KokkosOpenMPWrapperNode> Rview;
880  Rview.origMatrix = Ptrans;
881 
882  using ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_reuse_LowThreadGustavsonKernel;
883  mult_R_A_P_reuse_LowThreadGustavsonKernel
884  (Rview, Aview, Pview, Acol2Prow, Acol2PIrow, Pcol2Accol,
885  PIcol2Accol, Ac, Acimport, label, params);
886  }
887  else {
888  throw std::runtime_error("Tpetra::MatrixMatrix::PT_A_P reuse unknown kernel");
889  }
890  Ac.fillComplete(Ac.getDomainMap(), Ac.getRangeMap());
891 }
892 
893 
894 }//MMdetails
895 }//Tpetra
896 
897 #endif//OpenMP
898 
899 #endif
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_type::size_type > local_matrix_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
static bool debug()
Whether Tpetra is in debug mode.
A distributed dense vector.