Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
TpetraExt_MatrixMatrix_OpenMP.hpp
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_MATRIXMATRIX_OPENMP_DEF_HPP
43 #define TPETRA_MATRIXMATRIX_OPENMP_DEF_HPP
44 
45 #ifdef HAVE_TPETRA_INST_OPENMP
46 namespace Tpetra {
47 namespace MMdetails {
48 
49 /*********************************************************************************************************/
50 // MMM KernelWrappers for Partial Specialization to OpenMP
51 template<class Scalar,
52  class LocalOrdinal,
53  class GlobalOrdinal, class LocalOrdinalViewType>
54 struct KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType> {
55  static inline void mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
56  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
57  const LocalOrdinalViewType & Acol2Brow,
58  const LocalOrdinalViewType & Acol2Irow,
59  const LocalOrdinalViewType & Bcol2Ccol,
60  const LocalOrdinalViewType & Icol2Ccol,
61  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
62  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
63  const std::string& label = std::string(),
64  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
65 
66  static inline void mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
67  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
68  const LocalOrdinalViewType & Acol2Brow,
69  const LocalOrdinalViewType & Acol2Irow,
70  const LocalOrdinalViewType & Bcol2Ccol,
71  const LocalOrdinalViewType & Icol2Ccol,
72  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
73  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
74  const std::string& label = std::string(),
75  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
76 
77 
78 };
79 
80 
81 // Jacobi KernelWrappers for Partial Specialization to OpenMP
82 template<class Scalar,
83  class LocalOrdinal,
84  class GlobalOrdinal, class LocalOrdinalViewType>
85 struct KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType> {
86  static inline void jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
87  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
88  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
89  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
90  const LocalOrdinalViewType & Acol2Brow,
91  const LocalOrdinalViewType & Acol2Irow,
92  const LocalOrdinalViewType & Bcol2Ccol,
93  const LocalOrdinalViewType & Icol2Ccol,
94  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
95  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
96  const std::string& label = std::string(),
97  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
98 
99  static inline void jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
100  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
101  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
102  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
103  const LocalOrdinalViewType & Acol2Brow,
104  const LocalOrdinalViewType & Acol2Irow,
105  const LocalOrdinalViewType & Bcol2Ccol,
106  const LocalOrdinalViewType & Icol2Ccol,
107  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
108  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
109  const std::string& label = std::string(),
110  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
111 
112 };
113 
114 
115 // Triple-Product KernelWrappers for Partial Specialization to OpenMP
116 template<class Scalar,
117  class LocalOrdinal,
118  class GlobalOrdinal, class LocalOrdinalViewType>
119 struct KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType> {
120  static inline void mult_R_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Rview,
121  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
122  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
123  const LocalOrdinalViewType & Acol2Prow,
124  const LocalOrdinalViewType & Acol2PIrow,
125  const LocalOrdinalViewType & Pcol2Ccol,
126  const LocalOrdinalViewType & PIcol2Ccol,
127  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
128  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
129  const std::string& label = std::string(),
130  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
131 
132 static inline void mult_R_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Rview,
133  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
134  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
135  const LocalOrdinalViewType & Acol2Prow,
136  const LocalOrdinalViewType & Acol2PIrow,
137  const LocalOrdinalViewType & Pcol2Ccol,
138  const LocalOrdinalViewType & PIcol2Ccol,
139  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
140  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
141  const std::string& label = std::string(),
142  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
143 
144  static inline void mult_PT_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
145  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
146  const LocalOrdinalViewType & Acol2Prow,
147  const LocalOrdinalViewType & Acol2PIrow,
148  const LocalOrdinalViewType & Pcol2Ccol,
149  const LocalOrdinalViewType & PIcol2Ccol,
150  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
151  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
152  const std::string& label = std::string(),
153  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
154 
155  static inline void mult_PT_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
156  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
157  const LocalOrdinalViewType & Acol2Prow,
158  const LocalOrdinalViewType & Acol2PIrow,
159  const LocalOrdinalViewType & Pcol2Ccol,
160  const LocalOrdinalViewType & PIcol2Ccol,
161  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
162  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
163  const std::string& label = std::string(),
164  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
165 };
166 
167 
168 /*********************************************************************************************************/
169 template<class Scalar,
170  class LocalOrdinal,
171  class GlobalOrdinal,
172  class LocalOrdinalViewType>
173 void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
174  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
175  const LocalOrdinalViewType & Acol2Brow,
176  const LocalOrdinalViewType & Acol2Irow,
177  const LocalOrdinalViewType & Bcol2Ccol,
178  const LocalOrdinalViewType & Icol2Ccol,
179  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
180  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
181  const std::string& label,
182  const Teuchos::RCP<Teuchos::ParameterList>& params) {
183 
184 #ifdef HAVE_TPETRA_MMM_TIMINGS
185  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
186  using Teuchos::TimeMonitor;
187  Teuchos::RCP<TimeMonitor> MM;
188 #endif
189 
190  // Node-specific code
191  std::string nodename("OpenMP");
192 
193  // Lots and lots of typedefs
194  using Teuchos::RCP;
196  typedef typename KCRS::device_type device_t;
197  typedef typename KCRS::StaticCrsGraphType graph_t;
198  typedef typename graph_t::row_map_type::non_const_type lno_view_t;
199  typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
200  typedef typename KCRS::values_type::non_const_type scalar_view_t;
201 
202  // Options
203  int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
204  std::string myalg("SPGEMM_KK_MEMORY");
205 
206 
207  if(!params.is_null()) {
208  if(params->isParameter("openmp: algorithm"))
209  myalg = params->get("openmp: algorithm",myalg);
210  if(params->isParameter("openmp: team work size"))
211  team_work_size = params->get("openmp: team work size",team_work_size);
212  }
213 
214  if(myalg == "LTG") {
215  // Use the LTG kernel if requested
216  ::Tpetra::MatrixMatrix::ExtraKernels::mult_A_B_newmatrix_LowThreadGustavsonKernel(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
217  }
218  else {
219  // Use the Kokkos-Kernels OpenMP Kernel
220 #ifdef HAVE_TPETRA_MMM_TIMINGS
221  MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPWrapper"))));
222 #endif
223  // KokkosKernelsHandle
224  typedef KokkosKernels::Experimental::KokkosKernelsHandle<
225  typename lno_view_t::const_value_type,typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
226  typename device_t::execution_space, typename device_t::memory_space,typename device_t::memory_space > KernelHandle;
227 
228  // Grab the Kokkos::SparseCrsMatrices
229  const KCRS & Ak = Aview.origMatrix->getLocalMatrix();
230  // const KCRS & Bk = Bview.origMatrix->getLocalMatrix();
231 
232  // Get the algorithm mode
233  std::string alg = nodename+std::string(" algorithm");
234  // printf("DEBUG: Using kernel: %s\n",myalg.c_str());
235  if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
236  KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
237 
238  // Merge the B and Bimport matrices
239  const KCRS Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getNodeNumElements());
240 
241 #ifdef HAVE_TPETRA_MMM_TIMINGS
242  MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPCore"))));
243 #endif
244 
245  // Do the multiply on whatever we've got
246  typename KernelHandle::nnz_lno_t AnumRows = Ak.numRows();
247  // typename KernelHandle::nnz_lno_t BnumRows = Bmerged->numRows();
248  // typename KernelHandle::nnz_lno_t BnumCols = Bmerged->numCols();
249  typename KernelHandle::nnz_lno_t BnumRows = Bmerged.numRows();
250  typename KernelHandle::nnz_lno_t BnumCols = Bmerged.numCols();
251 
252 
253  lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing("non_const_lnow_row"), AnumRows + 1);
254  lno_nnz_view_t entriesC;
255  scalar_view_t valuesC;
256  KernelHandle kh;
257  kh.create_spgemm_handle(alg_enum);
258  kh.set_team_work_size(team_work_size);
259  // KokkosSparse::Experimental::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,false,Bmerged->graph.row_map,Bmerged->graph.entries,false,row_mapC);
260  KokkosSparse::Experimental::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,false,Bmerged.graph.row_map,Bmerged.graph.entries,false,row_mapC);
261 
262  size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
263  if (c_nnz_size){
264  entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
265  valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
266  }
267  // KokkosSparse::Experimental::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,Ak.values,false,Bmerged->graph.row_map,Bmerged->graph.entries,Bmerged->values,false,row_mapC,entriesC,valuesC);
268  KokkosSparse::Experimental::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,Ak.values,false,Bmerged.graph.row_map,Bmerged.graph.entries,Bmerged.values,false,row_mapC,entriesC,valuesC);
269  kh.destroy_spgemm_handle();
270 
271 #ifdef HAVE_TPETRA_MMM_TIMINGS
272  MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPSort"))));
273 #endif
274  // Sort & set values
275  if (params.is_null() || params->get("sort entries",true))
276  Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
277  C.setAllValues(row_mapC,entriesC,valuesC);
278 
279  }// end OMP KokkosKernels loop
280 
281 #ifdef HAVE_TPETRA_MMM_TIMINGS
282  MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPESFC"))));
283 #endif
284 
285  // Final Fillcomplete
286  RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
287  labelList->set("Timer Label",label);
288  if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
289  RCP<const Export<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > dummyExport;
290  C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
291 
292 #if 0
293  {
294  Teuchos::ArrayRCP< const size_t > Crowptr;
295  Teuchos::ArrayRCP< const LocalOrdinal > Ccolind;
296  Teuchos::ArrayRCP< const Scalar > Cvalues;
297  C.getAllValues(Crowptr,Ccolind,Cvalues);
298 
299  // DEBUG
300  int MyPID = C->getComm()->getRank();
301  printf("[%d] Crowptr = ",MyPID);
302  for(size_t i=0; i<(size_t) Crowptr.size(); i++) {
303  printf("%3d ",(int)Crowptr.getConst()[i]);
304  }
305  printf("\n");
306  printf("[%d] Ccolind = ",MyPID);
307  for(size_t i=0; i<(size_t)Ccolind.size(); i++) {
308  printf("%3d ",(int)Ccolind.getConst()[i]);
309  }
310  printf("\n");
311  fflush(stdout);
312  // END DEBUG
313  }
314 #endif
315 }
316 
317 
318 /*********************************************************************************************************/
319 template<class Scalar,
320  class LocalOrdinal,
321  class GlobalOrdinal,
322  class LocalOrdinalViewType>
323 void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
324  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
325  const LocalOrdinalViewType & Acol2Brow,
326  const LocalOrdinalViewType & Acol2Irow,
327  const LocalOrdinalViewType & Bcol2Ccol,
328  const LocalOrdinalViewType & Icol2Ccol,
329  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
330  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
331  const std::string& label,
332  const Teuchos::RCP<Teuchos::ParameterList>& params) {
333 #ifdef HAVE_TPETRA_MMM_TIMINGS
334  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
335  using Teuchos::TimeMonitor;
336  Teuchos::RCP<TimeMonitor> MM;
337 #endif
338 
339  // Lots and lots of typedefs
340  using Teuchos::RCP;
341 
342  // Options
343  int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
344  std::string myalg("LTG");
345  if(!params.is_null()) {
346  if(params->isParameter("openmp: algorithm"))
347  myalg = params->get("openmp: algorithm",myalg);
348  if(params->isParameter("openmp: team work size"))
349  team_work_size = params->get("openmp: team work size",team_work_size);
350  }
351 
352  if(myalg == "LTG") {
353  // Use the LTG kernel if requested
354  ::Tpetra::MatrixMatrix::ExtraKernels::mult_A_B_reuse_LowThreadGustavsonKernel(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
355  }
356  else {
357  throw std::runtime_error("Tpetra::MatrixMatrix::MMM reuse unknown kernel");
358  }
359 
360 #ifdef HAVE_TPETRA_MMM_TIMINGS
361  MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Reuse OpenMPESFC"))));
362 #endif
363  C.fillComplete(C.getDomainMap(), C.getRangeMap());
364 }
365 
366 
367 /*********************************************************************************************************/
368 template<class Scalar,
369  class LocalOrdinal,
370  class GlobalOrdinal,
371  class LocalOrdinalViewType>
372 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
373  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
374  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
375  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
376  const LocalOrdinalViewType & Acol2Brow,
377  const LocalOrdinalViewType & Acol2Irow,
378  const LocalOrdinalViewType & Bcol2Ccol,
379  const LocalOrdinalViewType & Icol2Ccol,
380  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
381  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
382  const std::string& label,
383  const Teuchos::RCP<Teuchos::ParameterList>& params) {
384 
385 #ifdef HAVE_TPETRA_MMM_TIMINGS
386  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
387  using Teuchos::TimeMonitor;
388  Teuchos::RCP<TimeMonitor> MM;
389 #endif
390 
391  // Node-specific code
392  using Teuchos::RCP;
393 
394  // Options
395  int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
396  std::string myalg("LTG");
397  if(!params.is_null()) {
398  if(params->isParameter("openmp: jacobi algorithm"))
399  myalg = params->get("openmp: jacobi algorithm",myalg);
400  if(params->isParameter("openmp: team work size"))
401  team_work_size = params->get("openmp: team work size",team_work_size);
402  }
403 
404  if(myalg == "LTG") {
405  // Use the LTG kernel if requested
406  ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_LowThreadGustavsonKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
407  }
408  else if(myalg == "MSAK") {
409  ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_MultiplyScaleAddKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
410  }
411  else {
412  throw std::runtime_error("Tpetra::MatrixMatrix::Jacobi newmatrix unknown kernel");
413  }
414 
415 #ifdef HAVE_TPETRA_MMM_TIMINGS
416  MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix OpenMPESFC"))));
417 #endif
418 
419  // Final Fillcomplete
420  RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
421  labelList->set("Timer Label",label);
422  if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
423 
424  // NOTE: MSAK already fillCompletes, so we have to check here
425  if(!C.isFillComplete()) {
426  RCP<const Export<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > dummyExport;
427  C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
428  }
429 
430 }
431 
432 
433 
434 /*********************************************************************************************************/
435 template<class Scalar,
436  class LocalOrdinal,
437  class GlobalOrdinal,
438  class LocalOrdinalViewType>
439 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
440  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
441  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
442  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
443  const LocalOrdinalViewType & Acol2Brow,
444  const LocalOrdinalViewType & Acol2Irow,
445  const LocalOrdinalViewType & Bcol2Ccol,
446  const LocalOrdinalViewType & Icol2Ccol,
447  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
448  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
449  const std::string& label,
450  const Teuchos::RCP<Teuchos::ParameterList>& params) {
451 
452 #ifdef HAVE_TPETRA_MMM_TIMINGS
453  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
454  using Teuchos::TimeMonitor;
455  Teuchos::RCP<TimeMonitor> MM;
456 #endif
457 
458  // Lots and lots of typedefs
459  using Teuchos::RCP;
460 
461  // Options
462  int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
463  std::string myalg("LTG");
464  if(!params.is_null()) {
465  if(params->isParameter("openmp: jacobi algorithm"))
466  myalg = params->get("openmp: jacobi algorithm",myalg);
467  if(params->isParameter("openmp: team work size"))
468  team_work_size = params->get("openmp: team work size",team_work_size);
469  }
470 
471  if(myalg == "LTG") {
472  // Use the LTG kernel if requested
473  ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_reuse_LowThreadGustavsonKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
474  }
475  else {
476  throw std::runtime_error("Tpetra::MatrixMatrix::Jacobi reuse unknown kernel");
477  }
478 
479 #ifdef HAVE_TPETRA_MMM_TIMINGS
480  MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Reuse OpenMPESFC"))));
481 #endif
482  C.fillComplete(C.getDomainMap(), C.getRangeMap());
483 
484 }
485 
486 
487 /*********************************************************************************************************/
488 template<class Scalar,
489  class LocalOrdinal,
490  class GlobalOrdinal,
491  class LocalOrdinalViewType>
492 void KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_R_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Rview,
493  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
494  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
495  const LocalOrdinalViewType & Acol2Prow,
496  const LocalOrdinalViewType & Acol2PIrow,
497  const LocalOrdinalViewType & Pcol2Accol,
498  const LocalOrdinalViewType & PIcol2Accol,
499  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
500  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
501  const std::string& label,
502  const Teuchos::RCP<Teuchos::ParameterList>& params) {
503 
504 
505 
506 #ifdef HAVE_TPETRA_MMM_TIMINGS
507  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
508  using Teuchos::TimeMonitor;
509  Teuchos::RCP<TimeMonitor> MM;
510 #endif
511 
512  // Node-specific code
513  std::string nodename("OpenMP");
514 
515  // Options
516  std::string myalg("LTG");
517 
518  if(!params.is_null()) {
519  if(params->isParameter("openmp: rap algorithm"))
520  myalg = params->get("openmp: rap algorithm",myalg);
521  }
522 
523  if(myalg == "LTG") {
524  // Use the LTG kernel if requested
525  ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_newmatrix_LowThreadGustavsonKernel(Rview,Aview,Pview,Acol2Prow,Acol2PIrow,Pcol2Accol,PIcol2Accol,Ac,Acimport,label,params);
526  }
527  else {
528  throw std::runtime_error("Tpetra::MatrixMatrix::R_A_P newmatrix unknown kernel");
529  }
530 }
531 
532 /*********************************************************************************************************/
533 template<class Scalar,
534  class LocalOrdinal,
535  class GlobalOrdinal,
536  class LocalOrdinalViewType>
537 void KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_R_A_P_reuse_kernel_wrapper(
538  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Rview,
539  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
540  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
541 
542  const LocalOrdinalViewType & Acol2Prow,
543  const LocalOrdinalViewType & Acol2Irow,
544  const LocalOrdinalViewType & Pcol2Ccol,
545  const LocalOrdinalViewType & Icol2Ccol,
546  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
547  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
548  const std::string& label,
549  const Teuchos::RCP<Teuchos::ParameterList>& params) {
550 
551 #ifdef HAVE_TPETRA_MMM_TIMINGS
552  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
553  using Teuchos::TimeMonitor;
554  Teuchos::RCP<TimeMonitor> MM;
555 #endif
556 
557  // Lots and lots of typedefs
558  using Teuchos::RCP;
559 
560  // Options
561  std::string myalg("LTG");
562  if(!params.is_null()) {
563  if(params->isParameter("openmp: rap algorithm"))
564  myalg = params->get("openmp: rap algorithm",myalg);
565  }
566 
567  if(myalg == "LTG") {
568  // Use the LTG kernel if requested
569  ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_reuse_LowThreadGustavsonKernel(Rview,Aview,Pview,Acol2Prow,Acol2Irow,Pcol2Ccol,Icol2Ccol,C,Cimport,label,params);
570  }
571  else {
572  throw std::runtime_error("Tpetra::MatrixMatrix::R_A_P newmatrix unknown kernel");
573  }
574 
575 #ifdef HAVE_TPETRA_MMM_TIMINGS
576  MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("RAP Reuse OpenMPESFC"))));
577 #endif
578  C.fillComplete(C.getDomainMap(), C.getRangeMap());
579 
580 }
581 
582 
583 
584 /*********************************************************************************************************/
585 template<class Scalar,
586  class LocalOrdinal,
587  class GlobalOrdinal,
588  class LocalOrdinalViewType>
589 void KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_PT_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
590 
591  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
592  const LocalOrdinalViewType & Acol2Prow,
593  const LocalOrdinalViewType & Acol2PIrow,
594  const LocalOrdinalViewType & Pcol2Accol,
595  const LocalOrdinalViewType & PIcol2Accol,
596  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
597  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
598  const std::string& label,
599  const Teuchos::RCP<Teuchos::ParameterList>& params) {
600 
601 
602 #ifdef HAVE_TPETRA_MMM_TIMINGS
603  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
604  using Teuchos::TimeMonitor;
605  Teuchos::RCP<TimeMonitor> MM;
606 #endif
607 
608  // Node-specific code
609  std::string nodename("OpenMP");
610 
611  // Options
612  std::string myalg("LTG");
613 
614  if(!params.is_null()) {
615  if(params->isParameter("openmp: ptap algorithm"))
616  myalg = params->get("openmp: ptap algorithm",myalg);
617  }
618 
619  if(myalg == "LTG") {
620  #ifdef HAVE_TPETRA_MMM_TIMINGS
621  MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("PTAP local transpose"))));
622  #endif
623  // We don't need a kernel-level PTAP, we just transpose here
624  typedef RowMatrixTransposer<Scalar,LocalOrdinal,GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode> transposer_type;
625  transposer_type transposer (Pview.origMatrix,label+std::string("XP: "));
626  Teuchos::RCP<Teuchos::ParameterList> transposeParams = Teuchos::rcp(new Teuchos::ParameterList);
627  if (!params.is_null())
628  transposeParams->set("compute global constants",
629  params->get("compute global constants: temporaries",
630  false));
631  Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode> > Ptrans = transposer.createTransposeLocal(transposeParams);
632  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode> Rview;
633  Rview.origMatrix = Ptrans;
634  ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_newmatrix_LowThreadGustavsonKernel(Rview,Aview,Pview,Acol2Prow,Acol2PIrow,Pcol2Accol,PIcol2Accol,Ac,Acimport,label,params);
635  }
636  else {
637  throw std::runtime_error("Tpetra::MatrixMatrix::PT_A_P newmatrix unknown kernel");
638  }
639 }
640 
641 /*********************************************************************************************************/
642 template<class Scalar,
643  class LocalOrdinal,
644  class GlobalOrdinal,
645  class LocalOrdinalViewType>
646 void KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_PT_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
647 
648  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
649  const LocalOrdinalViewType & Acol2Prow,
650  const LocalOrdinalViewType & Acol2PIrow,
651  const LocalOrdinalViewType & Pcol2Accol,
652  const LocalOrdinalViewType & PIcol2Accol,
653  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
654  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
655  const std::string& label,
656  const Teuchos::RCP<Teuchos::ParameterList>& params) {
657 
658 
659 #ifdef HAVE_TPETRA_MMM_TIMINGS
660  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
661  using Teuchos::TimeMonitor;
662  Teuchos::RCP<TimeMonitor> MM;
663 #endif
664 
665  // Node-specific code
666  std::string nodename("OpenMP");
667 
668  // Options
669  std::string myalg("LTG");
670 
671  if(!params.is_null()) {
672  if(params->isParameter("openmp: ptap algorithm"))
673  myalg = params->get("openmp: ptap algorithm",myalg);
674  }
675 
676  if(myalg == "LTG") {
677  #ifdef HAVE_TPETRA_MMM_TIMINGS
678  MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("PTAP local transpose"))));
679  #endif
680  // We don't need a kernel-level PTAP, we just transpose here
681  typedef RowMatrixTransposer<Scalar,LocalOrdinal,GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode> transposer_type;
682  transposer_type transposer (Pview.origMatrix,label+std::string("XP: "));
683  Teuchos::RCP<Teuchos::ParameterList> transposeParams = Teuchos::rcp(new Teuchos::ParameterList);
684  if (!params.is_null())
685  transposeParams->set("compute global constants",
686  params->get("compute global constants: temporaries",
687  false));
688  Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode> > Ptrans = transposer.createTransposeLocal(transposeParams);
689  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode> Rview;
690  Rview.origMatrix = Ptrans;
691  ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_reuse_LowThreadGustavsonKernel(Rview,Aview,Pview,Acol2Prow,Acol2PIrow,Pcol2Accol,PIcol2Accol,Ac,Acimport,label,params);
692  }
693  else {
694  throw std::runtime_error("Tpetra::MatrixMatrix::PT_A_P reuse unknown kernel");
695  }
696  Ac.fillComplete(Ac.getDomainMap(), Ac.getRangeMap());
697 }
698 
699 
700 }//MMdetails
701 }//Tpetra
702 
703 #endif//OpenMP
704 
705 #endif
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, execution_space, void, typename local_graph_type::size_type > local_matrix_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...