Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
TpetraExt_MatrixMatrix_HIP.hpp
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_MATRIXMATRIX_HIP_DEF_HPP
43 #define TPETRA_MATRIXMATRIX_HIP_DEF_HPP
44 
45 #ifdef HAVE_TPETRA_INST_HIP
46 namespace Tpetra {
47 namespace MMdetails {
48 
49 /*********************************************************************************************************/
50 // MMM KernelWrappers for Partial Specialization to HIP
51 template<class Scalar,
52  class LocalOrdinal,
53  class GlobalOrdinal,
54  class LocalOrdinalViewType>
55 struct KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType> {
56  static inline void mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
57  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
58  const LocalOrdinalViewType & Acol2Brow,
59  const LocalOrdinalViewType & Acol2Irow,
60  const LocalOrdinalViewType & Bcol2Ccol,
61  const LocalOrdinalViewType & Icol2Ccol,
62  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
63  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
64  const std::string& label = std::string(),
65  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
66 
67 
68 
69  static inline void mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
70  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
71  const LocalOrdinalViewType & Acol2Brow,
72  const LocalOrdinalViewType & Acol2Irow,
73  const LocalOrdinalViewType & Bcol2Ccol,
74  const LocalOrdinalViewType & Icol2Ccol,
75  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
76  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
77  const std::string& label = std::string(),
78  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
79 
80 };
81 
82 // Jacobi KernelWrappers for Partial Specialization to HIP
83 template<class Scalar,
84  class LocalOrdinal,
85  class GlobalOrdinal, class LocalOrdinalViewType>
86 struct KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType> {
87  static inline void jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
88  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> & Dinv,
89  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
90  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
91  const LocalOrdinalViewType & Acol2Brow,
92  const LocalOrdinalViewType & Acol2Irow,
93  const LocalOrdinalViewType & Bcol2Ccol,
94  const LocalOrdinalViewType & Icol2Ccol,
95  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
96  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
97  const std::string& label = std::string(),
98  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
99 
100  static inline void jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
101  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> & Dinv,
102  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
103  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
104  const LocalOrdinalViewType & Acol2Brow,
105  const LocalOrdinalViewType & Acol2Irow,
106  const LocalOrdinalViewType & Bcol2Ccol,
107  const LocalOrdinalViewType & Icol2Ccol,
108  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
109  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
110  const std::string& label = std::string(),
111  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
112 
113  static inline void jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
114  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> & Dinv,
115  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
116  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
117  const LocalOrdinalViewType & Acol2Brow,
118  const LocalOrdinalViewType & Acol2Irow,
119  const LocalOrdinalViewType & Bcol2Ccol,
120  const LocalOrdinalViewType & Icol2Ccol,
121  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
122  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
123  const std::string& label = std::string(),
124  const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
125 };
126 
127 
128 /*********************************************************************************************************/
129 // AB NewMatrix Kernel wrappers (KokkosKernels/HIP Version)
130 template<class Scalar,
131  class LocalOrdinal,
132  class GlobalOrdinal,
133  class LocalOrdinalViewType>
134 void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
135  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
136  const LocalOrdinalViewType & Acol2Brow,
137  const LocalOrdinalViewType & Acol2Irow,
138  const LocalOrdinalViewType & Bcol2Ccol,
139  const LocalOrdinalViewType & Icol2Ccol,
140  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
141  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
142  const std::string& label,
143  const Teuchos::RCP<Teuchos::ParameterList>& params) {
144 
145 
146 #ifdef HAVE_TPETRA_MMM_TIMINGS
147  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
148  using Teuchos::TimeMonitor;
149  Teuchos::RCP<TimeMonitor> MM = rcp(new TimeMonitor(*(TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix HIPWrapper")))));
150 #endif
151  // Node-specific code
152  typedef Tpetra::KokkosCompat::KokkosHIPWrapperNode Node;
153  std::string nodename("HIP");
154 
155  // Lots and lots of typedefs
156  using Teuchos::RCP;
158  typedef typename KCRS::device_type device_t;
159  typedef typename KCRS::StaticCrsGraphType graph_t;
160  typedef typename graph_t::row_map_type::non_const_type lno_view_t;
161  typedef typename graph_t::row_map_type::const_type c_lno_view_t;
162  typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
163  typedef typename KCRS::values_type::non_const_type scalar_view_t;
164  //typedef typename graph_t::row_map_type::const_type lno_view_t_const;
165 
166  // Options
167  int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
168  std::string myalg("SPGEMM_KK_MEMORY");
169  if(!params.is_null()) {
170  if(params->isParameter("hip: algorithm"))
171  myalg = params->get("hip: algorithm",myalg);
172  if(params->isParameter("hip: team work size"))
173  team_work_size = params->get("hip: team work size",team_work_size);
174  }
175 
176  // KokkosKernelsHandle
177  typedef KokkosKernels::Experimental::KokkosKernelsHandle<
178  typename lno_view_t::const_value_type,typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
179  typename device_t::execution_space, typename device_t::memory_space,typename device_t::memory_space > KernelHandle;
180 
181  // Grab the Kokkos::SparseCrsMatrices
182  const KCRS & Amat = Aview.origMatrix->getLocalMatrixDevice();
183  const KCRS & Bmat = Bview.origMatrix->getLocalMatrixDevice();
184 
185  c_lno_view_t Arowptr = Amat.graph.row_map,
186  Browptr = Bmat.graph.row_map;
187  const lno_nnz_view_t Acolind = Amat.graph.entries,
188  Bcolind = Bmat.graph.entries;
189  const scalar_view_t Avals = Amat.values,
190  Bvals = Bmat.values;
191 
192  c_lno_view_t Irowptr;
193  lno_nnz_view_t Icolind;
194  scalar_view_t Ivals;
195  if(!Bview.importMatrix.is_null()) {
196  auto lclB = Bview.importMatrix->getLocalMatrixDevice();
197  Irowptr = lclB.graph.row_map;
198  Icolind = lclB.graph.entries;
199  Ivals = lclB.values;
200  }
201 
202 
203  // Get the algorithm mode
204  std::string alg = nodename+std::string(" algorithm");
205  // printf("DEBUG: Using kernel: %s\n",myalg.c_str());
206  if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
207  KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
208 
209  // Merge the B and Bimport matrices
210  const KCRS Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
211 
212 #ifdef HAVE_TPETRA_MMM_TIMINGS
213  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix HIPCore"))));
214 #endif
215 
216  // Do the multiply on whatever we've got
217  typename KernelHandle::nnz_lno_t AnumRows = Amat.numRows();
218  typename KernelHandle::nnz_lno_t BnumRows = Bmerged.numRows();
219  typename KernelHandle::nnz_lno_t BnumCols = Bmerged.numCols();
220 
221  lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing("non_const_lnow_row"), AnumRows + 1);
222  lno_nnz_view_t entriesC;
223  scalar_view_t valuesC;
224  KernelHandle kh;
225  kh.create_spgemm_handle(alg_enum);
226  kh.set_team_work_size(team_work_size);
227 
228  KokkosSparse::Experimental::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Amat.graph.row_map,Amat.graph.entries,false,Bmerged.graph.row_map,Bmerged.graph.entries,false,row_mapC);
229 
230  size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
231  if (c_nnz_size){
232  entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
233  valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
234  }
235  KokkosSparse::Experimental::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Amat.graph.row_map,Amat.graph.entries,Amat.values,false,Bmerged.graph.row_map,Bmerged.graph.entries,Bmerged.values,false,row_mapC,entriesC,valuesC);
236  kh.destroy_spgemm_handle();
237 
238 #ifdef HAVE_TPETRA_MMM_TIMINGS
239  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix HIPSort"))));
240 #endif
241 
242  // Sort & set values
243  if (params.is_null() || params->get("sort entries",true))
244  Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
245  C.setAllValues(row_mapC,entriesC,valuesC);
246 
247 #ifdef HAVE_TPETRA_MMM_TIMINGS
248  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix HIPESFC"))));
249 #endif
250 
251  // Final Fillcomplete
252  RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
253  labelList->set("Timer Label",label);
254  if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
255  RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > dummyExport;
256  C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
257 }
258 
259 
260 /*********************************************************************************************************/
261 template<class Scalar,
262  class LocalOrdinal,
263  class GlobalOrdinal,
264  class LocalOrdinalViewType>
265 void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
266  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
267  const LocalOrdinalViewType & targetMapToOrigRow_dev,
268  const LocalOrdinalViewType & targetMapToImportRow_dev,
269  const LocalOrdinalViewType & Bcol2Ccol_dev,
270  const LocalOrdinalViewType & Icol2Ccol_dev,
271  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
272  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
273  const std::string& label,
274  const Teuchos::RCP<Teuchos::ParameterList>& params) {
275 
276  // FIXME: Right now, this is a cut-and-paste of the serial kernel
277  typedef Tpetra::KokkosCompat::KokkosHIPWrapperNode Node;
278 
279 #ifdef HAVE_TPETRA_MMM_TIMINGS
280  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
281  using Teuchos::TimeMonitor;
282  Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Reuse SerialCore"))));
283  Teuchos::RCP<Teuchos::TimeMonitor> MM2;
284 #endif
285  using Teuchos::RCP;
286  using Teuchos::rcp;
287 
288 
289  // Lots and lots of typedefs
290  typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
291  typedef typename KCRS::StaticCrsGraphType graph_t;
292  typedef typename graph_t::row_map_type::const_type c_lno_view_t;
293  typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
294  typedef typename KCRS::values_type::non_const_type scalar_view_t;
295 
296  typedef Scalar SC;
297  typedef LocalOrdinal LO;
298  typedef GlobalOrdinal GO;
299  typedef Node NO;
300  typedef Map<LO,GO,NO> map_type;
301  const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
302  const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
303  const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
304 
305  // KDDKDD UVM Without UVM, need to copy targetMap arrays to host.
306  // KDDKDD UVM Ideally, this function would run on device and use
307  // KDDKDD UVM KokkosKernels instead of this host implementation.
308  auto targetMapToOrigRow =
309  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
310  targetMapToOrigRow_dev);
311  auto targetMapToImportRow =
312  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
313  targetMapToImportRow_dev);
314  auto Bcol2Ccol =
315  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
316  Bcol2Ccol_dev);
317  auto Icol2Ccol =
318  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
319  Icol2Ccol_dev);
320 
321  // Sizes
322  RCP<const map_type> Ccolmap = C.getColMap();
323  size_t m = Aview.origMatrix->getLocalNumRows();
324  size_t n = Ccolmap->getLocalNumElements();
325 
326  // Grab the Kokkos::SparseCrsMatrices & inner stuff
327  const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
328  const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
329  const KCRS & Cmat = C.getLocalMatrixHost();
330 
331  c_lno_view_t Arowptr = Amat.graph.row_map,
332  Browptr = Bmat.graph.row_map,
333  Crowptr = Cmat.graph.row_map;
334  const lno_nnz_view_t Acolind = Amat.graph.entries,
335  Bcolind = Bmat.graph.entries,
336  Ccolind = Cmat.graph.entries;
337  const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
338  scalar_view_t Cvals = Cmat.values;
339 
340  c_lno_view_t Irowptr;
341  lno_nnz_view_t Icolind;
342  scalar_view_t Ivals;
343  if(!Bview.importMatrix.is_null()) {
344  auto lclB = Bview.importMatrix->getLocalMatrixHost();
345  Irowptr = lclB.graph.row_map;
346  Icolind = lclB.graph.entries;
347  Ivals = lclB.values;
348  }
349 
350 #ifdef HAVE_TPETRA_MMM_TIMINGS
351  MM2 = Teuchos::null; MM2 = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix SerialCore - Compare"))));
352 #endif
353 
354  // Classic csr assembly (low memory edition)
355  // mfh 27 Sep 2016: The c_status array is an implementation detail
356  // of the local sparse matrix-matrix multiply routine.
357 
358  // The status array will contain the index into colind where this entry was last deposited.
359  // c_status[i] < CSR_ip - not in the row yet
360  // c_status[i] >= CSR_ip - this is the entry where you can find the data
361  // We start with this filled with INVALID's indicating that there are no entries yet.
362  // Sadly, this complicates the code due to the fact that size_t's are unsigned.
363  std::vector<size_t> c_status(n, ST_INVALID);
364 
365  // For each row of A/C
366  size_t CSR_ip = 0, OLD_ip = 0;
367  for (size_t i = 0; i < m; i++) {
368  // First fill the c_status array w/ locations where we're allowed to
369  // generate nonzeros for this row
370  OLD_ip = Crowptr[i];
371  CSR_ip = Crowptr[i+1];
372  for (size_t k = OLD_ip; k < CSR_ip; k++) {
373  c_status[Ccolind[k]] = k;
374 
375  // Reset values in the row of C
376  Cvals[k] = SC_ZERO;
377  }
378 
379  for (size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
380  LO Aik = Acolind[k];
381  const SC Aval = Avals[k];
382  if (Aval == SC_ZERO)
383  continue;
384 
385  if (targetMapToOrigRow[Aik] != LO_INVALID) {
386  // Local matrix
387  size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
388 
389  for (size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
390  LO Bkj = Bcolind[j];
391  LO Cij = Bcol2Ccol[Bkj];
392 
393  TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
394  std::runtime_error, "Trying to insert a new entry (" << i << "," << Cij << ") into a static graph " <<
395  "(c_status = " << c_status[Cij] << " of [" << OLD_ip << "," << CSR_ip << "))");
396 
397  Cvals[c_status[Cij]] += Aval * Bvals[j];
398  }
399 
400  } else {
401  // Remote matrix
402  size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
403  for (size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
404  LO Ikj = Icolind[j];
405  LO Cij = Icol2Ccol[Ikj];
406 
407  TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
408  std::runtime_error, "Trying to insert a new entry (" << i << "," << Cij << ") into a static graph " <<
409  "(c_status = " << c_status[Cij] << " of [" << OLD_ip << "," << CSR_ip << "))");
410 
411  Cvals[c_status[Cij]] += Aval * Ivals[j];
412  }
413  }
414  }
415  }
416 
417  C.fillComplete(C.getDomainMap(), C.getRangeMap());
418 }
419 
420 /*********************************************************************************************************/
421 template<class Scalar,
422  class LocalOrdinal,
423  class GlobalOrdinal,
424  class LocalOrdinalViewType>
425 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
426  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> & Dinv,
427  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
428  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
429  const LocalOrdinalViewType & Acol2Brow,
430  const LocalOrdinalViewType & Acol2Irow,
431  const LocalOrdinalViewType & Bcol2Ccol,
432  const LocalOrdinalViewType & Icol2Ccol,
433  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
434  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
435  const std::string& label,
436  const Teuchos::RCP<Teuchos::ParameterList>& params) {
437 
438 #ifdef HAVE_TPETRA_MMM_TIMINGS
439  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
440  using Teuchos::TimeMonitor;
441  Teuchos::RCP<TimeMonitor> MM;
442 #endif
443 
444  // Node-specific code
445  using Teuchos::RCP;
446 
447  // Options
448  //int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer // unreferenced
449  std::string myalg("KK");
450  if(!params.is_null()) {
451  if(params->isParameter("hip: jacobi algorithm"))
452  myalg = params->get("hip: jacobi algorithm",myalg);
453  }
454 
455  if(myalg == "MSAK") {
456  ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_MultiplyScaleAddKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
457  }
458  else if(myalg == "KK") {
459  jacobi_A_B_newmatrix_KokkosKernels(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
460  }
461  else {
462  throw std::runtime_error("Tpetra::MatrixMatrix::Jacobi newmatrix unknown kernel");
463  }
464 
465 #ifdef HAVE_TPETRA_MMM_TIMINGS
466  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix HIPESFC"))));
467 #endif
468 
469  // Final Fillcomplete
470  RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
471  labelList->set("Timer Label",label);
472  if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
473 
474  // NOTE: MSAK already fillCompletes, so we have to check here
475  if(!C.isFillComplete()) {
476  RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > dummyExport;
477  C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
478  }
479 
480 }
481 
482 
483 
484 /*********************************************************************************************************/
485 template<class Scalar,
486  class LocalOrdinal,
487  class GlobalOrdinal,
488  class LocalOrdinalViewType>
489 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
490  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> & Dinv,
491  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
492  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
493  const LocalOrdinalViewType & targetMapToOrigRow_dev,
494  const LocalOrdinalViewType & targetMapToImportRow_dev,
495  const LocalOrdinalViewType & Bcol2Ccol_dev,
496  const LocalOrdinalViewType & Icol2Ccol_dev,
497  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
498  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
499  const std::string& label,
500  const Teuchos::RCP<Teuchos::ParameterList>& params) {
501 
502  // FIXME: Right now, this is a cut-and-paste of the serial kernel
503  typedef Tpetra::KokkosCompat::KokkosHIPWrapperNode Node;
504 
505 #ifdef HAVE_TPETRA_MMM_TIMINGS
506  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
507  using Teuchos::TimeMonitor;
508  Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Reuse HIPCore"))));
509  Teuchos::RCP<Teuchos::TimeMonitor> MM2;
510 #endif
511  using Teuchos::RCP;
512  using Teuchos::rcp;
513 
514  // Lots and lots of typedefs
515  typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
516  typedef typename KCRS::StaticCrsGraphType graph_t;
517  typedef typename graph_t::row_map_type::const_type c_lno_view_t;
518  typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
519  typedef typename KCRS::values_type::non_const_type scalar_view_t;
520  typedef typename scalar_view_t::memory_space scalar_memory_space;
521 
522  typedef Scalar SC;
523  typedef LocalOrdinal LO;
524  typedef GlobalOrdinal GO;
525  typedef Node NO;
526  typedef Map<LO,GO,NO> map_type;
527  const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
528  const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
529  const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
530 
531  // KDDKDD UVM Without UVM, need to copy targetMap arrays to host.
532  // KDDKDD UVM Ideally, this function would run on device and use
533  // KDDKDD UVM KokkosKernels instead of this host implementation.
534  auto targetMapToOrigRow =
535  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
536  targetMapToOrigRow_dev);
537  auto targetMapToImportRow =
538  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
539  targetMapToImportRow_dev);
540  auto Bcol2Ccol =
541  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
542  Bcol2Ccol_dev);
543  auto Icol2Ccol =
544  Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
545  Icol2Ccol_dev);
546 
547 
548  // Sizes
549  RCP<const map_type> Ccolmap = C.getColMap();
550  size_t m = Aview.origMatrix->getLocalNumRows();
551  size_t n = Ccolmap->getLocalNumElements();
552 
553  // Grab the Kokkos::SparseCrsMatrices & inner stuff
554  const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
555  const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
556  const KCRS & Cmat = C.getLocalMatrixHost();
557 
558  c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
559  const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
560  const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
561  scalar_view_t Cvals = Cmat.values;
562 
563  c_lno_view_t Irowptr;
564  lno_nnz_view_t Icolind;
565  scalar_view_t Ivals;
566  if(!Bview.importMatrix.is_null()) {
567  auto lclB = Bview.importMatrix->getLocalMatrixHost();
568  Irowptr = lclB.graph.row_map;
569  Icolind = lclB.graph.entries;
570  Ivals = lclB.values;
571  }
572 
573  // Jacobi-specific inner stuff
574  auto Dvals =
575  Dinv.template getLocalView<scalar_memory_space>(Access::ReadOnly);
576 
577 #ifdef HAVE_TPETRA_MMM_TIMINGS
578  MM2 = Teuchos::null; MM2 = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Reuse HIPCore - Compare"))));
579 #endif
580 
581  // The status array will contain the index into colind where this entry was last deposited.
582  // c_status[i] < CSR_ip - not in the row yet
583  // c_status[i] >= CSR_ip - this is the entry where you can find the data
584  // We start with this filled with INVALID's indicating that there are no entries yet.
585  // Sadly, this complicates the code due to the fact that size_t's are unsigned.
586  std::vector<size_t> c_status(n, ST_INVALID);
587 
588  // For each row of A/C
589  size_t CSR_ip = 0, OLD_ip = 0;
590  for (size_t i = 0; i < m; i++) {
591 
592  // First fill the c_status array w/ locations where we're allowed to
593  // generate nonzeros for this row
594  OLD_ip = Crowptr[i];
595  CSR_ip = Crowptr[i+1];
596  for (size_t k = OLD_ip; k < CSR_ip; k++) {
597  c_status[Ccolind[k]] = k;
598 
599  // Reset values in the row of C
600  Cvals[k] = SC_ZERO;
601  }
602 
603  SC minusOmegaDval = -omega*Dvals(i,0);
604 
605  // Entries of B
606  for (size_t j = Browptr[i]; j < Browptr[i+1]; j++) {
607  Scalar Bval = Bvals[j];
608  if (Bval == SC_ZERO)
609  continue;
610  LO Bij = Bcolind[j];
611  LO Cij = Bcol2Ccol[Bij];
612 
613  TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
614  std::runtime_error, "Trying to insert a new entry into a static graph");
615 
616  Cvals[c_status[Cij]] = Bvals[j];
617  }
618 
619  // Entries of -omega * Dinv * A * B
620  for (size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
621  LO Aik = Acolind[k];
622  const SC Aval = Avals[k];
623  if (Aval == SC_ZERO)
624  continue;
625 
626  if (targetMapToOrigRow[Aik] != LO_INVALID) {
627  // Local matrix
628  size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
629 
630  for (size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
631  LO Bkj = Bcolind[j];
632  LO Cij = Bcol2Ccol[Bkj];
633 
634  TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
635  std::runtime_error, "Trying to insert a new entry into a static graph");
636 
637  Cvals[c_status[Cij]] += minusOmegaDval * Aval * Bvals[j];
638  }
639 
640  } else {
641  // Remote matrix
642  size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
643  for (size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
644  LO Ikj = Icolind[j];
645  LO Cij = Icol2Ccol[Ikj];
646 
647  TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
648  std::runtime_error, "Trying to insert a new entry into a static graph");
649 
650  Cvals[c_status[Cij]] += minusOmegaDval * Aval * Ivals[j];
651  }
652  }
653  }
654  }
655 
656 #ifdef HAVE_TPETRA_MMM_TIMINGS
657  MM2= Teuchos::null;
658  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Reuse ESFC"))));
659 #endif
660 
661  C.fillComplete(C.getDomainMap(), C.getRangeMap());
662 
663 }
664 
665 /*********************************************************************************************************/
666 template<class Scalar,
667  class LocalOrdinal,
668  class GlobalOrdinal,
669  class LocalOrdinalViewType>
670 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
671  const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> & Dinv,
672  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
673  CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
674  const LocalOrdinalViewType & Acol2Brow,
675  const LocalOrdinalViewType & Acol2Irow,
676  const LocalOrdinalViewType & Bcol2Ccol,
677  const LocalOrdinalViewType & Icol2Ccol,
678  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
679  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
680  const std::string& label,
681  const Teuchos::RCP<Teuchos::ParameterList>& params) {
682 
683 #ifdef HAVE_TPETRA_MMM_TIMINGS
684  std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
685  using Teuchos::TimeMonitor;
686  Teuchos::RCP<TimeMonitor> MM;
687 #endif
688 
689  // Check if the diagonal entries exist in debug mode
690  const bool debug = Tpetra::Details::Behavior::debug();
691  if(debug) {
692 
693  auto rowMap = Aview.origMatrix->getRowMap();
694  Tpetra::Vector<Scalar> diags(rowMap);
695  Aview.origMatrix->getLocalDiagCopy(diags);
696  size_t diagLength = rowMap->getLocalNumElements();
697  Teuchos::Array<Scalar> diagonal(diagLength);
698  diags.get1dCopy(diagonal());
699 
700  for(size_t i = 0; i < diagLength; ++i) {
701  TEUCHOS_TEST_FOR_EXCEPTION(diagonal[i] == Teuchos::ScalarTraits<Scalar>::zero(),
702  std::runtime_error,
703  "Matrix A has a zero/missing diagonal: " << diagonal[i] << std::endl <<
704  "KokkosKernels Jacobi-fused SpGEMM requires nonzero diagonal entries in A" << std::endl);
705  }
706  }
707 
708  // Usings
709  using device_t = typename Tpetra::KokkosCompat::KokkosHIPWrapperNode::device_type;
711  using graph_t = typename matrix_t::StaticCrsGraphType;
712  using lno_view_t = typename graph_t::row_map_type::non_const_type;
713  using c_lno_view_t = typename graph_t::row_map_type::const_type;
714  using lno_nnz_view_t = typename graph_t::entries_type::non_const_type;
715  using scalar_view_t = typename matrix_t::values_type::non_const_type;
716 
717  // KokkosKernels handle
718  using handle_t = typename KokkosKernels::Experimental::KokkosKernelsHandle<
719  typename lno_view_t::const_value_type,typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
720  typename device_t::execution_space, typename device_t::memory_space,typename device_t::memory_space >;
721 
722  // Get the rowPtr, colInd and vals of importMatrix
723  c_lno_view_t Irowptr;
724  lno_nnz_view_t Icolind;
725  scalar_view_t Ivals;
726  if(!Bview.importMatrix.is_null()) {
727  auto lclB = Bview.importMatrix->getLocalMatrixDevice();
728  Irowptr = lclB.graph.row_map;
729  Icolind = lclB.graph.entries;
730  Ivals = lclB.values;
731  }
732 
733  // Merge the B and Bimport matrices
734  const matrix_t Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
735 
736  // Get the properties and arrays of input matrices
737  const matrix_t & Amat = Aview.origMatrix->getLocalMatrixDevice();
738  const matrix_t & Bmat = Bview.origMatrix->getLocalMatrixDevice();
739 
740  typename handle_t::nnz_lno_t AnumRows = Amat.numRows();
741  typename handle_t::nnz_lno_t BnumRows = Bmerged.numRows();
742  typename handle_t::nnz_lno_t BnumCols = Bmerged.numCols();
743 
744  c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmerged.graph.row_map;
745  const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmerged.graph.entries;
746  const scalar_view_t Avals = Amat.values, Bvals = Bmerged.values;
747 
748  // Arrays of the output matrix
749  lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing("non_const_lnow_row"), AnumRows + 1);
750  lno_nnz_view_t entriesC;
751  scalar_view_t valuesC;
752 
753  // Options
754  int team_work_size = 16;
755  std::string myalg("SPGEMM_KK_MEMORY");
756  if(!params.is_null()) {
757  if(params->isParameter("hip: algorithm"))
758  myalg = params->get("hip: algorithm",myalg);
759  if(params->isParameter("hip: team work size"))
760  team_work_size = params->get("hip: team work size",team_work_size);
761  }
762 
763  // Get the algorithm mode
764  std::string nodename("HIP");
765  std::string alg = nodename + std::string(" algorithm");
766  if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
767  KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
768 
769 
770  // KokkosKernels call
771  handle_t kh;
772  kh.create_spgemm_handle(alg_enum);
773  kh.set_team_work_size(team_work_size);
774 
775  KokkosSparse::Experimental::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
776  Arowptr, Acolind, false,
777  Browptr, Bcolind, false,
778  row_mapC);
779 
780  size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
781  if (c_nnz_size){
782  entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
783  valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
784  }
785 
786  KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
787  Arowptr, Acolind, Avals, false,
788  Browptr, Bcolind, Bvals, false,
789  row_mapC, entriesC, valuesC,
790  omega, Dinv.getLocalViewDevice(Access::ReadOnly));
791  kh.destroy_spgemm_handle();
792 
793 #ifdef HAVE_TPETRA_MMM_TIMINGS
794  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix HIPSort"))));
795 #endif
796 
797  // Sort & set values
798  if (params.is_null() || params->get("sort entries",true))
799  Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
800  C.setAllValues(row_mapC,entriesC,valuesC);
801 
802 #ifdef HAVE_TPETRA_MMM_TIMINGS
803  MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix HIPESFC"))));
804 #endif
805 
806  // Final Fillcomplete
807  Teuchos::RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
808  labelList->set("Timer Label",label);
809  if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
810  Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > dummyExport;
811  C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
812 }
813 
814  }//MMdetails
815 }//Tpetra
816 
817 #endif//HIP
818 
819 #endif
static bool debug()
Whether Tpetra is in debug mode.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
A distributed dense vector.