Panzer_Interpolation.cpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Panzer: A partial differential equation assembly
4 // engine for strongly coupled complex multiphysics systems
5 //
6 // Copyright 2011 NTESS and the Panzer contributors.
7 // SPDX-License-Identifier: BSD-3-Clause
8 // *****************************************************************************
9 // @HEADER
10 
11 #include "Panzer_Interpolation.hpp"
14 #include "Intrepid2_OrientationTools.hpp"
15 #include "Intrepid2_LagrangianInterpolation.hpp"
16 #ifdef PANZER_HAVE_EPETRA_STACK
17 #include "Epetra_MpiComm.h"
18 #endif
19 
20 // #define PANZER_INTERPOLATION_DEBUG_OUTPUT = 1
21 
22 namespace panzer {
23 
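// removeSmallEntries: builds a filtered copy of the CrsMatrix A that keeps only entries
// whose magnitude exceeds tol. It uses a three-step device-side pattern: (1) count the
// surviving entries per row, (2) parallel_scan the counts into a CRS row-pointer array,
// (3) fill the compressed column indices and values. The new matrix reuses A's row and
// column maps and is fillComplete'd with A's domain and range maps.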
24 template <class Scalar,
25  class LocalOrdinal,
26  class GlobalOrdinal,
27  class Node>
28 Teuchos::RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
29 removeSmallEntries(Teuchos::RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >& A,
30  typename Teuchos::ScalarTraits<Scalar>::magnitudeType tol) {
31  using crs_matrix = Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
32  using row_ptr_type = typename crs_matrix::local_graph_device_type::row_map_type::non_const_type;
33  using col_idx_type = typename crs_matrix::local_graph_device_type::entries_type::non_const_type;
34  using vals_type = typename crs_matrix::local_matrix_device_type::values_type;
35 
36 #if KOKKOS_VERSION >= 40799
37  using ATS = KokkosKernels::ArithTraits<Scalar>;
38 #else
39  using ATS = Kokkos::ArithTraits<Scalar>;
40 #endif
41  using impl_SC = typename ATS::val_type;
42 #if KOKKOS_VERSION >= 40799
43  using impl_ATS = KokkosKernels::ArithTraits<impl_SC>;
44 #else
45  using impl_ATS = Kokkos::ArithTraits<impl_SC>;
46 #endif
47 
48  auto lclA = A->getLocalMatrixDevice();
49 
50  auto rowptr = row_ptr_type("rowptr", lclA.numRows() + 1);
51 
52  Kokkos::parallel_for(
53  "removeSmallEntries::rowptr1",
54  Kokkos::RangePolicy<LocalOrdinal>(0, lclA.numRows()),
55  KOKKOS_LAMBDA(const LocalOrdinal rlid) {
56  auto row = lclA.row(rlid);
57  for (LocalOrdinal k = 0; k < row.length; ++k) {
58  if (impl_ATS::magnitude(row.value(k)) > tol) {
59  rowptr(rlid + 1) += 1;
60  }
61  }
62  });
63  LocalOrdinal nnz;
64  Kokkos::parallel_scan(
65  "removeSmallEntries::rowptr2",
66  Kokkos::RangePolicy<LocalOrdinal>(0, lclA.numRows()),
67  KOKKOS_LAMBDA(const LocalOrdinal rlid, LocalOrdinal& partial_nnz, bool is_final) {
68  partial_nnz += rowptr(rlid + 1);
69  if (is_final)
70  rowptr(rlid + 1) = partial_nnz;
71  },
72  nnz);
73 
74  auto idx = col_idx_type("idx", nnz);
75  auto vals = vals_type("vals", nnz);
76 
77  Kokkos::parallel_for(
78  "removeSmallEntries::indicesValues",
79  Kokkos::RangePolicy<LocalOrdinal>(0, lclA.numRows()),
80  KOKKOS_LAMBDA(const LocalOrdinal rlid) {
81  auto row = lclA.row(rlid);
82  auto I = rowptr(rlid);
83  for (LocalOrdinal k = 0; k < row.length; ++k) {
84  if (impl_ATS::magnitude(row.value(k)) > tol) {
85  idx(I) = row.colidx(k);
86  vals(I) = row.value(k);
87  I += 1;
88  }
89  }
90  });
91  Kokkos::fence();
92 
93  auto newA = Teuchos::rcp(new crs_matrix(A->getRowMap(), A->getColMap(), rowptr, idx, vals));
94  newA->fillComplete(A->getDomainMap(),
95  A->getRangeMap());
96  return newA;
97 }
98 
99 
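// buildInterpolation (linear-object-factory overload): extracts the blocked DOF manager
// from a blocked Tpetra (or, if enabled, Epetra) linear object factory, looks up the DOF
// managers that own the domain and range fields, and forwards to the ConnManager-based
// overload further below.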
100 Teuchos::RCP<Thyra::LinearOpBase<double> > buildInterpolation(const Teuchos::RCP<const panzer::LinearObjFactory<panzer::Traits> > &linObjFactory,
101  const std::string &domain_basis_name, const std::string &range_basis_name,
102  Intrepid2::EOperator op, size_t worksetSize,
103  const bool matrixFree)
104 {
105  using Teuchos::RCP;
106  using Teuchos::rcp;
107  using Teuchos::rcp_dynamic_cast;
108 
109  using Scalar = double;
110 
111  using tpetraBlockedLinObjFactory = typename panzer::BlockedTpetraLinearObjFactory<panzer::Traits, Scalar, LocalOrdinal, GlobalOrdinal>;
112 #ifdef PANZER_HAVE_EPETRA_STACK
113  using epetraBlockedLinObjFactory = typename panzer::BlockedEpetraLinearObjFactory<panzer::Traits, LocalOrdinal>;
114 #endif
115  using UGI = panzer::GlobalIndexer;
116 
117  // must be able to cast to a block linear object factory
118  RCP<const tpetraBlockedLinObjFactory > tblof = rcp_dynamic_cast<const tpetraBlockedLinObjFactory >(linObjFactory);
119 #ifdef PANZER_HAVE_EPETRA_STACK
120  RCP<const epetraBlockedLinObjFactory > eblof = rcp_dynamic_cast<const epetraBlockedLinObjFactory >(linObjFactory);
121 #endif
122 
123  Teuchos::RCP<const panzer::BlockedDOFManager> blockedDOFMngr;
124  if (tblof != Teuchos::null) {
125  blockedDOFMngr = tblof->getGlobalIndexer();
126 #ifdef PANZER_HAVE_EPETRA_STACK
127  } else if (eblof != Teuchos::null) {
128  blockedDOFMngr = eblof->getGlobalIndexer();
129 #endif
130  } else {
131  TEUCHOS_ASSERT(false);
132  }
133 
134  // get global indexers for domain and range dofs
135  std::vector<RCP<UGI> > fieldDOFMngrs = blockedDOFMngr->getFieldDOFManagers();
136  int domainFieldNum = blockedDOFMngr->getFieldNum(domain_basis_name);
137  int rangeFieldNum = blockedDOFMngr->getFieldNum(range_basis_name);
138  int domainBlockIndex = blockedDOFMngr->getFieldBlock(domainFieldNum);
139  int rangeBlockIndex = blockedDOFMngr->getFieldBlock(rangeFieldNum);
140  RCP<panzer::DOFManager> domain_ugi = rcp_dynamic_cast<panzer::DOFManager>(blockedDOFMngr->getFieldDOFManagers()[domainBlockIndex], true);
141  RCP<panzer::DOFManager> range_ugi = rcp_dynamic_cast<panzer::DOFManager>(blockedDOFMngr->getFieldDOFManagers()[rangeBlockIndex], true);
142 
143  RCP<const panzer::ConnManager> conn = blockedDOFMngr->getConnManager();
144 
145  return buildInterpolation(conn, domain_ugi, range_ugi, domain_basis_name, range_basis_name, op, worksetSize,
146  /*forceVectorial=*/false,
147  /*useTpetra=*/tblof != Teuchos::null,
148  matrixFree);
149 }
150 
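// Example usage (a sketch with hypothetical field names): building a discrete gradient
// from a nodal HGrad field "phi" to an edge HCurl field "E", given a blocked linear
// object factory linObjFactory, via the factory-based overload above:
//
//   auto G = panzer::buildInterpolation(linObjFactory, "phi", "E",
//                                       Intrepid2::OPERATOR_GRAD,
//                                       /*worksetSize=*/1000,
//                                       /*matrixFree=*/false);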
151 
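// buildInterpolation (ConnManager overload): assembles the interpolation / discrete-operator
// matrix that maps coefficients of the domain basis to coefficients of the range basis.
// For each workset of cells, op applied to the domain basis is evaluated at the range basis
// dof coordinates, orientations are applied, and LagrangianInterpolation::getBasisCoeffs
// produces the local matrix entries. With matrixFree == true a MatrixFreeInterpolationOp
// is returned instead of an assembled matrix.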
152 Teuchos::RCP<Thyra::LinearOpBase<double> > buildInterpolation(const Teuchos::RCP<const panzer::ConnManager> &conn,
153  const Teuchos::RCP<panzer::DOFManager> &domain_ugi,
154  const Teuchos::RCP<panzer::DOFManager> &range_ugi,
155  const std::string& domain_basis_name,
156  const std::string& range_basis_name,
157  Intrepid2::EOperator op,
158  size_t worksetSize,
159  const bool force_vectorial,
160  const bool useTpetra,
161  const bool matrixFree)
162 {
163  using Teuchos::RCP;
164  using Teuchos::rcp;
165  using Teuchos::rcp_dynamic_cast;
166 
167  using Scalar = double;
168 
169  using STS = Teuchos::ScalarTraits<Scalar>;
170 #if KOKKOS_VERSION >= 40799
171  using KAT = KokkosKernels::ArithTraits<Scalar>;
172 #else
173  using KAT = Kokkos::ArithTraits<Scalar>;
174 #endif
175  using OT = Teuchos::OrdinalTraits<GlobalOrdinal>;
176 
177  using DeviceSpace = PHX::Device;
178  using HostSpace = Kokkos::HostSpace;
179  using ots = Intrepid2::OrientationTools<DeviceSpace>;
180  using li = Intrepid2::LagrangianInterpolation<DeviceSpace>;
181  using DynRankDeviceView = Kokkos::DynRankView<double, DeviceSpace>;
182 
183  using tp_graph = Tpetra::CrsGraph<LocalOrdinal, GlobalOrdinal>;
184  using tp_matrix = Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal>;
185  using tp_map = Tpetra::Map<LocalOrdinal, GlobalOrdinal>;
186 #ifdef PANZER_HAVE_EPETRA_STACK
187  using ep_matrix = Epetra_CrsMatrix;
188  using ep_map = Epetra_Map;
189 #endif
190 
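// Matrix-free path: instead of assembling a CrsMatrix, wrap a MatrixFreeInterpolationOp
// (defined below) in a Thyra Tpetra linear operator and return it.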
191  if (matrixFree) {
192  TEUCHOS_ASSERT(useTpetra);
193  TEUCHOS_ASSERT(!force_vectorial);
194  auto mfOp = rcp(new MatrixFreeInterpolationOp<Scalar,LocalOrdinal,GlobalOrdinal>(conn, domain_ugi, range_ugi, domain_basis_name, range_basis_name, op, worksetSize));
195  return Thyra::tpetraLinearOp<Scalar,LocalOrdinal,GlobalOrdinal,typename tp_matrix::node_type>(Thyra::createVectorSpace<Scalar,LocalOrdinal,GlobalOrdinal>(mfOp->getRangeMap()),
196  Thyra::createVectorSpace<Scalar,LocalOrdinal,GlobalOrdinal>(mfOp->getDomainMap()),
197  mfOp);
198  }
199 
200  // get the domain and range bases
201  auto domain_fieldPattern = domain_ugi->getFieldPattern(domain_basis_name);
202  auto domain_basis = rcp_dynamic_cast<const panzer::Intrepid2FieldPattern>(domain_fieldPattern,true)->getIntrepidBasis();
203  auto range_fieldPattern = range_ugi->getFieldPattern(range_basis_name);
204  auto range_basis = rcp_dynamic_cast<const panzer::Intrepid2FieldPattern>(range_fieldPattern,true)->getIntrepidBasis();
205 
206  // cardinalities
207  const size_t domainCardinality = domain_basis->getCardinality();
208  const size_t rangeCardinality = range_basis->getCardinality();
209 
210  const int dim = range_basis->getBaseCellTopology().getDimension();
211 
212  if (op == Intrepid2::OPERATOR_VALUE) {
213  TEUCHOS_ASSERT(rangeCardinality >= domainCardinality);
214  TEUCHOS_ASSERT_EQUALITY(domain_basis->getFunctionSpace(), range_basis->getFunctionSpace());
215  }
216 
217  // Create the global interp matrix.
218  RCP<const tp_map> tp_rangemap;
219  RCP<const tp_map> tp_domainmap;
220  RCP<const tp_map> tp_rowmap;
221  RCP<const tp_map> tp_colmap;
222  RCP<tp_matrix> tp_interp_matrix;
223  typename tp_matrix::local_matrix_device_type lcl_tp_interp_matrix;
224 #ifdef PANZER_HAVE_EPETRA_STACK
225  RCP<const ep_map> ep_rangemap;
226  RCP<const ep_map> ep_domainmap;
227  RCP<const ep_map> ep_rowmap;
228  RCP<const ep_map> ep_colmap;
229  RCP<ep_matrix> ep_interp_matrix;
230 #endif
231 
232  auto rangeElementLIDs_d = range_ugi->getLIDs();
233  auto domainElementLIDs_d = domain_ugi->getLIDs();
234 
235  RCP<Thyra::LinearOpBase<Scalar> > thyra_interp;
236  size_t maxNumElementsPerBlock = 0;
237  LocalOrdinal minLocalIndex = 0;
238  LocalOrdinal maxLocalIndex = 0;
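// Assembled Tpetra path: build the row/range map from the owned range GIDs, the domain map
// from the owned domain GIDs, and the column map from the owned plus ghosted domain GIDs;
// then estimate entries per row, insert the sparsity pattern on host, and fill the values
// workset-by-workset on device.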
239  if (useTpetra) {
240  // build maps
241  std::vector<GlobalOrdinal> gids;
242  range_ugi->getOwnedIndices(gids);
243  tp_rowmap = rcp(new tp_map(OT::invalid(), gids.data(), static_cast<LocalOrdinal>(gids.size()), OT::zero(), range_ugi->getComm()));
244  tp_rangemap = tp_rowmap;
245  domain_ugi->getOwnedIndices(gids);
246  tp_domainmap = rcp(new tp_map(OT::invalid(), gids.data(), static_cast<LocalOrdinal>(gids.size()), OT::zero(), domain_ugi->getComm()));
247  domain_ugi->getOwnedAndGhostedIndices(gids);
248  tp_colmap = rcp(new tp_map(OT::invalid(), gids.data(), static_cast<LocalOrdinal>(gids.size()), OT::zero(), domain_ugi->getComm()));
249 
250  minLocalIndex = tp_rowmap->getMinLocalIndex();
251  maxLocalIndex = tp_rowmap->getMaxLocalIndex();
252 
253  // estimate number of entries per row
254  // This is an upper bound, as we are counting dofs that are on shared nodes, edges, faces more than once.
255  using dv = Kokkos::DualView<size_t*, typename tp_graph::device_type>;
256  dv numEntriesPerRow("numEntriesPerRow", tp_rowmap->getLocalNumElements());
257  {
258  auto numEntriesPerRow_d = numEntriesPerRow.view_device();
259 
260  // loop over element blocks
261  std::vector<std::string> elementBlockIds;
262  range_ugi->getElementBlockIds(elementBlockIds);
263  for(std::size_t blockIter = 0; blockIter < elementBlockIds.size(); ++blockIter) {
264 
265  // loop over elements
266  std::vector<int> elementIds = range_ugi->getElementBlock(elementBlockIds[blockIter]);
267  Kokkos::View<int *, HostSpace> elementIds_h(elementIds.data(), elementIds.size());
268  Kokkos::View<int *, DeviceSpace> elementIds_d("elementIds_d", elementIds_h.extent(0));
269  Kokkos::deep_copy(elementIds_d, elementIds_h);
270  maxNumElementsPerBlock = std::max(maxNumElementsPerBlock, elementIds.size());
271 
272  Kokkos::parallel_for("MiniEM_Interpolation::numEntriesPerRow",
273  Kokkos::RangePolicy<size_t, typename tp_matrix::node_type::execution_space>(0, elementIds.size()),
274  KOKKOS_LAMBDA(const size_t elemIter) {
275  auto elemId = elementIds_d(elemIter);
276 
277  // get IDs for range dofs
278  auto rangeLIDs_d = Kokkos::subview(rangeElementLIDs_d, elemId, Kokkos::ALL());
279 
280  // loop over range LIDs
281  for(size_t rangeIter = 0; rangeIter < rangeCardinality; ++rangeIter) {
282  const LocalOrdinal range_row = rangeLIDs_d(rangeIter);
283  const bool isOwned = ((minLocalIndex <= range_row) && (range_row <= maxLocalIndex));
284  if (isOwned)
285  Kokkos::atomic_add(&numEntriesPerRow_d(range_row), domainCardinality);
286  } //end range LID loop
287  });
288  } // blocks loop
289  numEntriesPerRow.template modify<typename dv::t_dev>();
290  numEntriesPerRow.template sync<typename dv::t_host>();
291  }
292 
293  // Set up graph
294  auto tp_interp_graph = rcp(new tp_graph(tp_rowmap, tp_colmap, numEntriesPerRow));
295 
296  { // This runs on host
297  Kokkos::View<LocalOrdinal**, HostSpace> rangeElementLIDs_h("rangeElementLIDs_h", rangeElementLIDs_d.extent(0), rangeCardinality);
298  Kokkos::View<LocalOrdinal**, HostSpace> domainElementLIDs_h("domainElementLIDs_h", domainElementLIDs_d.extent(0), domainCardinality);
299  Kokkos::deep_copy(rangeElementLIDs_h, rangeElementLIDs_d);
300  Kokkos::deep_copy(domainElementLIDs_h, domainElementLIDs_d);
301 
302  // loop over element blocks
303  std::vector<std::string> elementBlockIds;
304  range_ugi->getElementBlockIds(elementBlockIds);
305  for(std::size_t blockIter = 0; blockIter < elementBlockIds.size(); ++blockIter) {
306 
307  // loop over elements
308  std::vector<int> elementIds = range_ugi->getElementBlock(elementBlockIds[blockIter]);
309  maxNumElementsPerBlock = std::max(maxNumElementsPerBlock, elementIds.size());
310  for(std::size_t elemIter = 0; elemIter < elementIds.size(); ++elemIter) {
311  auto elemId = elementIds[elemIter];
312 
313  // get IDs for range dofs
314  auto rangeLIDs_h = Kokkos::subview(rangeElementLIDs_h, elemId, Kokkos::ALL());
315  auto domainLIDs_h = Kokkos::subview(domainElementLIDs_h, elemId, Kokkos::ALL());
316 
317  // loop over range LIDs
318  for(size_t rangeIter = 0; rangeIter < rangeCardinality; ++rangeIter) {
319  const LocalOrdinal range_row = rangeLIDs_h(rangeIter);
320  const bool isOwned = ((minLocalIndex <= range_row) && (range_row <= maxLocalIndex));
321  if (isOwned) {
322  Teuchos::ArrayView<LocalOrdinal> domainLIDs_av = Teuchos::ArrayView<LocalOrdinal>(domainLIDs_h.data(), domainLIDs_h.extent_int(0));
323  tp_interp_graph->insertLocalIndices(range_row, domainLIDs_av);
324  }
325  } //end range LID loop
326  } // elements loop
327  } // blocks loop
328  }
329 
330  RCP<Teuchos::ParameterList> pl = rcp(new Teuchos::ParameterList);
331  pl->set("Optimize Storage", true);
332  tp_interp_graph->fillComplete(tp_domainmap, tp_rangemap, pl);
333 
334  tp_interp_matrix = rcp(new tp_matrix(tp_interp_graph));
335  lcl_tp_interp_matrix = tp_interp_matrix->getLocalMatrixDevice();
336  }
337 #ifdef PANZER_HAVE_EPETRA_STACK
338  else {
339 
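// Epetra path: build Epetra maps from the same owned / owned+ghosted GID sets, allocate the
// matrix with a rough nnz-per-row estimate, and wrap it via Thyra::epetraLinearOp; the
// values are filled row-by-row on host in the workset loop below.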
340  const RCP<const Teuchos::MpiComm<int> > mpiComm = Teuchos::rcp_dynamic_cast<const Teuchos::MpiComm<int> >(range_ugi->getComm());
341  auto ep_comm = Teuchos::rcp(new Epetra_MpiComm(*mpiComm->getRawMpiComm()));
342  std::vector<GlobalOrdinal> gids;
343  range_ugi->getOwnedIndices(gids);
344  ep_rowmap = rcp(new ep_map(OT::invalid(), static_cast<LocalOrdinal>(gids.size()), gids.data(), OT::zero(), *ep_comm));
345  ep_rangemap = ep_rowmap;
346  domain_ugi->getOwnedIndices(gids);
347  ep_domainmap = rcp(new ep_map(OT::invalid(), static_cast<LocalOrdinal>(gids.size()), gids.data(), OT::zero(), *ep_comm));
348  domain_ugi->getOwnedAndGhostedIndices(gids);
349  ep_colmap = rcp(new ep_map(OT::invalid(), static_cast<LocalOrdinal>(gids.size()), gids.data(), OT::zero(), *ep_comm));
350 
351  {
352  // loop over element blocks
353  std::vector<std::string> elementBlockIds;
354  range_ugi->getElementBlockIds(elementBlockIds);
355  for(std::size_t blockIter = 0; blockIter < elementBlockIds.size(); ++blockIter) {
356 
357  // loop over elements
358  std::vector<int> elementIds = range_ugi->getElementBlock(elementBlockIds[blockIter]);
359  maxNumElementsPerBlock = std::max(maxNumElementsPerBlock, elementIds.size());
360  }
361  }
362 
363  // TODO: Fix this.
364  size_t nnzPerRowEstimate = 25*domainCardinality;
365 
366  ep_interp_matrix = rcp(new ep_matrix(Copy, *ep_rowmap, *ep_colmap, static_cast<int>(nnzPerRowEstimate), /*StaticProfile=*/true));
367 
368  RCP<const Thyra::LinearOpBase<double> > th_ep_interp = Thyra::epetraLinearOp(ep_interp_matrix,
369  Thyra::NOTRANS,
370  Thyra::EPETRA_OP_APPLY_APPLY,
371  Thyra::EPETRA_OP_ADJOINT_SUPPORTED,
372  Thyra::create_VectorSpace(ep_rangemap),
373  Thyra::create_VectorSpace(ep_domainmap));
374  thyra_interp = Teuchos::rcp_const_cast<Thyra::LinearOpBase<double> >(th_ep_interp);
375  }
376 #endif
377 
378  // allocate some views
379  int numCells;
380  if (maxNumElementsPerBlock > 0)
381  numCells = static_cast<int>(std::min(maxNumElementsPerBlock, worksetSize));
382  else
383  numCells = static_cast<int>(worksetSize);
384 
385  DynRankDeviceView range_dofCoords_d("range_dofCoords_d", rangeCardinality, dim);
386  DynRankDeviceView basisCoeffsLIOriented_d("basisCoeffsLIOriented_d", numCells, rangeCardinality, domainCardinality);
387 
388  // the ranks of these depend on dimension
389  DynRankDeviceView valuesAtDofCoordsNonOriented_d;
390  DynRankDeviceView valuesAtDofCoordsOriented_d;
391 
392  if (!force_vectorial) {
393  // Let Intrepid2 give us the correctly dimensioned view, then build one with +1 ranks and extent(0) == numCells
394  auto temp = domain_basis->allocateOutputView(static_cast<int>(rangeCardinality), op);
395 
396  // These views have dimensions
397  // numCells, numFields=domainCardinality, numPoints=rangeCardinality, (spatialDim)
398  //
399  if (temp.rank() == 3) {
400  valuesAtDofCoordsNonOriented_d = DynRankDeviceView("valuesAtDofCoordsNonOriented_d", temp.extent(0), temp.extent(1), temp.extent(2));
401  valuesAtDofCoordsOriented_d = DynRankDeviceView("valuesAtDofCoordsOriented_d", numCells, temp.extent(0), temp.extent(1), temp.extent(2));
402  } else {
403  valuesAtDofCoordsNonOriented_d = DynRankDeviceView("valuesAtDofCoordsNonOriented_d", temp.extent(0), temp.extent(1));
404  valuesAtDofCoordsOriented_d = DynRankDeviceView("valuesAtDofCoordsOriented_d", numCells, temp.extent(0), temp.extent(1));
405  }
406  } else {
407  valuesAtDofCoordsNonOriented_d = DynRankDeviceView("valuesAtDofCoordsNonOriented_d", domainCardinality, rangeCardinality, dim);
408  valuesAtDofCoordsOriented_d = DynRankDeviceView("valuesAtDofCoordsOriented_d", numCells, domainCardinality, rangeCardinality, dim);
409  }
410 
411  int fieldRank = Intrepid2::getFieldRank(range_basis->getFunctionSpace());
412  TEUCHOS_ASSERT((fieldRank == 0) || (fieldRank == 1));
413 
415 
416  // range dof coordinates
417  range_basis->getDofCoords(range_dofCoords_d);
418 
419  // compute values of op * (domain basis) at range dof coords on reference element
420  domain_basis->getValues(valuesAtDofCoordsNonOriented_d, range_dofCoords_d, op);
421 
422  // get block ids
423  std::vector<std::string> elementBlockIds;
424  range_ugi->getElementBlockIds(elementBlockIds);
425 
426  // get orientations for all blocks
427  std::map<std::string, std::vector<Intrepid2::Orientation> > orientations;
428  buildIntrepidOrientations(elementBlockIds, *conn, orientations);
429 
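// Assembly loop: for each element block, gather the element ids and their Intrepid2
// orientations, then process the elements in worksets of size numCells. Within a workset,
// the reference-element values are reoriented per cell and getBasisCoeffs yields
// basisCoeffsLIOriented_d(cell, rangeIter, domainIter), which is the matrix entry for the
// (range LID, domain LID) pair of that cell.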
430  // loop over element blocks
431  for(std::size_t blockIter = 0; blockIter < elementBlockIds.size(); ++blockIter) {
432 
433  auto rangeOffsets_d = range_ugi->getGIDFieldOffsetsKokkos(elementBlockIds[blockIter], 0);
434  auto domainOffsets_d = domain_ugi->getGIDFieldOffsetsKokkos(elementBlockIds[blockIter], 0);
435 #ifdef PANZER_INTERPOLATION_DEBUG_OUTPUT
436  std::cout << "rangeOffsets_d" << std::endl;
437  for (int i = 0; i < rangeCardinality; i++)
438  std::cout << rangeOffsets_d(i) << " ";
439  std::cout << std::endl;
440  std::cout << "domainOffsets_d" << std::endl;
441  for (int i = 0; i < domainCardinality; i++)
442  std::cout << domainOffsets_d(i) << " ";
443  std::cout << std::endl;
444 #endif
445 
446  // get element ids
447  Kokkos::View<int *, DeviceSpace> elementIds_d;
448  {
449  std::vector<int> elementIds = range_ugi->getElementBlock(elementBlockIds[blockIter]);
450  Kokkos::View<int *, HostSpace> elementIds_h(elementIds.data(), elementIds.size());
451  elementIds_d = Kokkos::View<int *, DeviceSpace>("elementIds_d", elementIds_h.extent(0));
452  Kokkos::deep_copy(elementIds_d, elementIds_h);
453  }
454 
455  // get element orientations
456  typename Kokkos::DynRankView<Intrepid2::Orientation,DeviceSpace> elemOrts_d ("elemOrts_d", elementIds_d.extent(0));
457  {
458  // copy orientations to device
459  auto blockOrientations = orientations[elementBlockIds[blockIter]];
460  Kokkos::View<Intrepid2::Orientation*, HostSpace> elemOrts_h(blockOrientations.data(), blockOrientations.size());
461  Kokkos::deep_copy(elemOrts_d, elemOrts_h);
462  }
463 
464  // loop over element worksets
465  for(std::size_t elemIter = 0; elemIter < elementIds_d.extent(0); elemIter += numCells) {
466 
467  int endCellRange =
468  std::min(numCells, Teuchos::as<int>(elementIds_d.extent(0)) -
469  Teuchos::as<int>(elemIter));
470 
471  // get subviews on workset
472  auto ortsRange = Kokkos::make_pair(elemIter, std::min(elemIter + numCells, elemOrts_d.extent(0)));
473  auto elemOrtsWorkset_d = Kokkos::subview(elemOrts_d, ortsRange);
474  // Last workset might be shorter.
475  auto worksetRange = Kokkos::make_pair(0, endCellRange);
476  auto valuesAtDofCoordsOrientedWorkset_d = Kokkos::subview(valuesAtDofCoordsOriented_d, worksetRange, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL());
477  auto basisCoeffsLIOrientedWorkset_d = Kokkos::subview(basisCoeffsLIOriented_d, worksetRange, Kokkos::ALL(), Kokkos::ALL());
478 
479  // apply orientations for domain basis
480  // shuffles things in the second dimension, i.e. wrt domain basis
481  ots::modifyBasisByOrientation(valuesAtDofCoordsOrientedWorkset_d,
482  valuesAtDofCoordsNonOriented_d,
483  elemOrtsWorkset_d,
484  domain_basis.get());
485 
486  // get basis coefficients of domain basis functions wrt range basis
487  for(size_t domainIter=0; domainIter<domainCardinality; domainIter++)
488  // Get basis coeffs wrt range basis on reference element.
489  // basisCoeffsLI has dimensions (numCells, numFields=rangeCardinality, domainCardinality)
490  li::getBasisCoeffs(Kokkos::subview(basisCoeffsLIOrientedWorkset_d, Kokkos::ALL(), Kokkos::ALL(), domainIter),
491  Kokkos::subview(valuesAtDofCoordsOrientedWorkset_d, Kokkos::ALL(), domainIter, Kokkos::ALL(), Kokkos::ALL()),
492  range_basis.get(),
493  elemOrtsWorkset_d);
494 
495 #ifdef PANZER_HAVE_EPETRA_STACK
496  if (!useTpetra) { // Epetra fill
497 
498  Kokkos::View<LocalOrdinal*,DeviceSpace> indices_d("indices", domainCardinality);
499  Kokkos::View<Scalar*, DeviceSpace> values_d ("values", domainCardinality);
500 
501 
502  for (int cellNo = 0; cellNo < endCellRange; cellNo++) {
503  auto elemId = elementIds_d(elemIter+cellNo);
504 
505  // get IDs for range and domain dofs
506  auto rangeLIDs_d = Kokkos::subview(rangeElementLIDs_d, elemId, Kokkos::ALL());
507  auto domainLIDs_d = Kokkos::subview(domainElementLIDs_d, elemId, Kokkos::ALL());
508 
509  // loop over range LIDs
510  for(size_t rangeIter = 0; rangeIter < rangeCardinality; ++rangeIter) {
511  const LocalOrdinal range_row = rangeLIDs_d(rangeOffsets_d(rangeIter));
512  const bool isOwned = ep_rowmap->MyLID(range_row);
513  if (isOwned) {
514  // filter entries for zeros
515  LocalOrdinal rowNNZ = 0;
516  for(size_t domainIter = 0; domainIter < domainCardinality; domainIter++) {
517  Scalar val = basisCoeffsLIOriented_d(cellNo, rangeIter, domainIter);
518  if (KAT::magnitude(val) > entryFilterTol) {
519  indices_d(rowNNZ) = domainLIDs_d(domainOffsets_d(domainIter));
520  values_d(rowNNZ) = val;
521  ++rowNNZ;
522  }
523  }
524 
525  int ret = ep_interp_matrix->ReplaceMyValues(range_row, rowNNZ, values_d.data(), indices_d.data());
526  if (ret != 0) {
527  ret = ep_interp_matrix->InsertMyValues(range_row, rowNNZ, values_d.data(), indices_d.data());
528  TEUCHOS_ASSERT(ret == 0);
529  }
530  } //end if owned
531  } // end range LID loop
532  } //end workset loop
533  } // Epetra fill
534  else
535 #endif
536  { // Tpetra fill
537  Kokkos::parallel_for(
538  "MiniEM_Interpolation::worksetLoop",
539  Kokkos::RangePolicy<int, typename tp_matrix::node_type::execution_space>(0, endCellRange),
540  KOKKOS_LAMBDA(const int cellNo) {
541  auto elemId = elementIds_d(elemIter+cellNo);
542 
543  // get IDs for range and domain dofs
544  auto rangeLIDs_d = Kokkos::subview(rangeElementLIDs_d, elemId, Kokkos::ALL());
545  auto domainLIDs_d = Kokkos::subview(domainElementLIDs_d, elemId, Kokkos::ALL());
546 
547 #ifdef PANZER_INTERPOLATION_DEBUG_OUTPUT
548  std::cout << "\n" << elemOrts_d(elemIter+cellNo).to_string() << std::endl;
549  std::cout << "rangeLIDs" << std::endl;
550  for (int i = 0; i < rangeCardinality; i++)
551  std::cout << rangeLIDs_d(i) << " ";
552  std::cout << std::endl << "domainLIDs" << std::endl;
553  for (int i = 0; i < domainCardinality; i++)
554  std::cout << domainLIDs_d(i) << " ";
555  std::cout << std::endl;
556 #endif
557  // loop over range LIDs
558  for(size_t rangeIter = 0; rangeIter < rangeCardinality; ++rangeIter) {
559  const LocalOrdinal range_row = rangeLIDs_d(rangeOffsets_d(rangeIter));
560  const bool isOwned = ((minLocalIndex <= range_row) && (range_row <= maxLocalIndex));
561  if (isOwned) {
562  // filter entries for zeros
563  for(size_t domainIter = 0; domainIter < domainCardinality; domainIter++) {
564  Scalar val = basisCoeffsLIOriented_d(cellNo, rangeIter, domainIter);
565  if (KAT::magnitude(val) > entryFilterTol) {
566 
567 #if defined(PANZER_INTERPOLATION_DEBUG_OUTPUT) || defined(PANZER_DEBUG)
568  {
569  // Check that there is no entry yet or that we are overwriting it with the same value
570  auto row = lcl_tp_interp_matrix.rowConst(range_row);
571  for(LocalOrdinal kk = 0; kk<row.length; ++kk)
572  if (row.colidx(kk) == domainLIDs_d(domainOffsets_d(domainIter)))
573  if (!(KAT::magnitude(row.value(kk)-val) < entryFilterTol || KAT::magnitude(row.value(kk)) < entryFilterTol)) {
574 #ifdef PANZER_INTERPOLATION_DEBUG_OUTPUT
575  std::cout << "Replacing (" << range_row << "," << row.colidx(kk) << ") = " << row.value(kk) << " with " << val << std::endl;
576 #endif
577 #ifdef PANZER_DEBUG
578  Kokkos::abort("MiniEM interpolation worksetLoop failed!");
579 #endif
580  }
581  }
582 #endif
583 #ifdef PANZER_INTERPOLATION_DEBUG_OUTPUT
584  std::cout << "Setting (" << range_row << "," << domainLIDs_d(domainOffsets_d(domainIter)) << ") = " << val << std::endl;
585 #endif
586  lcl_tp_interp_matrix.replaceValues(range_row, &(domainLIDs_d(domainOffsets_d(domainIter))), 1, &val, /*is_sorted=*/false, /*force_atomic=*/true);
587  }
588  } // end domain LID loop
589  } // end if owned
590  } // end range LID loop
591  }); //end workset loop
592  } // Tpetra fill
593  } //end element loop
594  } //end element block loop
595 
596  if (useTpetra) {
597  tp_interp_matrix->fillComplete(tp_domainmap, tp_rangemap);
598 
599  if (op != Intrepid2::OPERATOR_VALUE) {
600  // Discrete gradient, curl, etc. actually live on edges, faces, etc., so the assembled matrix contains many zero entries; drop them.
601  tp_interp_matrix = removeSmallEntries(tp_interp_matrix, entryFilterTol);
602  }
603 
604  thyra_interp = Thyra::tpetraLinearOp<Scalar,LocalOrdinal,GlobalOrdinal,typename tp_matrix::node_type>(Thyra::createVectorSpace<Scalar,LocalOrdinal,GlobalOrdinal>(tp_rangemap),
605  Thyra::createVectorSpace<Scalar,LocalOrdinal,GlobalOrdinal>(tp_domainmap),
606  tp_interp_matrix);
607  }
608 #ifdef PANZER_HAVE_EPETRA_STACK
609  else
610  ep_interp_matrix->FillComplete(*ep_domainmap, *ep_rangemap);
611 #endif
612 
613  return thyra_interp;
614 }
615 
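// MatrixFreeInterpolationOp: applies the same interpolation operator without storing a
// matrix. The constructor builds the range (row), domain, and column maps from the DOF
// managers plus an Import from the domain map to the column map; apply() then evaluates
// the interpolation cell by cell.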
616  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
617  MatrixFreeInterpolationOp<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
618  MatrixFreeInterpolationOp(const Teuchos::RCP<const panzer::ConnManager> &conn,
619  const Teuchos::RCP<panzer::DOFManager> &_domain_ugi,
620  const Teuchos::RCP<panzer::DOFManager> &_range_ugi,
621  const std::string& _domain_basis_name,
622  const std::string& _range_basis_name,
623  Intrepid2::EOperator _op,
624  size_t _worksetSize) :
625  name(""),
626  domain_basis_name(_domain_basis_name),
627  range_basis_name(_range_basis_name),
628  op(_op),
629  worksetSize(_worksetSize),
630  domain_ugi(_domain_ugi),
631  range_ugi(_range_ugi)
632  {
633 
634  using Teuchos::RCP;
635  using Teuchos::rcp;
636  using Teuchos::rcp_dynamic_cast;
637 
638  using OT = Teuchos::OrdinalTraits<GlobalOrdinal>;
639 
640  // typedef Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal> tp_matrix;
641  typedef Tpetra::Map<LocalOrdinal,GlobalOrdinal> tp_map;
642 
643  RCP<const tp_map> tp_rangemap;
644  RCP<const tp_map> tp_domainmap;
645  RCP<const tp_map> tp_rowmap;
646  RCP<const tp_map> tp_colmap;
647 
648  {
649  // build maps
650  std::vector<GlobalOrdinal> gids;
651  range_ugi->getOwnedIndices(gids);
652  tp_rowmap = rcp(new tp_map(OT::invalid(), gids.data(), gids.size(), OT::zero(), range_ugi->getComm()));
653  tp_rangemap = tp_rowmap;
654  domain_ugi->getOwnedIndices(gids);
655  tp_domainmap = rcp(new tp_map(OT::invalid(), gids.data(), gids.size(), OT::zero(), domain_ugi->getComm()));
656  domain_ugi->getOwnedAndGhostedIndices(gids);
657  tp_colmap = rcp(new tp_map(OT::invalid(), gids.data(), gids.size(), OT::zero(), domain_ugi->getComm()));
658  }
659 
660  domainMap_ = tp_domainmap;
661  rangeMap_ = tp_rangemap;
662  columnMap_ = tp_colmap;
663  import_ = rcp(new Tpetra::Import<LocalOrdinal,GlobalOrdinal,Node>(domainMap_, columnMap_));
664 
665  allocateColumnMapVector(1);
666 
667  precomputeOwnersAndOrientations(conn);
668 
669  }
670 
671  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
672  void
673  MatrixFreeInterpolationOp<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
674  allocateColumnMapVector(size_t numVectors) {
675  colmapMV_ = rcp(new Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>(columnMap_, numVectors));
676  }
677 
678  // Pre-compute elements that own a DoF
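// owner_d_(lid) records one element that touches each locally owned range LID, so that
// during apply() every owned row is written by exactly one cell. Per-block Intrepid2
// orientations are also cached on device in orientations_.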
679  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
680  void
681  MatrixFreeInterpolationOp<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
682  precomputeOwnersAndOrientations(const Teuchos::RCP<const panzer::ConnManager> &conn) {
683  size_t maxNumElementsPerBlock = 0;
684  int numCells;
685  if (maxNumElementsPerBlock > 0)
686  numCells = std::min(maxNumElementsPerBlock, worksetSize);
687  else
688  numCells = worksetSize;
689 
690  LocalOrdinal lclTargetSize = getRangeMap()->getLocalNumElements();
691  owner_d_ = Kokkos::View<LocalOrdinal*,DeviceSpace>("owner", lclTargetSize);
692 
693  auto rangeLIDs_d = range_ugi->getLIDs();
694 
695  auto owner_d = owner_d_;
696 
697  // loop over element blocks
698  std::vector<std::string> elementBlockIds;
699  range_ugi->getElementBlockIds(elementBlockIds);
700  for(std::size_t blockIter = 0; blockIter < elementBlockIds.size(); ++blockIter) {
701 
702  // loop over element worksets
703  std::vector<int> elementIds = range_ugi->getElementBlock(elementBlockIds[blockIter]);
704  Kokkos::View<int*,DeviceSpace>::host_mirror_type elementIds_h(elementIds.data(), elementIds.size());
705  Kokkos::View<int*,DeviceSpace> elementIds_d("elementIds_d", elementIds_h.extent(0));
706  Kokkos::deep_copy(elementIds_d, elementIds_h);
707  for(std::size_t elemIter = 0; elemIter < elementIds_d.extent(0); elemIter += numCells) {
708  using range_type = Kokkos::RangePolicy<LocalOrdinal, DeviceSpace>;
709  Kokkos::parallel_for("miniEM::MatrixFreeInterpolationOp::cellLoop",
710  range_type(elemIter, std::min(elemIter+numCells,
711  elementIds_d.extent(0))),
712  KOKKOS_LAMBDA(const LocalOrdinal cellNo2) {
713  auto elemId = elementIds_d(cellNo2);
714 
715  // loop over range LIDs
716  for(size_t rangeIter = 0; rangeIter < rangeLIDs_d.extent(1); ++rangeIter) {
717  LocalOrdinal range_row = rangeLIDs_d(elemId, rangeIter);
718  if (range_row < lclTargetSize)
719  owner_d(range_row) = elemId;
720  }
721  });
722  }
723  }
724 
725  // get orientations for all blocks
726  std::map<std::string, std::vector<Intrepid2::Orientation> > orientations;
727  buildIntrepidOrientations(elementBlockIds, *conn, orientations);
728 
729  using HostSpace = Kokkos::HostSpace;
730 
731  // copy orientations to device
732  for (auto const& orientation : orientations) {
733  auto blockOrientations = orientation.second;
734  orientations_[orientation.first] = typename Kokkos::DynRankView<Intrepid2::Orientation,DeviceSpace>("elemOrts_d", blockOrientations.size());
735  Kokkos::View<Intrepid2::Orientation*, HostSpace> elemOrts_h(blockOrientations.data(), blockOrientations.size());
736  Kokkos::deep_copy(orientations_[orientation.first], elemOrts_h);
737  }
738  }
739 
740  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
741  void
742  MatrixFreeInterpolationOp<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
743  apply (const Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X,
744  Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y,
745  Teuchos::ETransp mode,
746  Scalar alpha,
747  Scalar beta) const {
748 
749  if (mode == Teuchos::NO_TRANS) {
750  applyNonTransposed(X, Y, alpha, beta);
751  return;
752  } else if (mode == Teuchos::TRANS) {
753  applyTransposed(X, Y, alpha, beta);
754  return;
755  } else
756  TEUCHOS_ASSERT(false);
757  }
758 
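// applyNonTransposed: computes Y = beta*Y + alpha*(M*X) without forming M. X is imported
// into the column-map multivector, op*(domain basis) is evaluated at the range dof
// coordinates and contracted with the imported coefficients, and the resulting range basis
// coefficients are written into the owned rows of Y.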
759  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
760  void
761  MatrixFreeInterpolationOp<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
762  applyNonTransposed (const Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X,
763  Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y,
764  Scalar alpha,
765  Scalar beta) const {
766 
767  using Teuchos::RCP;
768  using Teuchos::rcp;
769  using Teuchos::rcp_dynamic_cast;
770  using range_type = Kokkos::RangePolicy<LocalOrdinal, DeviceSpace>;
771 
772  using ots = Intrepid2::OrientationTools<DeviceSpace>;
773  using li = Intrepid2::LagrangianInterpolation<DeviceSpace>;
774  using DynRankDeviceView = Kokkos::DynRankView<Scalar,DeviceSpace>;
775  using view_t = typename Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dual_view_type::t_dev;
776  using const_view_t = typename Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dual_view_type::t_dev::const_type;
777 
778  Teuchos::TimeMonitor tm(*Teuchos::TimeMonitor::getNewTimer(std::string("Mini-EM: matrix-free apply no_trans ") + name));
779 
780  using TST = Teuchos::ScalarTraits<Scalar>;
781  const Scalar ZERO = TST::zero();
782  if (beta == ZERO)
783  Y.putScalar(ZERO);
784  colmapMV_->doImport(X, *import_,Tpetra::INSERT);
785 
786  const_view_t lclX = colmapMV_->getLocalViewDevice(Tpetra::Access::ReadOnly);
787  view_t lclY = Y.getLocalViewDevice(Tpetra::Access::ReadWrite);
788  size_t numVectors = lclY.extent(1);
789  LocalOrdinal lclTargetSize = getRangeMap()->getLocalNumElements();
790 
791  // get the domain and range bases
792  auto domain_fieldPattern = domain_ugi->getFieldPattern(domain_basis_name);
793  auto domain_basis = rcp_dynamic_cast<const panzer::Intrepid2FieldPattern>(domain_fieldPattern,true)->getIntrepidBasis();
794  auto range_fieldPattern = range_ugi->getFieldPattern(range_basis_name);
795  auto range_basis = rcp_dynamic_cast<const panzer::Intrepid2FieldPattern>(range_fieldPattern,true)->getIntrepidBasis();
796 
797  // cardinalities
798  const size_t domainCardinality = domain_basis->getCardinality();
799  const size_t rangeCardinality = range_basis->getCardinality();
800 
801  size_t maxNumElementsPerBlock = 0;
802 
803  const int dim = range_basis->getBaseCellTopology().getDimension();
804 
805  if (op == Intrepid2::OPERATOR_VALUE) {
806  TEUCHOS_ASSERT(rangeCardinality >= domainCardinality);
807  TEUCHOS_ASSERT_EQUALITY(domain_basis->getFunctionSpace(), range_basis->getFunctionSpace());
808  }
809 
810  // allocate some views
811  int numCells;
812  if (maxNumElementsPerBlock > 0)
813  numCells = std::min(maxNumElementsPerBlock, worksetSize);
814  else
815  numCells = worksetSize;
816  DynRankDeviceView range_dofCoords_d("range_dofCoords_d", rangeCardinality, dim);
817  DynRankDeviceView basisCoeffsLIOriented_d("basisCoeffsLIOriented_d", numCells, rangeCardinality, domainCardinality);
818 
819  // the ranks of these depend on dimension
820  DynRankDeviceView valuesAtDofCoordsNonOriented_d;
821  DynRankDeviceView valuesAtDofCoordsOriented_d;
822  DynRankDeviceView reducedValuesAtDofCoordsOriented_d;
823 
824  {
825  // Let Intrepid2 give us the correctly dimensioned view, then build one with +1 ranks and extent(0) == numCells
826  auto temp = domain_basis->allocateOutputView(rangeCardinality, op);
827 
828  // These views have dimensions
829  // numCells, numFields=domainCardinality, numPoints=rangeCardinality, (spatialDim)
830  //
831  if (temp.rank() == 3) {
832  valuesAtDofCoordsNonOriented_d = DynRankDeviceView("valuesAtDofCoordsNonOriented_d", temp.extent(0), temp.extent(1), temp.extent(2));
833  valuesAtDofCoordsOriented_d = DynRankDeviceView("valuesAtDofCoordsOriented_d", numCells, temp.extent(0), temp.extent(1), temp.extent(2));
834  reducedValuesAtDofCoordsOriented_d = DynRankDeviceView("reducedValuesAtDofCoordsOriented_d", numCells, temp.extent(1), temp.extent(2), numVectors);
835  } else {
836  valuesAtDofCoordsNonOriented_d = DynRankDeviceView("valuesAtDofCoordsNonOriented_d", temp.extent(0), temp.extent(1));
837  valuesAtDofCoordsOriented_d = DynRankDeviceView("valuesAtDofCoordsOriented_d", numCells, temp.extent(0), temp.extent(1));
838  reducedValuesAtDofCoordsOriented_d = DynRankDeviceView("reducedValuesAtDofCoordsOriented_d", numCells, temp.extent(1), numVectors);
839  }
840  }
841 
842  int fieldRank = Intrepid2::getFieldRank(range_basis->getFunctionSpace());
843  TEUCHOS_ASSERT((fieldRank == 0) || (fieldRank == 1));
844 
845  auto rangeLIDs_d = range_ugi->getLIDs();
846  auto domainLIDs_d = domain_ugi->getLIDs();
847 
848  // range dof coordinates
849  range_basis->getDofCoords(range_dofCoords_d);
850 
851  // compute values of op * (domain basis) at range dof coords on reference element
852  domain_basis->getValues(valuesAtDofCoordsNonOriented_d, range_dofCoords_d, op);
853 
854  // loop over element blocks
855  std::vector<std::string> elementBlockIds;
856  range_ugi->getElementBlockIds(elementBlockIds);
857  for(std::size_t blockIter = 0; blockIter < elementBlockIds.size(); ++blockIter) {
858 
859  auto rangeOffsets_d = range_ugi->getGIDFieldOffsetsKokkos(elementBlockIds[blockIter], 0);
860  auto domainOffsets_d = domain_ugi->getGIDFieldOffsetsKokkos(elementBlockIds[blockIter], 0);
861 
862  // get element ids
863  Kokkos::View<int *, DeviceSpace> elementIds_d;
864  {
865  std::vector<int> elementIds = range_ugi->getElementBlock(elementBlockIds[blockIter]);
866  Kokkos::View<int *, Kokkos::HostSpace> elementIds_h(elementIds.data(), elementIds.size());
867  elementIds_d = Kokkos::View<int *, DeviceSpace>("elementIds_d", elementIds_h.extent(0));
868  Kokkos::deep_copy(elementIds_d, elementIds_h);
869  }
870 
871  // get element orientations
872  auto elemOrts_d = orientations_.at(elementBlockIds[blockIter]);
873 
874  for(std::size_t elemIter = 0; elemIter < elementIds_d.extent(0); elemIter += numCells) {
875 
876  int endCellRange =
877  std::min(numCells, Teuchos::as<int>(elementIds_d.extent(0)) -
878  Teuchos::as<int>(elemIter));
879 
880  // get subviews on workset
881  auto ortsRange = Kokkos::make_pair(elemIter, std::min(elemIter + numCells, elemOrts_d.extent(0)));
882  auto elemOrtsWorkset_d = Kokkos::subview(elemOrts_d, ortsRange);
883  // Last workset might be shorter.
884  auto worksetRange = Kokkos::make_pair(0, endCellRange);
885  auto valuesAtDofCoordsOrientedWorkset_d = Kokkos::subview(valuesAtDofCoordsOriented_d, worksetRange, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL());
886  auto basisCoeffsLIOrientedWorkset_d = Kokkos::subview(basisCoeffsLIOriented_d, worksetRange, Kokkos::ALL(), Kokkos::ALL());
887 
888  // apply orientations for domain basis
889  // shuffles things in the second dimension, i.e. wrt domain basis
890  ots::modifyBasisByOrientation(valuesAtDofCoordsOrientedWorkset_d,
891  valuesAtDofCoordsNonOriented_d,
892  elemOrtsWorkset_d,
893  domain_basis.get());
894 
895  Kokkos::deep_copy(reducedValuesAtDofCoordsOriented_d, 0.0);
896 
897  if (reducedValuesAtDofCoordsOriented_d.rank() == 4) {
898  Kokkos::parallel_for("miniEM:MatrixFreeInterpolationOp:cellLoop1",
899  range_type(0, std::min(numCells, elementIds_d.extent_int(0)-Teuchos::as<int>(elemIter))),
900  KOKKOS_LAMBDA(const LocalOrdinal cellNo) {
901  LocalOrdinal cellNo2 = elemIter+cellNo;
902  LocalOrdinal elemId = elementIds_d(cellNo2);
903  for(size_t domainIter=0; domainIter<domainCardinality; domainIter++) {
904  LocalOrdinal J = domainLIDs_d(elemId, domainOffsets_d(domainIter));
905  for(size_t rangeIter=0; rangeIter<rangeCardinality; rangeIter++) {
906  for(size_t d=0; d<valuesAtDofCoordsOriented_d.extent(3); d++) {
907  Scalar val = valuesAtDofCoordsOriented_d(cellNo, domainIter, rangeIter, d);
908  for (size_t j = 0; j<numVectors; ++j)
909  reducedValuesAtDofCoordsOriented_d(cellNo, rangeIter, d, j) += val * lclX(J, j);
910  }
911  }
912  }
913  });
914 
915  for (size_t j = 0; j<numVectors; ++j)
916  li::getBasisCoeffs(Kokkos::subview(basisCoeffsLIOrientedWorkset_d, Kokkos::ALL(), Kokkos::ALL(), j),
917  Kokkos::subview(reducedValuesAtDofCoordsOriented_d, worksetRange, Kokkos::ALL(), Kokkos::ALL(), j),
918  range_basis.get(),
919  elemOrtsWorkset_d
920  );
921  } else {
922  Kokkos::parallel_for("miniEM:MatrixFreeInterpolationOp:cellLoop1",
923  range_type(0, std::min(numCells, elementIds_d.extent_int(0)-Teuchos::as<int>(elemIter))),
924  KOKKOS_LAMBDA(const LocalOrdinal cellNo) {
925  LocalOrdinal cellNo2 = elemIter+cellNo;
926  LocalOrdinal elemId = elementIds_d(cellNo2);
927  for(size_t domainIter=0; domainIter<domainCardinality; domainIter++) {
928  LocalOrdinal J = domainLIDs_d(elemId, domainOffsets_d(domainIter));
929  for(size_t rangeIter=0; rangeIter<rangeCardinality; rangeIter++) {
930  Scalar val = valuesAtDofCoordsOriented_d(cellNo, domainIter, rangeIter);
931  for (size_t j = 0; j<numVectors; ++j)
932  reducedValuesAtDofCoordsOriented_d(cellNo, rangeIter, j) += val * lclX(J, j);
933  }
934  }
935  });
936 
937  for (size_t j = 0; j<numVectors; ++j)
938  li::getBasisCoeffs(Kokkos::subview(basisCoeffsLIOrientedWorkset_d, Kokkos::ALL(), Kokkos::ALL(), j),
939  Kokkos::subview(reducedValuesAtDofCoordsOriented_d, worksetRange, Kokkos::ALL(), j),
940  range_basis.get(),
941  elemOrtsWorkset_d
942  );
943  }
944 
945  auto owner_d = owner_d_;
946 
947  Kokkos::parallel_for("miniEM::MatrixFreeInterpolationOp::cellLoop2",
948  range_type(elemIter, std::min(elemIter+numCells,
949  elementIds_d.extent(0))),
950  KOKKOS_LAMBDA(const LocalOrdinal cellNo2) {
951  LocalOrdinal cellNo = cellNo2-elemIter;
952  LocalOrdinal elemId = elementIds_d(cellNo2);
953 
954  // loop over range LIDs
955  for(size_t rangeIter = 0; rangeIter < rangeLIDs_d.extent(1); ++rangeIter) {
956  LocalOrdinal range_row = rangeLIDs_d(elemId, rangeOffsets_d(rangeIter));
957 
958  // if owned
959  if ((range_row < lclTargetSize) && (owner_d(range_row) == elemId)) {
960 
961  for (size_t j = 0; j<numVectors; ++j) {
962  Scalar val = basisCoeffsLIOriented_d(cellNo, rangeIter, j);
963  lclY(range_row,j) = beta*lclY(range_row,j) + alpha*val;
964  }
965  } // end if owned
966  } // end range LID loop
967  }); // end element loop
968  } //end workset loop
969  } //end element block loop
970 
971  }
972 
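// applyTransposed: computes Y = beta*Y + alpha*(M^T*X). Contributions are accumulated
// atomically into the column-map multivector and then exported into Y with
// Tpetra::ADD_ASSIGN after Y has been scaled by beta.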
973  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
974  void
975  MatrixFreeInterpolationOp<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
976  applyTransposed (const Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X,
977  Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y,
978  Scalar alpha,
979  Scalar beta) const {
980 
981  using Teuchos::RCP;
982  using Teuchos::rcp;
983  using Teuchos::rcp_dynamic_cast;
984  using range_type = Kokkos::RangePolicy<LocalOrdinal, DeviceSpace>;
985 
986  typedef Intrepid2::OrientationTools<DeviceSpace> ots;
987  typedef Intrepid2::LagrangianInterpolation<DeviceSpace> li;
988  typedef Kokkos::DynRankView<Scalar,DeviceSpace> DynRankDeviceView;
989  using TST = Teuchos::ScalarTraits<Scalar>;
990  const Scalar ZERO = TST::zero();
991  using view_t = typename Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dual_view_type::t_dev;
992  using const_view_t = typename Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dual_view_type::t_dev::const_type;
993 
994  Teuchos::TimeMonitor tm(*Teuchos::TimeMonitor::getNewTimer(std::string("Mini-EM: matrix-free apply trans ") + name));
995 
996  const_view_t lclX = X.getLocalViewDevice(Tpetra::Access::ReadOnly);
997  colmapMV_->putScalar(ZERO);
998  view_t lclYtemp = colmapMV_->getLocalViewDevice(Tpetra::Access::ReadWrite);
999 
1000  // get the domain and range bases
1001  auto domain_fieldPattern = domain_ugi->getFieldPattern(domain_basis_name);
1002  auto domain_basis = rcp_dynamic_cast<const panzer::Intrepid2FieldPattern>(domain_fieldPattern,true)->getIntrepidBasis();
1003  auto range_fieldPattern = range_ugi->getFieldPattern(range_basis_name);
1004  auto range_basis = rcp_dynamic_cast<const panzer::Intrepid2FieldPattern>(range_fieldPattern,true)->getIntrepidBasis();
1005 
1006  // cardinalities
1007  const size_t domainCardinality = domain_basis->getCardinality();
1008  const size_t rangeCardinality = range_basis->getCardinality();
1009 
1010  size_t maxNumElementsPerBlock = 0;
1011 
1012  const int dim = range_basis->getBaseCellTopology().getDimension();
1013 
1014  if (op == Intrepid2::OPERATOR_VALUE) {
1015  TEUCHOS_ASSERT(rangeCardinality >= domainCardinality);
1016  TEUCHOS_ASSERT_EQUALITY(domain_basis->getFunctionSpace(), range_basis->getFunctionSpace());
1017  }
1018 
1019  // allocate some views
1020  int numCells;
1021  if (maxNumElementsPerBlock > 0)
1022  numCells = std::min(maxNumElementsPerBlock, worksetSize);
1023  else
1024  numCells = worksetSize;
1025  DynRankDeviceView range_dofCoords_d("range_dofCoords_d", rangeCardinality, dim);
1026  DynRankDeviceView basisCoeffsLIOriented_d("basisCoeffsLIOriented_d", numCells, rangeCardinality, domainCardinality);
1027 
1028  // the ranks of these depend on dimension
1029  DynRankDeviceView valuesAtDofCoordsNonOriented_d;
1030  DynRankDeviceView valuesAtDofCoordsOriented_d;
1031 
1032  {
1033  // Let Intrepid2 give us the correctly dimensioned view, then build one with +1 ranks and extent(0) == numCells
1034  auto temp = domain_basis->allocateOutputView(rangeCardinality, op);
1035 
1036  // These views have dimensions
1037  // numCells, numFields=domainCardinality, numPoints=rangeCardinality, (spatialDim)
1038  //
1039  if (temp.rank() == 3) {
1040  valuesAtDofCoordsNonOriented_d = DynRankDeviceView("valuesAtDofCoordsNonOriented_d", temp.extent(0), temp.extent(1), temp.extent(2));
1041  valuesAtDofCoordsOriented_d = DynRankDeviceView("valuesAtDofCoordsOriented_d", numCells, temp.extent(0), temp.extent(1), temp.extent(2));
1042  } else {
1043  valuesAtDofCoordsNonOriented_d = DynRankDeviceView("valuesAtDofCoordsNonOriented_d", temp.extent(0), temp.extent(1));
1044  valuesAtDofCoordsOriented_d = DynRankDeviceView("valuesAtDofCoordsOriented_d", numCells, temp.extent(0), temp.extent(1));
1045  }
1046  }
1047 
1048  int fieldRank = Intrepid2::getFieldRank(range_basis->getFunctionSpace());
1049  TEUCHOS_ASSERT((fieldRank == 0) || (fieldRank == 1));
1050 
1051  auto rangeLIDs_d = range_ugi->getLIDs();
1052  auto domainLIDs_d = domain_ugi->getLIDs();
1053  Kokkos::fence();
1054 
1055  // range dof coordinates
1056  range_basis->getDofCoords(range_dofCoords_d);
1057 
1058  // compute values of op * (domain basis) at range dof coords on reference element
1059  domain_basis->getValues(valuesAtDofCoordsNonOriented_d, range_dofCoords_d, op);
1060 
1061  // loop over element blocks
1062  std::vector<std::string> elementBlockIds;
1063  range_ugi->getElementBlockIds(elementBlockIds);
1064  for(std::size_t blockIter = 0; blockIter < elementBlockIds.size(); ++blockIter) {
1065 
1066  auto rangeOffsets_d = range_ugi->getGIDFieldOffsetsKokkos(elementBlockIds[blockIter], 0);
1067  auto domainOffsets_d = domain_ugi->getGIDFieldOffsetsKokkos(elementBlockIds[blockIter], 0);
1068 
1069  // get element ids
1070  Kokkos::View<int *, DeviceSpace> elementIds_d;
1071  {
1072  std::vector<int> elementIds = range_ugi->getElementBlock(elementBlockIds[blockIter]);
1073  Kokkos::View<int *, Kokkos::HostSpace> elementIds_h(elementIds.data(), elementIds.size());
1074  elementIds_d = Kokkos::View<int *, DeviceSpace>("elementIds_d", elementIds_h.extent(0));
1075  Kokkos::deep_copy(elementIds_d, elementIds_h);
1076  }
1077 
1078  // get element orientations
1079  auto elemOrts_d = orientations_.at(elementBlockIds[blockIter]);
1080 
1081  for(std::size_t elemIter = 0; elemIter < elementIds_d.extent(0); elemIter += numCells) {
1082 
1083  int endCellRange =
1084  std::min(numCells, Teuchos::as<int>(elementIds_d.extent(0)) -
1085  Teuchos::as<int>(elemIter));
1086 
1087  // get subviews on workset
1088  auto ortsRange = Kokkos::make_pair(elemIter, std::min(elemIter + numCells, elemOrts_d.extent(0)));
1089  auto elemOrtsWorkset_d = Kokkos::subview(elemOrts_d, ortsRange);
1090  // Last workset might be shorter.
1091  auto worksetRange = Kokkos::make_pair(0, endCellRange);
1092  auto valuesAtDofCoordsOrientedWorkset_d = Kokkos::subview(valuesAtDofCoordsOriented_d, worksetRange, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL());
1093  auto basisCoeffsLIOrientedWorkset_d = Kokkos::subview(basisCoeffsLIOriented_d, worksetRange, Kokkos::ALL(), Kokkos::ALL());
1094 
1095  // apply orientations for domain basis
1096  // shuffles things in the second dimension, i.e. wrt domain basis
1097  ots::modifyBasisByOrientation(valuesAtDofCoordsOrientedWorkset_d,
1098  valuesAtDofCoordsNonOriented_d,
1099  elemOrtsWorkset_d,
1100  domain_basis.get());
1101  Kokkos::fence();
1102 
1103  // get basis coefficients of domain basis functions wrt range basis
1104  for(size_t domainIter=0; domainIter<domainCardinality; domainIter++)
1105  // Get basis coeffs wrt range basis on reference element.
1106  // basisCoeffsLI has dimensions (numCells, numFields=rangeCardinality, domainCardinality)
1107  li::getBasisCoeffs(Kokkos::subview(basisCoeffsLIOrientedWorkset_d, Kokkos::ALL(), Kokkos::ALL(), domainIter),
1108  Kokkos::subview(valuesAtDofCoordsOrientedWorkset_d, Kokkos::ALL(), domainIter, Kokkos::ALL(), Kokkos::ALL()),
1109  range_basis.get(), elemOrtsWorkset_d);
1110  Kokkos::fence();
1111 
1112  auto owner_d = owner_d_;
1113 
1114 
1115  Kokkos::parallel_for("miniEM::MatrixFreeInterpolationOp::cellLoop",
1116  range_type(elemIter, std::min(elemIter+numCells,
1117  elementIds_d.extent(0))),
1118  KOKKOS_LAMBDA(const LocalOrdinal cellNo2) {
1119  LocalOrdinal cellNo = cellNo2-elemIter;
1120  LocalOrdinal elemId = elementIds_d(cellNo2);
1121 
1122  // loop over range LIDs
1123  for(size_t rangeIter = 0; rangeIter < rangeLIDs_d.extent(1); ++rangeIter) {
1124  LocalOrdinal range_row = rangeLIDs_d(elemId, rangeOffsets_d(rangeIter));
1125 
1126  // if owned
1127  if ((range_row < (LocalOrdinal) lclX.extent(0)) && (owner_d(range_row) == elemId)) {
1128 
1129  for(size_t domainIter = 0; domainIter < domainLIDs_d.extent(1); domainIter++) {
1130  LocalOrdinal J = domainLIDs_d(elemId, domainOffsets_d(domainIter));
1131  Scalar val = basisCoeffsLIOriented_d(cellNo, rangeIter, domainIter);
1132  for (size_t j = 0; j<lclYtemp.extent(1); ++j)
1133  Kokkos::atomic_add(&lclYtemp(J,j), alpha*val*lclX(range_row,j));
1134  }
1135  } //end if owned
1136  } //end range LID loop
1137  }); // end element loop
1138  } //end workset loop
1139  } //end element block loop
1140  Kokkos::fence();
1141 
1142  if (beta == ZERO)
1143  Y.putScalar(ZERO);
1144  else
1145  Y.scale(beta);
1146  Y.doExport(*colmapMV_, *import_, Tpetra::ADD_ASSIGN);
1147  Kokkos::fence();
1148  }
1149 
1150 
1151 }