Intrepid2
Intrepid2_HierarchicalBasis_HDIV_PYR.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Intrepid2 Package
4 //
5 // Copyright 2007 NTESS and the Intrepid2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
19 #ifndef Intrepid2_HierarchicalBasis_HDIV_PYR_h
20 #define Intrepid2_HierarchicalBasis_HDIV_PYR_h
21 
22 #include <Kokkos_DynRankView.hpp>
23 
24 #include <Intrepid2_config.h>
25 
26 #include "Intrepid2_Basis.hpp"
32 #include "Intrepid2_Utils.hpp"
33 
34 #include <math.h>
35 
36 #include "Teuchos_RCP.hpp"
37 
38 namespace Intrepid2
39 {
45  template<class DeviceType, class OutputScalar, class PointScalar,
46  class OutputFieldType, class InputPointsType>
48  {
49  using ExecutionSpace = typename DeviceType::execution_space;
50  using ScratchSpace = typename ExecutionSpace::scratch_memory_space;
51  using OutputScratchView = Kokkos::View<OutputScalar*,ScratchSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
52  using OutputScratchView2D = Kokkos::View<OutputScalar**,ScratchSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
53  using PointScratchView = Kokkos::View<PointScalar*, ScratchSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
54 
55  using TeamPolicy = Kokkos::TeamPolicy<ExecutionSpace>;
56  using TeamMember = typename TeamPolicy::member_type;
57 
58  EOperator opType_;
59 
60  OutputFieldType output_; // F,P
61  InputPointsType inputPoints_; // P,D
62 
63  int polyOrder_;
64  bool useCGBasis_;
65  int numFields_, numPoints_;
66 
67  size_t fad_size_output_;
68 
69  static const int numVertices = 5;
70  static const int numMixedEdges = 4;
71  static const int numTriEdges = 4;
72  static const int numEdges = 8;
73  // the following ordering of the edges matches that used by ESEAS
74  // (it *looks* like this is what ESEAS uses; the basis comparison tests will clarify whether I've read their code correctly)
75  // see also PyramidEdgeNodeMap in Shards_BasicTopologies.hpp
76  const int edge_start_[numEdges] = {0,1,2,3,0,1,2,3}; // edge i is from edge_start_[i] to edge_end_[i]
77  const int edge_end_[numEdges] = {1,2,3,0,4,4,4,4}; // edge i is from edge_start_[i] to edge_end_[i]
78 
79  // quadrilateral face comes first
80  static const int numQuadFaces = 1;
81  static const int numTriFaces = 4;
82 
83  // face ordering matches ESEAS. (See BlendProjectPyraTF in ESEAS.)
84  const int tri_face_vertex_0[numTriFaces] = {0,1,3,0}; // faces are abc where 0 ≤ a < b < c ≤ 3
85  const int tri_face_vertex_1[numTriFaces] = {1,2,2,3};
86  const int tri_face_vertex_2[numTriFaces] = {4,4,4,4};
87 
88  Hierarchical_HDIV_PYR_Functor(EOperator opType, OutputFieldType output, InputPointsType inputPoints,
89  int polyOrder)
90  : opType_(opType), output_(output), inputPoints_(inputPoints),
91  polyOrder_(polyOrder),
92  fad_size_output_(getScalarDimensionForView(output))
93  {
94  numFields_ = output.extent_int(0);
95  numPoints_ = output.extent_int(1);
96  const auto & p = polyOrder;
97  const auto basisCardinality = p * p + 2 * p * (p+1) + 3 * p * p * (p-1);
98 
99  INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != inputPoints.extent_int(0), std::invalid_argument, "point counts need to match!");
100  INTREPID2_TEST_FOR_EXCEPTION(numFields_ != basisCardinality, std::invalid_argument, "output field size does not match basis cardinality");
101  }
102 
104  KOKKOS_INLINE_FUNCTION
105  void cross(Kokkos::Array<OutputScalar,3> &c,
106  const Kokkos::Array<OutputScalar,3> &a,
107  const Kokkos::Array<OutputScalar,3> &b) const
108  {
109  c[0] = a[1] * b[2] - a[2] * b[1];
110  c[1] = a[2] * b[0] - a[0] * b[2];
111  c[2] = a[0] * b[1] - a[1] * b[0];
112  }
113 
115  KOKKOS_INLINE_FUNCTION
116  void dot(OutputScalar &c,
117  const Kokkos::Array<OutputScalar,3> &a,
118  const Kokkos::Array<OutputScalar,3> &b) const
119  {
120  c = 0;
121  for (ordinal_type d=0; d<3; d++)
122  {
123  c += a[d] * b[d];
124  }
125  }
126 
127  KOKKOS_INLINE_FUNCTION
128  OutputScalar dot(const Kokkos::Array<OutputScalar,3> &a,
129  const Kokkos::Array<OutputScalar,3> &b) const
130  {
131  OutputScalar c = 0;
132  for (ordinal_type d=0; d<3; d++)
133  {
134  c += a[d] * b[d];
135  }
136  return c;
137  }
138 
139  KOKKOS_INLINE_FUNCTION
140  void E_E(Kokkos::Array<OutputScalar,3> &EE,
141  const ordinal_type &i,
142  const OutputScratchView &PHom,
143  const PointScalar &s0, const PointScalar &s1,
144  const Kokkos::Array<PointScalar,3> &s0_grad,
145  const Kokkos::Array<PointScalar,3> &s1_grad) const
146  {
147  const auto & P_i = PHom(i);
148  for (ordinal_type d=0; d<3; d++)
149  {
150  EE[d] = P_i * (s0 * s1_grad[d] - s1 * s0_grad[d]);
151  }
152  }
153 
154  KOKKOS_INLINE_FUNCTION
155  void E_E_CURL(Kokkos::Array<OutputScalar,3> &curl_EE,
156  const ordinal_type &i,
157  const OutputScratchView &PHom,
158  const PointScalar &s0, const PointScalar &s1,
159  const Kokkos::Array<PointScalar,3> &s0_grad,
160  const Kokkos::Array<PointScalar,3> &s1_grad) const
161  {
162  // curl (E_i^E)(s0,s1) = (i+2) [P_i](s0,s1) (grad s0 x grad s1)
163  OutputScalar ip2_Pi = (i+2) * PHom(i);
164  cross(curl_EE, s0_grad, s1_grad);
165  for (ordinal_type d=0; d<3; d++)
166  {
167  curl_EE[d] *= ip2_Pi;
168  }
169  }
170 
173  KOKKOS_INLINE_FUNCTION
174  void V_QUAD(Kokkos::Array<OutputScalar,3> &VQUAD,
175  const ordinal_type &i, const ordinal_type &j,
176  const OutputScratchView &PHom_s,
177  const PointScalar &s0, const PointScalar &s1,
178  const Kokkos::Array<PointScalar,3> &s0_grad,
179  const Kokkos::Array<PointScalar,3> &s1_grad,
180  const OutputScratchView &PHom_t,
181  const PointScalar &t0, const PointScalar &t1,
182  const Kokkos::Array<PointScalar,3> &t0_grad,
183  const Kokkos::Array<PointScalar,3> &t1_grad) const
184  {
185  Kokkos::Array<OutputScalar,3> EE_i, EE_j;
186 
187  E_E(EE_i, i, PHom_s, s0, s1, s0_grad, s1_grad);
188  E_E(EE_j, j, PHom_t, t0, t1, t0_grad, t1_grad);
189 
190  // VQUAD = EE_i x EE_j:
191  cross(VQUAD, EE_i, EE_j);
192  }
193 
196  KOKKOS_INLINE_FUNCTION
197  void E_QUAD(Kokkos::Array<OutputScalar,3> &EQUAD,
198  const ordinal_type &i, const ordinal_type &j,
199  const OutputScratchView &HomPi_s01,
200  const PointScalar &s0, const PointScalar &s1,
201  const Kokkos::Array<PointScalar,3> &s0_grad,
202  const Kokkos::Array<PointScalar,3> &s1_grad,
203  const OutputScratchView &HomLi_t01) const
204  {
205  const OutputScalar &phiE_j = HomLi_t01(j);
206 
207  Kokkos::Array<OutputScalar,3> EE_i;
208  E_E(EE_i, i, HomPi_s01, s0, s1, s0_grad, s1_grad);
209 
210  for (ordinal_type d=0; d<3; d++)
211  {
212  EQUAD[d] = phiE_j * EE_i[d];
213  }
214  }
215 
216  KOKKOS_INLINE_FUNCTION
217  void E_QUAD_CURL(Kokkos::Array<OutputScalar,3> &EQUAD_CURL,
218  const ordinal_type &i, const ordinal_type &j,
219  const OutputScratchView &HomPi_s01,
220  const PointScalar &s0, const PointScalar &s1,
221  const Kokkos::Array<PointScalar,3> &s0_grad,
222  const Kokkos::Array<PointScalar,3> &s1_grad,
223  const OutputScratchView &HomPj_t01,
224  const OutputScratchView &HomLj_t01,
225  const OutputScratchView &HomLj_dt_t01,
226  const Kokkos::Array<PointScalar,3> &t0_grad,
227  const Kokkos::Array<PointScalar,3> &t1_grad) const
228  {
229  const OutputScalar &phiE_j = HomLj_t01(j);
230 
231  Kokkos::Array<OutputScalar,3> curl_EE_i;
232  E_E_CURL(curl_EE_i, i, HomPi_s01, s0, s1, s0_grad, s1_grad);
233 
234  Kokkos::Array<OutputScalar,3> EE_i;
235  E_E(EE_i, i, HomPi_s01, s0, s1, s0_grad, s1_grad);
236 
237  Kokkos::Array<OutputScalar,3> grad_phiE_j;
238  computeGradHomLi(grad_phiE_j, j, HomPj_t01, HomLj_dt_t01, t0_grad, t1_grad);
239 
240  cross(EQUAD_CURL, grad_phiE_j, EE_i);
241  for (ordinal_type d=0; d<3; d++)
242  {
243  EQUAD_CURL[d] += phiE_j * curl_EE_i[d];
244  }
245  }
246 
248  KOKKOS_INLINE_FUNCTION
249  void V_LEFT_TRI(Kokkos::Array<OutputScalar,3> &VLEFTTRI,
250  const OutputScalar &phi_i, const Kokkos::Array<OutputScalar,3> &phi_i_grad,
251  const OutputScalar &phi_j, const Kokkos::Array<OutputScalar,3> &phi_j_grad,
252  const OutputScalar &t0, const Kokkos::Array<OutputScalar,3> &t0_grad) const {
253  cross(VLEFTTRI, phi_i_grad, phi_j_grad);
254  const OutputScalar t0_2 = t0 * t0;
255  for (ordinal_type d=0; d<3; d++)
256  {
257  VLEFTTRI[d] *= t0_2;
258  }
259 
260  Kokkos::Array<OutputScalar,3> tmp_t0_grad_t0, tmp_diff, tmp_cross;
261  for (ordinal_type d=0; d<3; d++)
262  {
263  tmp_t0_grad_t0[d] = t0 * t0_grad[d];
264  tmp_diff[d] = phi_i * phi_j_grad[d] - phi_j * phi_i_grad[d];
265  }
266  cross(tmp_cross, tmp_t0_grad_t0, tmp_diff);
267 
268  for (ordinal_type d=0; d<3; d++)
269  {
270  VLEFTTRI[d] += tmp_cross[d];
271  }
272  };
273 
274 
276  KOKKOS_INLINE_FUNCTION
277  void V_RIGHT_TRI(Kokkos::Array<OutputScalar,3> &VRIGHTTRI,
278  const OutputScalar &mu1, const Kokkos::Array<OutputScalar,3> &mu1_grad,
279  const OutputScalar &phi_i, const Kokkos::Array<OutputScalar,3> &phi_i_grad,
280  const OutputScalar &t0, const Kokkos::Array<OutputScalar,3> &t0_grad) const {
281  Kokkos::Array<OutputScalar,3> left_vector; // left vector in the cross product we take below.
282 
283  const OutputScalar t0_2 = t0 * t0;
284  for (ordinal_type d=0; d<3; d++)
285  {
286  left_vector[d] = t0_2 * phi_i_grad[d] + 2. * t0 * phi_i * t0_grad[d];
287  }
288  cross(VRIGHTTRI, left_vector, mu1_grad);
289  };
290 
291  // This is the "Ancillary Operator" V^{tri}_{ij} on p. 433 of Fuentes et al.
292  KOKKOS_INLINE_FUNCTION
293  void V_TRI(Kokkos::Array<OutputScalar,3> &VTRI,
294  const ordinal_type &i, // i >= 0
295  const ordinal_type &j, // j >= 0
296  const OutputScratchView &P, // container in which shiftedScaledLegendreValues have been computed for the appropriate face
297  const OutputScratchView &P_2ip1, // container in which shiftedScaledJacobiValues have been computed for (2i+1) for the appropriate face
298  const Kokkos::Array<PointScalar,3> &vectorWeight) const // s0 (grad s1 x grad s2) + s1 (grad s2 x grad s0) + s2 (grad s0 x grad s1) -- see computeFaceVectorWeight()
299  {
300  const auto &P_i = P(i);
301  const auto &P_2ip1_j = P_2ip1(j);
302 
303  for (ordinal_type d=0; d<3; d++)
304  {
305  VTRI[d] = P_i * P_2ip1_j * vectorWeight[d];
306  }
307  }
308 
309  // Divergence of the "Ancillary Operator" V^{tri}_{ij} on p. 433 of Fuentes et al.
310  KOKKOS_INLINE_FUNCTION
311  void V_TRI_DIV(OutputScalar &VTRI_DIV,
312  const ordinal_type &i, // i >= 0
313  const ordinal_type &j, // j >= 0
314  const OutputScratchView &P, // container in which shiftedScaledLegendreValues have been computed for the appropriate face
315  const OutputScratchView &P_2ip1, // container in which shiftedScaledJacobiValues have been computed for (2i+1) for the appropriate face
316  const OutputScalar &divWeight) const // grad s0 \dot (grad s1 x grad s2)
317  {
318  const auto &P_i = P(i);
319  const auto &P_2ip1_j = P_2ip1(j);
320 
321  VTRI_DIV = (i + j + 3.) * P_i * P_2ip1_j * divWeight;
322  }
323 
325  KOKKOS_INLINE_FUNCTION
326  void V_TRI_B42(Kokkos::Array<OutputScalar,3> &VTRI_mus0_mus1_s2_over_mu,
327  const Kokkos::Array<OutputScalar,3> &VTRI_00_s0_s1_s2,
328  const Kokkos::Array<OutputScalar,3> &EE_0_s0_s1,
329  const OutputScalar &s2,
330  const OutputScalar &mu, const Kokkos::Array<OutputScalar,3> &mu_grad,
331  const ordinal_type &i, // i >= 0
332  const ordinal_type &j, // j >= 0
333  const OutputScratchView &P_mus0_mus1, // container in which shiftedScaledLegendreValues have been computed for the appropriate face, with arguments (mu s0, mu s1)
334  const OutputScratchView &P_2ip1_mus0pmus1_s2 // container in which shiftedScaledJacobiValues have been computed for (2i+1) for the appropriate face, with arguments (mu s0 + mu s1, s2)
335  ) const
336  {
337  const auto &Pi_mus0_mus1 = P_mus0_mus1(i);
338  const auto &Pj_2ip1_mus0pmus1_s2 = P_2ip1_mus0pmus1_s2(j);
339 
340  // place s2 (grad mu) x E^E_0 in result container
341  cross(VTRI_mus0_mus1_s2_over_mu, mu_grad, EE_0_s0_s1);
342  for (ordinal_type d=0; d<3; d++)
343  {
344  VTRI_mus0_mus1_s2_over_mu[d] *= s2;
345  }
346 
347  // add mu V^{tri}_00 to it:
348  for (ordinal_type d=0; d<3; d++)
349  {
350  VTRI_mus0_mus1_s2_over_mu[d] += mu * VTRI_00_s0_s1_s2[d];
351  }
352 
353  // multiply by [P_i, P^{2i+1}_j]
354  for (ordinal_type d=0; d<3; d++)
355  {
356  VTRI_mus0_mus1_s2_over_mu[d] *= Pi_mus0_mus1 * Pj_2ip1_mus0pmus1_s2;
357  }
358  }
359 
361  KOKKOS_INLINE_FUNCTION
362  void V_TRI_B42_DIV(OutputScalar &div_VTRI_mus0_mus1_s2_over_mu,
363  const Kokkos::Array<OutputScalar,3> &VTRI_00_s0_s1_s2,
364  const Kokkos::Array<OutputScalar,3> &EE_0_s0_s1,
365  const OutputScalar &s2, const Kokkos::Array<OutputScalar,3> &s2_grad,
366  const OutputScalar &mu, const Kokkos::Array<OutputScalar,3> &mu_grad,
367  const ordinal_type &i, // i >= 0
368  const ordinal_type &j, // j >= 0
369  const OutputScratchView &P_mus0_mus1, // container in which shiftedScaledLegendreValues have been computed for the appropriate face, with arguments (mu s0, mu s1)
370  const OutputScratchView &P_2ip1_mus0pmus1_s2 // container in which shiftedScaledJacobiValues have been computed for (2i+1) for the appropriate face, with arguments (mu s0 + mu s1, s2)
371  ) const
372  {
373  const auto &Pi_mus0_mus1 = P_mus0_mus1(i);
374  const auto &Pj_2ip1_mus0pmus1_s2 = P_2ip1_mus0pmus1_s2(j);
375 
376  Kokkos::Array<OutputScalar,3> vector;
377 
378  // place E^E_0 x grad s2 in scratch vector container
379  cross(vector, EE_0_s0_s1, s2_grad);
380  // multiply by (i+j+3)
381  for (ordinal_type d=0; d<3; d++)
382  {
383  vector[d] *= i+j+3.;
384  }
385  // subtract V_00
386  for (ordinal_type d=0; d<3; d++)
387  {
388  vector[d] -= VTRI_00_s0_s1_s2[d];
389  }
390 
391  OutputScalar dot_product;
392  dot(dot_product, mu_grad, vector);
393 
394  div_VTRI_mus0_mus1_s2_over_mu = Pi_mus0_mus1 * Pj_2ip1_mus0pmus1_s2 * dot_product;
395  }
396 
397  KOKKOS_INLINE_FUNCTION
398  void computeFaceVectorWeight(Kokkos::Array<OutputScalar,3> &vectorWeight,
399  const PointScalar &s0, const Kokkos::Array<PointScalar,3> &s0Grad,
400  const PointScalar &s1, const Kokkos::Array<PointScalar,3> &s1Grad,
401  const PointScalar &s2, const Kokkos::Array<PointScalar,3> &s2Grad) const
402  {
403  // compute s0 (grad s1 x grad s2) + s1 (grad s2 x grad s0) + s2 (grad s0 x grad s1)
404 
405  Kokkos::Array<Kokkos::Array<PointScalar,3>,3> cross_products;
406 
407  cross(cross_products[0], s1Grad, s2Grad);
408  cross(cross_products[1], s2Grad, s0Grad);
409  cross(cross_products[2], s0Grad, s1Grad);
410 
411  Kokkos::Array<PointScalar,3> s {s0,s1,s2};
412 
413  for (ordinal_type d=0; d<3; d++)
414  {
415  OutputScalar v_d = 0;
416  for (ordinal_type i=0; i<3; i++)
417  {
418  v_d += s[i] * cross_products[i][d];
419  }
420  vectorWeight[d] = v_d;
421  }
422  }
423 
424  KOKKOS_INLINE_FUNCTION
425  void computeFaceDivWeight(OutputScalar &divWeight,
426  const Kokkos::Array<PointScalar,3> &s0Grad,
427  const Kokkos::Array<PointScalar,3> &s1Grad,
428  const Kokkos::Array<PointScalar,3> &s2Grad) const
429  {
430  // grad s0 \dot (grad s1 x grad s2)
431 
432  Kokkos::Array<PointScalar,3> grad_s1_cross_grad_s2;
433  cross(grad_s1_cross_grad_s2, s1Grad, s2Grad);
434 
435  dot(divWeight, s0Grad, grad_s1_cross_grad_s2);
436  }
437 
438  KOKKOS_INLINE_FUNCTION
439  void computeGradHomLi(Kokkos::Array<OutputScalar,3> &HomLi_grad, // grad [L_i](s0,s1)
440  const ordinal_type i,
441  const OutputScratchView &HomPi_s0s1, // [P_i](s0,s1)
442  const OutputScratchView &HomLi_dt_s0s1, // [d/dt L_i](s0,s1)
443  const Kokkos::Array<PointScalar,3> &s0Grad,
444  const Kokkos::Array<PointScalar,3> &s1Grad) const
445  {
446 // grad [L_i](s0,s1) = [P_{i-1}](s0,s1) * grad s1 + [R_{i-1}](s0,s1) * grad (s0 + s1)
447  const auto & R_i_minus_1 = HomLi_dt_s0s1(i); // d/dt L_i = R_{i-1}
448  const auto & P_i_minus_1 = HomPi_s0s1(i-1);
449  for (ordinal_type d=0; d<3; d++)
450  {
451  HomLi_grad[d] = P_i_minus_1 * s1Grad[d] + R_i_minus_1 * (s0Grad[d] + s1Grad[d]);
452  }
453  }
454 
455  KOKKOS_INLINE_FUNCTION
456  void operator()( const TeamMember & teamMember ) const
457  {
458  auto pointOrdinal = teamMember.league_rank();
459  OutputScratchView scratch1D_1, scratch1D_2, scratch1D_3;
460  OutputScratchView scratch1D_4, scratch1D_5, scratch1D_6;
461  OutputScratchView scratch1D_7, scratch1D_8, scratch1D_9;
462  if (fad_size_output_ > 0) {
463  scratch1D_1 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
464  scratch1D_2 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
465  scratch1D_3 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
466  scratch1D_4 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
467  scratch1D_5 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
468  scratch1D_6 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
469  scratch1D_7 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
470  scratch1D_8 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
471  scratch1D_9 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
472  }
473  else {
474  scratch1D_1 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
475  scratch1D_2 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
476  scratch1D_3 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
477  scratch1D_4 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
478  scratch1D_5 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
479  scratch1D_6 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
480  scratch1D_7 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
481  scratch1D_8 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
482  scratch1D_9 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
483  }
484 
485  const auto & x = inputPoints_(pointOrdinal,0);
486  const auto & y = inputPoints_(pointOrdinal,1);
487  const auto & z = inputPoints_(pointOrdinal,2);
488 
489  // Intrepid2 uses (-1,1)^2 for x,y
490  // ESEAS uses (0,1)^2
491 
492  Kokkos::Array<PointScalar,3> coords;
493  transformToESEASPyramid<>(coords[0], coords[1], coords[2], x, y, z); // map x,y coordinates from (-z,z)^2 to (0,z)^2
494 
495  // pyramid "affine" coordinates and gradients get stored in lambda, lambdaGrad:
496  using Kokkos::Array;
497  Array<PointScalar,5> lambda;
498  Array<Kokkos::Array<PointScalar,3>,5> lambdaGrad;
499 
500  Array<Array<PointScalar,3>,2> mu;
501  Array<Array<Array<PointScalar,3>,3>,2> muGrad;
502 
503  Array<Array<PointScalar,2>,3> nu;
504  Array<Array<Array<PointScalar,3>,2>,3> nuGrad;
505 
506  affinePyramid(lambda, lambdaGrad, mu, muGrad, nu, nuGrad, coords);
507 
508  switch (opType_)
509  {
510  case OPERATOR_VALUE:
511  {
512  ordinal_type fieldOrdinalOffset = 0;
513  // quadrilateral face
514  {
515  // rename scratch1, scratch2
516  auto & Pi = scratch1D_1;
517  auto & Pj = scratch1D_2;
518 
519  auto & s0 = mu[0][0], & s1 = mu[1][0];
520  auto & s0_grad = muGrad[0][0], & s1_grad = muGrad[1][0];
521  auto & t0 = mu[0][1], & t1 = mu[1][1];
522  auto & t0_grad = muGrad[0][1], & t1_grad = muGrad[1][1];
523 
524  Polynomials::shiftedScaledLegendreValues(Pi, polyOrder_-1, s1, s0 + s1);
525  Polynomials::shiftedScaledLegendreValues(Pj, polyOrder_-1, t1, t0 + t1);
526 
527  const auto & muZ_0 = mu[0][2];
528  OutputScalar mu0_cubed = muZ_0 * muZ_0 * muZ_0;
529 
530  // following the ESEAS ordering: j increments first
531  for (int j=0; j<polyOrder_; j++)
532  {
533  for (int i=0; i<polyOrder_; i++)
534  {
535  Kokkos::Array<OutputScalar,3> VQUAD; // output from V_QUAD
536  V_QUAD(VQUAD, i, j,
537  Pi, s0, s1, s0_grad, s1_grad,
538  Pj, t0, t1, t0_grad, t1_grad);
539 
540  for (ordinal_type d=0; d<3; d++)
541  {
542  output_(fieldOrdinalOffset,pointOrdinal,d) = mu0_cubed * VQUAD[d];
543  }
544  fieldOrdinalOffset++;
545  }
546  }
547  }
548 
549  // triangle faces
550  {
551  /*
552  Face functions for triangular face (d,e,f) given by:
553  1/2 * ( mu_c^{zeta,xi_b} * V_TRI_ij(nu_012^{zeta,xi_a})
554  + 1/mu_c^{zeta,xi_b} * V_TRI_ij(nu_012^{zeta,xi_a} * mu_c^{zeta,xi_b}) )
555  but the division by mu_c^{zeta,xi_b} should ideally be avoided in computations; there is
556  an alternate expression, not implemented by ESEAS. We start with the above expression
557  so that we can get agreement with ESEAS. Hopefully after that we can do the alternative,
558  and confirm that we maintain agreement with ESEAS for points where ESEAS does not generate
559  nans.
560 
561 
562  where s0, s1, s2 are nu[0][a-1],nu[1][a-1],nu[2][a-1] (nu_012 above)
563  and (a,b) = (1,2) for faces 0,2; (a,b) = (2,1) for others;
564  c = 0 for faces 0,3; c = 1 for others
565  */
566  const auto & P = scratch1D_1; // for V_TRI( nu_012)
567  const auto & P_2ip1 = scratch1D_2;
568  const auto & Pmu = scratch1D_3; // for V_TRI( mu * nu_012)
569  const auto & Pmu_2ip1 = scratch1D_4;
570  for (int faceOrdinal=0; faceOrdinal<numTriFaces; faceOrdinal++)
571  {
572  // face 0,2 --> a=1, b=2
573  // face 1,3 --> a=2, b=1
574  int a = (faceOrdinal % 2 == 0) ? 1 : 2;
575  int b = 3 - a;
576  // face 0,3 --> c=0
577  // face 1,2 --> c=1
578  int c = ((faceOrdinal == 0) || (faceOrdinal == 3)) ? 0 : 1;
579 
580  const auto & s0 = nu[0][a-1]; const auto & s0_grad = nuGrad[0][a-1];
581  const auto & s1 = nu[1][a-1]; const auto & s1_grad = nuGrad[1][a-1];
582  const auto & s2 = nu[2][a-1];
583  const PointScalar jacobiScaling = s0 + s1 + s2;
584 
585  const PointScalar legendreScaling = s0 + s1;
586  Polynomials::shiftedScaledLegendreValues(P, polyOrder_-1, s1, legendreScaling);
587 
588  const auto lambda0_index = tri_face_vertex_0[faceOrdinal];
589  const auto lambda1_index = tri_face_vertex_1[faceOrdinal];
590  const auto lambda2_index = tri_face_vertex_2[faceOrdinal];
591 
592  const auto & mu_c_b = mu [c][b-1];
593  const auto & mu_c_b_grad = muGrad[c][b-1];
594  const auto & mu_s0 = lambda[lambda0_index];
595  const auto & mu_s1 = lambda[lambda1_index];
596  const auto & mu_s2 = lambda[lambda2_index];
597 
598  const PointScalar muJacobiScaling = mu_s0 + mu_s1 + mu_s2;
599 
600  const PointScalar muLegendreScaling = mu_s0 + mu_s1;
601  Polynomials::shiftedScaledLegendreValues(Pmu, polyOrder_-1, mu_s1, muLegendreScaling);
602 
603  Kokkos::Array<PointScalar, 3> vectorWeight;
604  computeFaceVectorWeight(vectorWeight, nu[0][a-1], nuGrad[0][a-1],
605  nu[1][a-1], nuGrad[1][a-1],
606  nu[2][a-1], nuGrad[2][a-1]);
607 
608  Kokkos::Array<OutputScalar,3> VTRI_00;
609  V_TRI(VTRI_00,0,0,P,P_2ip1,vectorWeight);
610 
611  Kokkos::Array<OutputScalar,3> EE_0;
612  E_E(EE_0, 0, P, s0, s1, s0_grad, s1_grad);
613 
614  for (int totalPolyOrder=0; totalPolyOrder<polyOrder_; totalPolyOrder++)
615  {
616  for (int i=0; i<=totalPolyOrder; i++)
617  {
618  const int j = totalPolyOrder - i;
619 
620  const double alpha = i*2.0 + 1;
621  Polynomials::shiftedScaledJacobiValues( P_2ip1, alpha, polyOrder_-1, s2, jacobiScaling);
622  Polynomials::shiftedScaledJacobiValues(Pmu_2ip1, alpha, polyOrder_-1, mu_s2, muJacobiScaling);
623 
624  Kokkos::Array<OutputScalar,3> VTRI; // output from V_TRI
625  V_TRI(VTRI, i, j, P, P_2ip1, vectorWeight);
626 
627  Kokkos::Array<OutputScalar,3> one_over_mu_VTRI_mu; // (B.42) result
628  V_TRI_B42(one_over_mu_VTRI_mu, VTRI_00, EE_0, s2, mu_c_b, mu_c_b_grad, i, j, Pmu, Pmu_2ip1);
629 
630  for (ordinal_type d=0; d<3; d++)
631  {
632  output_(fieldOrdinalOffset,pointOrdinal,d) = 0.5 * (VTRI[d] * mu_c_b + one_over_mu_VTRI_mu[d]);
633  }
634 
635  fieldOrdinalOffset++;
636  }
637  }
638  }
639  } // triangle faces block
640 
641  // interior functions
642  {
643  // label scratch
644  const auto & Li_muZ01 = scratch1D_1; // used for phi_k^E values in Family I, II, IV
645  const auto & Li_muX01 = scratch1D_2; // used for E_QUAD computations
646  const auto & Li_muY01 = scratch1D_3; // used for E_QUAD computations
647  const auto & Pi_muX01 = scratch1D_4; // used for E_QUAD computations where xi_1 comes first
648  const auto & Pi_muY01 = scratch1D_5; // used for E_QUAD computations where xi_2 comes first
649  const auto & Pi_muZ01 = scratch1D_6; // used for E_QUAD computations where xi_2 comes first
650  const auto & Li_dt_muX01 = scratch1D_7; // used for E_QUAD computations
651  const auto & Li_dt_muY01 = scratch1D_8; // used for E_QUAD computations
652  const auto & Li_dt_muZ01 = scratch1D_9; // used for E_QUAD computations
653 
654  const auto & muX_0 = mu[0][0]; const auto & muX_0_grad = muGrad[0][0];
655  const auto & muX_1 = mu[1][0]; const auto & muX_1_grad = muGrad[1][0];
656  const auto & muY_0 = mu[0][1]; const auto & muY_0_grad = muGrad[0][1];
657  const auto & muY_1 = mu[1][1]; const auto & muY_1_grad = muGrad[1][1];
658  const auto & muZ_0 = mu[0][2]; const auto & muZ_0_grad = muGrad[0][2];
659  const auto & muZ_1 = mu[1][2]; const auto & muZ_1_grad = muGrad[1][2];
660 
661  Polynomials::shiftedScaledIntegratedLegendreValues(Li_muX01, polyOrder_, muX_1, muX_0 + muX_1);
662  Polynomials::shiftedScaledIntegratedLegendreValues(Li_muY01, polyOrder_, muY_1, muY_0 + muY_1);
663  Polynomials::shiftedScaledIntegratedLegendreValues(Li_muZ01, polyOrder_, muZ_1, muZ_0 + muZ_1);
664 
665  Polynomials::shiftedScaledLegendreValues(Pi_muX01, polyOrder_, muX_1, muX_0 + muX_1);
666  Polynomials::shiftedScaledLegendreValues(Pi_muY01, polyOrder_, muY_1, muY_0 + muY_1);
667  Polynomials::shiftedScaledLegendreValues(Pi_muZ01, polyOrder_, muZ_1, muZ_0 + muZ_1);
668 
669  Polynomials::shiftedScaledIntegratedLegendreValues_dt(Li_dt_muX01, Pi_muX01, polyOrder_, muX_1, muX_0 + muX_1);
670  Polynomials::shiftedScaledIntegratedLegendreValues_dt(Li_dt_muY01, Pi_muY01, polyOrder_, muY_1, muY_0 + muY_1);
671  Polynomials::shiftedScaledIntegratedLegendreValues_dt(Li_dt_muZ01, Pi_muZ01, polyOrder_, muZ_1, muZ_0 + muZ_1);
672 
673  // FAMILIES I & II -- divergence-free families
674  // following the ESEAS ordering: k increments first
675  for (int f=0; f<2; f++)
676  {
677  const auto &s0 = (f==0) ? muX_0 : muY_0; const auto & s0_grad = (f==0) ? muX_0_grad : muY_0_grad;
678  const auto &s1 = (f==0) ? muX_1 : muY_1; const auto & s1_grad = (f==0) ? muX_1_grad : muY_1_grad;
679  const auto & t0_grad = (f==0) ? muY_0_grad : muX_0_grad;
680  const auto & t1_grad = (f==0) ? muY_1_grad : muX_1_grad;
681  const auto & Pi_s01 = (f==0) ? Pi_muX01 : Pi_muY01;
682  const auto & Pi_t01 = (f==0) ? Pi_muY01 : Pi_muX01;
683  const auto & Li_t01 = (f==0) ? Li_muY01 : Li_muX01;
684  const auto & Li_dt_t01 = (f==0) ? Li_dt_muY01 : Li_dt_muX01;
685 
686  for (int k=2; k<=polyOrder_; k++)
687  {
688  const auto & phi_k = Li_muZ01(k);
689  Kokkos::Array<OutputScalar,3> phi_k_grad;
690  computeGradHomLi(phi_k_grad, k, Pi_muZ01, Li_dt_muZ01, muZ_0_grad, muZ_1_grad);
691 
692  Kokkos::Array<OutputScalar,3> muZ0_grad_phi_k_plus_phi_k_grad_muZ0;
693  for (ordinal_type d=0; d<3; d++)
694  {
695  muZ0_grad_phi_k_plus_phi_k_grad_muZ0[d] = muZ_0 * phi_k_grad[d] + phi_k * muZ_0_grad[d];
696  }
697 
698  // for reasons that I don't entirely understand, ESEAS switches whether i or j are the fastest-moving indices depending on whether it's family I or II. Following their code, I'm calling the outer loop variable jg, inner ig.
699  // (Cross-code comparisons are considerably simpler if we number the dofs in the same way.)
700  ordinal_type jg_min = (f==0) ? 2 : 0;
701  ordinal_type jg_max = (f==0) ? polyOrder_ : polyOrder_-1;
702  ordinal_type ig_min = (f==0) ? 0 : 2;
703  ordinal_type ig_max = (f==0) ? polyOrder_ -1 : polyOrder_;
704  for (ordinal_type jg=jg_min; jg<=jg_max; jg++)
705  {
706  for (ordinal_type ig=ig_min; ig<=ig_max; ig++)
707  {
708  const ordinal_type &i = (f==0) ? ig : jg;
709  const ordinal_type &j = (f==0) ? jg : ig;
710  Kokkos::Array<OutputScalar,3> EQUAD_ij;
711  Kokkos::Array<OutputScalar,3> curl_EQUAD_ij;
712 
713  E_QUAD(EQUAD_ij, i, j, Pi_s01, s0, s1, s0_grad, s1_grad, Li_t01);
714 
715  E_QUAD_CURL(curl_EQUAD_ij, i, j, Pi_s01, s0, s1, s0_grad, s1_grad,
716  Pi_t01, Li_t01, Li_dt_t01, t0_grad, t1_grad);
717 
718  // first term: muZ_0 phi^E_k curl EQUAD
719  // we can reuse the memory for curl_EQUAD_ij; we won't need the values there again
720  Kokkos::Array<OutputScalar,3> & firstTerm = curl_EQUAD_ij;
721  for (ordinal_type d=0; d<3; d++)
722  {
723  firstTerm[d] *= muZ_0 * phi_k;
724  }
725 
726  Kokkos::Array<OutputScalar,3> secondTerm; //(muZ0 grad phi + phi grad muZ0) x EQUAD
727 
728  cross(secondTerm, muZ0_grad_phi_k_plus_phi_k_grad_muZ0, EQUAD_ij);
729 
730  for (ordinal_type d=0; d<3; d++)
731  {
732  output_(fieldOrdinalOffset,pointOrdinal,d) = firstTerm[d] + secondTerm[d];
733  }
734 
735  fieldOrdinalOffset++;
736  }
737  }
738  }
739  } // family I, II loop
740 
741  // FAMILY III -- a divergence-free family
742  for (int j=2; j<=polyOrder_; j++)
743  {
744  // phi_ij_QUAD: phi_i(mu_X01) * phi_j(mu_Y01) // (following the ESEAS *implementation*; Fuentes et al. (p. 454) actually have the arguments reversed, which leads to a different basis ordering)
745  const auto & phi_j = Li_muY01(j);
746  Kokkos::Array<OutputScalar,3> phi_j_grad;
747  computeGradHomLi(phi_j_grad, j, Pi_muY01, Li_dt_muY01, muY_0_grad, muY_1_grad);
748  for (int i=2; i<=polyOrder_; i++)
749  {
750  const auto & phi_i = Li_muX01(i);
751  Kokkos::Array<OutputScalar,3> phi_i_grad;
752  computeGradHomLi(phi_i_grad, i, Pi_muX01, Li_dt_muX01, muX_0_grad, muX_1_grad);
753 
754  Kokkos::Array<OutputScalar,3> phi_ij_grad;
755  for (ordinal_type d=0; d<3; d++)
756  {
757  phi_ij_grad[d] = phi_i * phi_j_grad[d] + phi_j * phi_i_grad[d];
758  }
759 
760  Kokkos::Array<OutputScalar,3> cross_product; // phi_ij_grad x grad_muZ0
761  cross(cross_product, phi_ij_grad, muZ_0_grad);
762 
763  ordinal_type n = max(i,j);
764  OutputScalar weight = n * pow(muZ_0,n-1);
765  for (ordinal_type d=0; d<3; d++)
766  {
767  output_(fieldOrdinalOffset,pointOrdinal,d) = weight * cross_product[d];
768  }
769  fieldOrdinalOffset++;
770  }
771  }
772 
773  // FAMILY IV (non-trivial divergences)
774  {
775  const auto muZ_0_squared = muZ_0 * muZ_0;
776  const auto &s0 = muX_0; const auto & s0_grad = muX_0_grad;
777  const auto &s1 = muX_1; const auto & s1_grad = muX_1_grad;
778  const auto &t0 = muY_0; const auto & t0_grad = muY_0_grad;
779  const auto &t1 = muY_1; const auto & t1_grad = muY_1_grad;
780  const auto &Pi = Pi_muX01;
781  const auto &Pj = Pi_muY01;
782  for (int k=2; k<=polyOrder_; k++)
783  {
784  const auto & phi_k = Li_muZ01(k);
785  for (int j=0; j<polyOrder_; j++)
786  {
787  for (int i=0; i<polyOrder_; i++)
788  {
789  Kokkos::Array<OutputScalar,3> VQUAD; // output from V_QUAD
790  V_QUAD(VQUAD, i, j,
791  Pi, s0, s1, s0_grad, s1_grad,
792  Pj, t0, t1, t0_grad, t1_grad);
793 
794  for (int d=0; d<3; d++)
795  {
796  output_(fieldOrdinalOffset,pointOrdinal,d) = muZ_0_squared * phi_k * VQUAD[d];
797  }
798 
799  fieldOrdinalOffset++;
800  }
801  }
802  }
803  }
804 
805  // FAMILY V (non-trivial divergences)
806  {
807  for (int j=2; j<=polyOrder_; j++)
808  {
809  const auto & phi_j = Li_muY01(j);
810  Kokkos::Array<OutputScalar,3> phi_j_grad;
811  computeGradHomLi(phi_j_grad, j, Pi_muY01, Li_dt_muY01, muY_0_grad, muY_1_grad);
812 
813  for (int i=2; i<=polyOrder_; i++)
814  {
815  const auto & phi_i = Li_muX01(i);
816  Kokkos::Array<OutputScalar,3> phi_i_grad;
817  computeGradHomLi(phi_i_grad, i, Pi_muX01, Li_dt_muX01, muX_0_grad, muX_1_grad);
818 
819  const int n = max(i,j);
820  const OutputScalar muZ_1_nm1 = pow(muZ_1,n-1);
821 
822  Kokkos::Array<OutputScalar,3> VLEFTTRI;
823  V_LEFT_TRI(VLEFTTRI, phi_i, phi_i_grad, phi_j, phi_j_grad, muZ_0, muZ_0_grad);
824 
825  for (int d=0; d<3; d++)
826  {
827  output_(fieldOrdinalOffset,pointOrdinal,d) = muZ_1_nm1 * VLEFTTRI[d];
828  }
829 
830  fieldOrdinalOffset++;
831  }
832  }
833  }
834 
835  // FAMILY VI (non-trivial divergences)
836  for (int i=2; i<=polyOrder_; i++)
837  {
838  const auto & phi_i = Li_muX01(i);
839  Kokkos::Array<OutputScalar,3> phi_i_grad;
840  computeGradHomLi(phi_i_grad, i, Pi_muX01, Li_dt_muX01, muX_0_grad, muX_1_grad);
841 
842  Kokkos::Array<OutputScalar,3> VRIGHTTRI;
843  V_RIGHT_TRI(VRIGHTTRI, muY_1, muY_1_grad, phi_i, phi_i_grad, muZ_0, muZ_0_grad);
844 
845  const OutputScalar muZ_1_im1 = pow(muZ_1,i-1);
846 
847  for (int d=0; d<3; d++)
848  {
849  output_(fieldOrdinalOffset,pointOrdinal,d) = muZ_1_im1 * VRIGHTTRI[d];
850  }
851 
852  fieldOrdinalOffset++;
853  }
854 
855  // FAMILY VII (non-trivial divergences)
856  for (int j=2; j<=polyOrder_; j++)
857  {
858  const auto & phi_j = Li_muY01(j);
859  Kokkos::Array<OutputScalar,3> phi_j_grad;
860  computeGradHomLi(phi_j_grad, j, Pi_muY01, Li_dt_muY01, muY_0_grad, muY_1_grad);
861 
862  Kokkos::Array<OutputScalar,3> VRIGHTTRI;
863  V_RIGHT_TRI(VRIGHTTRI, muX_1, muX_1_grad, phi_j, phi_j_grad, muZ_0, muZ_0_grad);
864 
865  const OutputScalar muZ_1_jm1 = pow(muZ_1,j-1);
866 
867  for (int d=0; d<3; d++)
868  {
869  output_(fieldOrdinalOffset,pointOrdinal,d) = muZ_1_jm1 * VRIGHTTRI[d];
870  }
871 
872  fieldOrdinalOffset++;
873  }
874  }
875  } // end OPERATOR_VALUE
876  break;
877  case OPERATOR_DIV:
878  {
879  ordinal_type fieldOrdinalOffset = 0;
880  // quadrilateral face
881  {
882  // rename scratch1, scratch2
883  auto & Pi = scratch1D_1;
884  auto & Pj = scratch1D_2;
885 
886  auto & s0 = mu[0][0], s1 = mu[1][0];
887  auto & s0_grad = muGrad[0][0], s1_grad = muGrad[1][0];
888  auto & t0 = mu[0][1], t1 = mu[1][1];
889  auto & t0_grad = muGrad[0][1], t1_grad = muGrad[1][1];
890 
891  Polynomials::shiftedScaledLegendreValues(Pi, polyOrder_-1, s1, s0 + s1);
892  Polynomials::shiftedScaledLegendreValues(Pj, polyOrder_-1, t1, t0 + t1);
893 
894  const auto & muZ0 = mu[0][2];
895  const auto & muZ0_grad = muGrad[0][2];
896  OutputScalar three_mu0_squared = 3.0 * muZ0 * muZ0;
897 
898  // following the ESEAS ordering: j increments first
899  for (int j=0; j<polyOrder_; j++)
900  {
901  for (int i=0; i<polyOrder_; i++)
902  {
903  Kokkos::Array<OutputScalar,3> VQUAD; // output from V_QUAD
904  V_QUAD(VQUAD, i, j,
905  Pi, s0, s1, s0_grad, s1_grad,
906  Pj, t0, t1, t0_grad, t1_grad);
907 
908  OutputScalar grad_muZ0_dot_VQUAD;
909  dot(grad_muZ0_dot_VQUAD, muZ0_grad, VQUAD);
910 
911  output_(fieldOrdinalOffset,pointOrdinal) = three_mu0_squared * grad_muZ0_dot_VQUAD;
912  fieldOrdinalOffset++;
913  }
914  }
915  } // end quad face block
916 
917  // triangle faces
918  {
919  const auto & P = scratch1D_1; // for V_TRI( nu_012)
920  const auto & P_2ip1 = scratch1D_2;
921  const auto & Pmu = scratch1D_3; // for V_TRI( mu * nu_012)
922  const auto & Pmu_2ip1 = scratch1D_4;
923  for (int faceOrdinal=0; faceOrdinal<numTriFaces; faceOrdinal++)
924  {
925  // face 0,2 --> a=1, b=2
926  // face 1,3 --> a=2, b=1
927  int a = (faceOrdinal % 2 == 0) ? 1 : 2;
928  int b = 3 - a;
929  // face 0,3 --> c=0
930  // face 1,2 --> c=1
931  int c = ((faceOrdinal == 0) || (faceOrdinal == 3)) ? 0 : 1;
932 
933  const auto & s0 = nu[0][a-1]; const auto & s0_grad = nuGrad[0][a-1];
934  const auto & s1 = nu[1][a-1]; const auto & s1_grad = nuGrad[1][a-1];
935  const auto & s2 = nu[2][a-1]; const auto & s2_grad = nuGrad[2][a-1];
936  const PointScalar jacobiScaling = s0 + s1 + s2; // we can actually assume that this is 1; see comment at bottom of p. 425 of Fuentes et al.
937 
938  const PointScalar legendreScaling = s0 + s1;
939  Polynomials::shiftedScaledLegendreValues(P, polyOrder_-1, s1, legendreScaling);
940 
941  const auto lambda0_index = tri_face_vertex_0[faceOrdinal];
942  const auto lambda1_index = tri_face_vertex_1[faceOrdinal];
943  const auto lambda2_index = tri_face_vertex_2[faceOrdinal];
944 
945  const auto & mu_c_b = mu[c][b-1];
946  const auto & mu_c_b_grad = muGrad[c][b-1];
947 
948  const auto & mu_s0 = lambda[lambda0_index];
949  const auto & mu_s1 = lambda[lambda1_index];
950  const auto & mu_s2 = lambda[lambda2_index]; // == s2
951 
952  const PointScalar muJacobiScaling = mu_s0 + mu_s1 + mu_s2;
953 
954  const PointScalar muLegendreScaling = mu_s0 + mu_s1;
955  Polynomials::shiftedScaledLegendreValues(Pmu, polyOrder_-1, mu_s1, muLegendreScaling);
956 
957  Kokkos::Array<PointScalar, 3> vectorWeight;
958  computeFaceVectorWeight(vectorWeight, nu[0][a-1], nuGrad[0][a-1],
959  nu[1][a-1], nuGrad[1][a-1],
960  nu[2][a-1], nuGrad[2][a-1]);
961 
962  Kokkos::Array<PointScalar,3> & mu_s0_grad = lambdaGrad[lambda0_index];
963  Kokkos::Array<PointScalar,3> & mu_s1_grad = lambdaGrad[lambda1_index];
964  Kokkos::Array<PointScalar,3> & mu_s2_grad = lambdaGrad[lambda2_index]; // == s2_grad
965 
966  Kokkos::Array<PointScalar, 3> muVectorWeight;
967  computeFaceVectorWeight(muVectorWeight, mu_s0, mu_s0_grad,
968  mu_s1, mu_s1_grad,
969  mu_s2, mu_s2_grad);
970 
971  OutputScalar muDivWeight;
972  computeFaceDivWeight(muDivWeight, mu_s0_grad, mu_s1_grad, mu_s2_grad);
973 
974  Kokkos::Array<OutputScalar,3> VTRI_00;
975  V_TRI(VTRI_00,0,0,P,P_2ip1,vectorWeight);
976 
977  Kokkos::Array<OutputScalar,3> EE_0;
978  E_E(EE_0, 0, P, s0, s1, s0_grad, s1_grad);
979 
980  for (int totalPolyOrder=0; totalPolyOrder<polyOrder_; totalPolyOrder++)
981  {
982  for (int i=0; i<=totalPolyOrder; i++)
983  {
984  const int j = totalPolyOrder - i;
985 
986  const double alpha = i*2.0 + 1;
987  Polynomials::shiftedScaledJacobiValues( P_2ip1, alpha, polyOrder_-1, s2, jacobiScaling);
988  Polynomials::shiftedScaledJacobiValues(Pmu_2ip1, alpha, polyOrder_-1, mu_s2, muJacobiScaling);
989 
990  Kokkos::Array<OutputScalar,3> VTRI; // output from V_TRI
991 
992  V_TRI(VTRI, i, j, P, P_2ip1, vectorWeight);
993 
994  OutputScalar div_one_over_mu_VTRI_mu;
995  V_TRI_B42_DIV(div_one_over_mu_VTRI_mu, VTRI_00, EE_0, s2, s2_grad, mu_c_b, mu_c_b_grad, i, j, Pmu, Pmu_2ip1);
996 
997  output_(fieldOrdinalOffset,pointOrdinal) = 0.5 * (dot(mu_c_b_grad, VTRI) + div_one_over_mu_VTRI_mu);
998 
999  fieldOrdinalOffset++;
1000  }
1001  }
1002  }
1003  } // end triangle face block
1004 
1005  {
1006  // FAMILY I -- divergence free
1007  // following the ESEAS ordering: k increments first
1008  for (int k=2; k<=polyOrder_; k++)
1009  {
1010  for (int j=2; j<=polyOrder_; j++)
1011  {
1012  for (int i=0; i<polyOrder_; i++)
1013  {
1014  output_(fieldOrdinalOffset,pointOrdinal) = 0.0;
1015  fieldOrdinalOffset++;
1016  }
1017  }
1018  }
1019 
1020  // FAMILY II -- divergence free
1021  // following the ESEAS ordering: k increments first
1022  for (int k=2; k<=polyOrder_; k++)
1023  {
1024  for (int j=2; j<=polyOrder_; j++)
1025  {
1026  for (int i=0; i<polyOrder_; i++)
1027  {
1028  output_(fieldOrdinalOffset,pointOrdinal) = 0.0;
1029  fieldOrdinalOffset++;
1030  }
1031  }
1032  }
1033 
1034  // FAMILY III -- divergence free
1035  for (int j=2; j<=polyOrder_; j++)
1036  {
1037  for (int i=2; i<=polyOrder_; i++)
1038  {
1039  output_(fieldOrdinalOffset,pointOrdinal) = 0.0;
1040  fieldOrdinalOffset++;
1041  }
1042  }
1043 
1044  const auto & Li_muZ01 = scratch1D_1; // used in Family IV
1045  const auto & Li_muX01 = scratch1D_2; // used in Family V
1046  const auto & Li_muY01 = scratch1D_3; // used in Family V
1047  const auto & Pi_muX01 = scratch1D_4; // used in Family IV
1048  const auto & Pi_muY01 = scratch1D_5; // used in Family IV
1049  const auto & Pi_muZ01 = scratch1D_6; // used in Family IV
1050  const auto & Li_dt_muX01 = scratch1D_7; // used in Family V
1051  const auto & Li_dt_muY01 = scratch1D_8; // used in Family V
1052  const auto & Li_dt_muZ01 = scratch1D_9; // used in Family IV
1053 
1054  const auto & muX_0 = mu[0][0]; const auto & muX_0_grad = muGrad[0][0];
1055  const auto & muX_1 = mu[1][0]; const auto & muX_1_grad = muGrad[1][0];
1056  const auto & muY_0 = mu[0][1]; const auto & muY_0_grad = muGrad[0][1];
1057  const auto & muY_1 = mu[1][1]; const auto & muY_1_grad = muGrad[1][1];
1058  const auto & muZ_0 = mu[0][2]; const auto & muZ_0_grad = muGrad[0][2];
1059  const auto & muZ_1 = mu[1][2]; const auto & muZ_1_grad = muGrad[1][2];
1060 
1061  Polynomials::shiftedScaledIntegratedLegendreValues(Li_muX01, polyOrder_, muX_1, muX_0 + muX_1);
1062  Polynomials::shiftedScaledIntegratedLegendreValues(Li_muY01, polyOrder_, muY_1, muY_0 + muY_1);
1063  Polynomials::shiftedScaledIntegratedLegendreValues(Li_muZ01, polyOrder_, muZ_1, muZ_0 + muZ_1);
1064 
1065  Polynomials::shiftedScaledLegendreValues(Pi_muX01, polyOrder_, muX_1, muX_0 + muX_1);
1066  Polynomials::shiftedScaledLegendreValues(Pi_muY01, polyOrder_, muY_1, muY_0 + muY_1);
1067  Polynomials::shiftedScaledLegendreValues(Pi_muZ01, polyOrder_, muZ_1, muZ_0 + muZ_1);
1068 
1069  Polynomials::shiftedScaledIntegratedLegendreValues_dt(Li_dt_muX01, Pi_muX01, polyOrder_, muX_1, muX_0 + muX_1);
1070  Polynomials::shiftedScaledIntegratedLegendreValues_dt(Li_dt_muY01, Pi_muY01, polyOrder_, muY_1, muY_0 + muY_1);
1071  Polynomials::shiftedScaledIntegratedLegendreValues_dt(Li_dt_muZ01, Pi_muZ01, polyOrder_, muZ_1, muZ_0 + muZ_1);
1072 
1073  // FAMILY IV -- non-trivial divergences
1074  {
1075  const auto muZ_0_squared = muZ_0 * muZ_0;
1076  const auto &s0 = muX_0; const auto & s0_grad = muX_0_grad;
1077  const auto &s1 = muX_1; const auto & s1_grad = muX_1_grad;
1078  const auto &t0 = muY_0; const auto & t0_grad = muY_0_grad;
1079  const auto &t1 = muY_1; const auto & t1_grad = muY_1_grad;
1080  const auto &Pi = Pi_muX01;
1081  const auto &Pj = Pi_muY01;
1082 
1083  for (int k=2; k<=polyOrder_; k++)
1084  {
1085  const auto & phi_k = Li_muZ01(k);
1086  Kokkos::Array<OutputScalar,3> phi_k_grad;
1087  computeGradHomLi(phi_k_grad, k, Pi_muZ01, Li_dt_muZ01, muZ_0_grad, muZ_1_grad);
1088  for (int j=0; j<polyOrder_; j++)
1089  {
1090  for (int i=0; i<polyOrder_; i++)
1091  {
1092  Kokkos::Array<OutputScalar,3> firstTerm{0,0,0}; // muZ_0_squared * grad phi_k + 2 muZ_0 * phi_k * grad muZ_0
1093  for (int d=0; d<3; d++)
1094  {
1095  firstTerm[d] += muZ_0_squared * phi_k_grad[d] + 2. * muZ_0 * phi_k * muZ_0_grad[d];
1096  }
1097  Kokkos::Array<OutputScalar,3> VQUAD; // output from V_QUAD
1098  V_QUAD(VQUAD, i, j,
1099  Pi, s0, s1, s0_grad, s1_grad,
1100  Pj, t0, t1, t0_grad, t1_grad);
1101 
1102  OutputScalar divValue;
1103  dot(divValue, firstTerm, VQUAD);
1104  output_(fieldOrdinalOffset,pointOrdinal) = divValue;
1105 
1106  fieldOrdinalOffset++;
1107  }
1108  }
1109  }
1110  }
1111 
1112  // FAMILY V -- non-trivial divergences
1113  {
1114  for (int j=2; j<=polyOrder_; j++)
1115  {
1116  const auto & phi_j = Li_muX01(j);
1117  Kokkos::Array<OutputScalar,3> phi_j_grad;
1118  computeGradHomLi(phi_j_grad, j, Pi_muY01, Li_dt_muY01, muY_0_grad, muY_1_grad);
1119 
1120  for (int i=2; i<=polyOrder_; i++)
1121  {
1122  const auto & phi_i = Li_muY01(i);
1123  Kokkos::Array<OutputScalar,3> phi_i_grad;
1124  computeGradHomLi(phi_i_grad, i, Pi_muX01, Li_dt_muX01, muX_0_grad, muX_1_grad);
1125 
1126  const int n = max(i,j);
1127  const OutputScalar muZ_1_nm2 = pow(muZ_1,n-2);
1128 
1129  Kokkos::Array<OutputScalar,3> VLEFTTRI;
1130  V_LEFT_TRI(VLEFTTRI, phi_i, phi_i_grad, phi_j, phi_j_grad, muZ_0, muZ_0_grad);
1131 
1132  OutputScalar dot_product;
1133  dot(dot_product, muZ_1_grad, VLEFTTRI);
1134  output_(fieldOrdinalOffset,pointOrdinal) = (n-1) * muZ_1_nm2 * dot_product;
1135 
1136  fieldOrdinalOffset++;
1137  }
1138  }
1139  }
1140 
1141  // FAMILY VI (non-trivial divergences)
1142  for (int i=2; i<=polyOrder_; i++)
1143  {
1144  const auto & phi_i = Li_muX01(i);
1145  Kokkos::Array<OutputScalar,3> phi_i_grad;
1146  computeGradHomLi(phi_i_grad, i, Pi_muX01, Li_dt_muX01, muX_0_grad, muX_1_grad);
1147 
1148  Kokkos::Array<OutputScalar,3> VRIGHTTRI;
1149  V_RIGHT_TRI(VRIGHTTRI, muY_1, muY_1_grad, phi_i, phi_i_grad, muZ_0, muZ_0_grad);
1150 
1151  OutputScalar dot_product;
1152  dot(dot_product, muZ_1_grad, VRIGHTTRI);
1153 
1154  const OutputScalar muZ_1_im2 = pow(muZ_1,i-2);
1155  output_(fieldOrdinalOffset,pointOrdinal) = (i-1) * muZ_1_im2 * dot_product;
1156 
1157  fieldOrdinalOffset++;
1158  }
1159 
1160  // FAMILY VII (non-trivial divergences)
1161  for (int j=2; j<=polyOrder_; j++)
1162  {
1163  const auto & phi_j = Li_muY01(j);
1164  Kokkos::Array<OutputScalar,3> phi_j_grad;
1165  computeGradHomLi(phi_j_grad, j, Pi_muY01, Li_dt_muY01, muY_0_grad, muY_1_grad);
1166 
1167  Kokkos::Array<OutputScalar,3> VRIGHTTRI;
1168  V_RIGHT_TRI(VRIGHTTRI, muX_1, muX_1_grad, phi_j, phi_j_grad, muZ_0, muZ_0_grad);
1169 
1170  OutputScalar dot_product;
1171  dot(dot_product, muZ_1_grad, VRIGHTTRI);
1172 
1173  const OutputScalar muZ_1_jm2 = pow(muZ_1,j-2);
1174  output_(fieldOrdinalOffset,pointOrdinal) = (j-1) * muZ_1_jm2 * dot_product;
1175 
1176  fieldOrdinalOffset++;
1177  }
1178 
1179  } // end interior function block
1180 
1181  } // end OPERATOR_DIV block
1182  break;
1183  case OPERATOR_GRAD:
1184  case OPERATOR_D1:
1185  case OPERATOR_D2:
1186  case OPERATOR_D3:
1187  case OPERATOR_D4:
1188  case OPERATOR_D5:
1189  case OPERATOR_D6:
1190  case OPERATOR_D7:
1191  case OPERATOR_D8:
1192  case OPERATOR_D9:
1193  case OPERATOR_D10:
1194  INTREPID2_TEST_FOR_ABORT( true,
1195  ">>> ERROR: (Intrepid2::Hierarchical_HDIV_PYR_Functor) Computing of second and higher-order derivatives is not currently supported");
1196  default:
1197  // unsupported operator type
1198  device_assert(false);
1199  }
1200  }
1201 
1202  // Provide the shared memory capacity.
1203  // This function takes the team_size as an argument,
1204  // which allows team_size-dependent allocations.
1205  size_t team_shmem_size (int team_size) const
1206  {
1207  // we use shared memory to create a fast buffer for basis computations
1208  size_t shmem_size = 0;
1209  if (fad_size_output_ > 0)
1210  {
1211  // 1D scratch views (we have up to 9 of them):
1212  shmem_size += 9 * OutputScratchView::shmem_size(polyOrder_ + 1, fad_size_output_);
1213  }
1214  else
1215  {
1216  // 1D scratch views (we have up to 9 of them):
1217  shmem_size += 9 * OutputScratchView::shmem_size(polyOrder_ + 1);
1218  }
1219 
1220  return shmem_size;
1221  }
1222  };
1223 
1235  template<typename DeviceType,
1236  typename OutputScalar = double,
1237  typename PointScalar = double,
1238  bool useCGBasis = true> // if useCGBasis is true, basis functions are associated with either faces or the interior. If false, basis functions are all associated with interior
1240  : public Basis<DeviceType,OutputScalar,PointScalar>
1241  {
1242  public:
1244 
1245  using typename BasisBase::OrdinalTypeArray1DHost;
1246  using typename BasisBase::OrdinalTypeArray2DHost;
1247 
1248  using typename BasisBase::OutputViewType;
1249  using typename BasisBase::PointViewType;
1250  using typename BasisBase::ScalarViewType;
1251 
1252  using typename BasisBase::ExecutionSpace;
1253 
1254  protected:
1255  int polyOrder_; // the maximum order of the polynomial
1256  EPointType pointType_;
1257  public:
1268  HierarchicalBasis_HDIV_PYR(int polyOrder, const EPointType pointType=POINTTYPE_DEFAULT)
1269  :
1270  polyOrder_(polyOrder),
1271  pointType_(pointType)
1272  {
1273  INTREPID2_TEST_FOR_EXCEPTION(pointType!=POINTTYPE_DEFAULT,std::invalid_argument,"PointType not supported");
1274  const auto & p = polyOrder;
1275  this->basisCardinality_ = p * p + 2 * p * (p+1) + 3 * p * p * (p-1);
1276  this->basisDegree_ = p;
1277  this->basisCellTopologyKey_ = shards::Pyramid<>::key;
1278  this->basisType_ = BASIS_FEM_HIERARCHICAL;
1279  this->basisCoordinates_ = COORDINATES_CARTESIAN;
1280  this->functionSpace_ = FUNCTION_SPACE_HDIV;
1281 
1282  const int degreeLength = 1;
1283  this->fieldOrdinalPolynomialDegree_ = OrdinalTypeArray2DHost("Integrated Legendre H(div) pyramid polynomial degree lookup", this->basisCardinality_, degreeLength);
1284  this->fieldOrdinalH1PolynomialDegree_ = OrdinalTypeArray2DHost("Integrated Legendre H(div) pyramid polynomial H^1 degree lookup", this->basisCardinality_, degreeLength);
1285 
1286  int fieldOrdinalOffset = 0;
1287 
1288  // **** face functions **** //
1289  // one quad face
1290  const int numFunctionsPerQuadFace = p*p;
1291 
1292  // following the ESEAS ordering: j increments first
1293  for (int j=0; j<p; j++)
1294  {
1295  for (int i=0; i<p; i++)
1296  {
1297  this->fieldOrdinalPolynomialDegree_ (fieldOrdinalOffset,0) = std::max(i,j);
1298  this->fieldOrdinalH1PolynomialDegree_(fieldOrdinalOffset,0) = std::max(i,j)+1;
1299  fieldOrdinalOffset++;
1300  }
1301  }
1302 
1303  const int numFunctionsPerTriFace = 2 * p * (p+1) / 4;
1304  const int numTriFaces = 4;
1305  for (int faceOrdinal=0; faceOrdinal<numTriFaces; faceOrdinal++)
1306  {
1307  for (int totalPolyOrder=0; totalPolyOrder<polyOrder_; totalPolyOrder++)
1308  {
1309  const int totalFaceDofs = (totalPolyOrder+1) * (totalPolyOrder+2) / 2; // number of dofs for which i+j <= totalPolyOrder
1310  const int totalFaceDofsPrevious = totalPolyOrder * (totalPolyOrder+1) / 2; // number of dofs for which i+j <= totalPolyOrder-1
1311  const int faceDofsForPolyOrder = totalFaceDofs - totalFaceDofsPrevious; // number of dofs for which i+j == totalPolyOrder
1312  for (int i=0; i<faceDofsForPolyOrder; i++)
1313  {
1314  this->fieldOrdinalPolynomialDegree_ (fieldOrdinalOffset,0) = totalPolyOrder;
1315  this->fieldOrdinalH1PolynomialDegree_(fieldOrdinalOffset,0) = totalPolyOrder+1;
1316  fieldOrdinalOffset++;
1317  }
1318  }
1319  }
1320 
1321  // **** interior functions **** //
1322  const int numFunctionsPerVolume = 3 * p * p * (p-1);
1323 
1324  // FAMILY I
1325  // following the ESEAS ordering: k increments first
1326  for (int k=2; k<=polyOrder_; k++)
1327  {
1328  for (int j=2; j<=polyOrder_; j++)
1329  {
1330  for (int i=0; i<polyOrder_; i++)
1331  {
1332  const int max_jk = std::max(j,k);
1333  const int max_ijk = std::max(max_jk,i);
1334  const int max_ip1jk = std::max(max_jk,i+1);
1335  this->fieldOrdinalPolynomialDegree_ (fieldOrdinalOffset,0) = max_ijk;
1336  this->fieldOrdinalH1PolynomialDegree_(fieldOrdinalOffset,0) = max_ip1jk;
1337  fieldOrdinalOffset++;
1338  }
1339  }
1340  }
1341 
1342  // FAMILY II
1343  for (int k=2; k<=polyOrder_; k++)
1344  {
1345  // ESEAS reverses i,j loop ordering for family II, relative to family I.
1346  // We follow ESEAS for convenience of cross-code comparison.
1347  for (int i=0; i<polyOrder_; i++)
1348  {
1349  for (int j=2; j<=polyOrder_; j++)
1350  {
1351  const int max_jk = std::max(j,k);
1352  const int max_ijk = std::max(max_jk,i);
1353  const int max_ip1jk = std::max(max_jk,i+1);
1354  this->fieldOrdinalPolynomialDegree_ (fieldOrdinalOffset,0) = max_ijk;
1355  this->fieldOrdinalH1PolynomialDegree_(fieldOrdinalOffset,0) = max_ip1jk;
1356  fieldOrdinalOffset++;
1357  }
1358  }
1359  }
1360 
1361  // FAMILY III
1362  for (int j=2; j<=polyOrder_; j++)
1363  {
1364  for (int i=2; i<=polyOrder_; i++)
1365  {
1366  const int max_ij = std::max(i,j);
1367  this->fieldOrdinalPolynomialDegree_ (fieldOrdinalOffset,0) = max_ij;
1368  this->fieldOrdinalH1PolynomialDegree_(fieldOrdinalOffset,0) = max_ij;
1369  fieldOrdinalOffset++;
1370  }
1371  }
1372 
1373  // FAMILY IV
1374  for (int k=2; k<=polyOrder_; k++)
1375  {
1376  for (int j=0; j<polyOrder_; j++)
1377  {
1378  for (int i=0; i<polyOrder_; i++)
1379  {
1380  const int max_jk = std::max(j,k);
1381  const int max_ijk = std::max(max_jk,i);
1382  const int max_ip1jp1k = std::max(std::max(j+1,k),i+1);
1383  this->fieldOrdinalPolynomialDegree_ (fieldOrdinalOffset,0) = max_ijk;
1384  this->fieldOrdinalH1PolynomialDegree_(fieldOrdinalOffset,0) = max_ip1jp1k;
1385  fieldOrdinalOffset++;
1386  }
1387  }
1388  }
1389 
1390  // FAMILY V
1391  for (int j=2; j<=polyOrder_; j++)
1392  {
1393  for (int i=2; i<=polyOrder_; i++)
1394  {
1395  const int max_ij = std::max(i,j);
1396  this->fieldOrdinalPolynomialDegree_ (fieldOrdinalOffset,0) = max_ij;
1397  this->fieldOrdinalH1PolynomialDegree_(fieldOrdinalOffset,0) = max_ij;
1398  fieldOrdinalOffset++;
1399  }
1400  }
1401 
1402  // FAMILY VI
1403  for (int i=2; i<=polyOrder_; i++)
1404  {
1405  this->fieldOrdinalPolynomialDegree_ (fieldOrdinalOffset,0) = i;
1406  this->fieldOrdinalH1PolynomialDegree_(fieldOrdinalOffset,0) = i;
1407  fieldOrdinalOffset++;
1408  }
1409 
1410  // FAMILY VII
1411  for (int j=2; j<=polyOrder_; j++)
1412  {
1413  this->fieldOrdinalPolynomialDegree_ (fieldOrdinalOffset,0) = j;
1414  this->fieldOrdinalH1PolynomialDegree_(fieldOrdinalOffset,0) = j;
1415  fieldOrdinalOffset++;
1416  }
1417 
1418  if (fieldOrdinalOffset != this->basisCardinality_)
1419  {
1420  std::cout << "Internal error: basis enumeration is incorrect; fieldOrdinalOffset = " << fieldOrdinalOffset;
1421  std::cout << "; basisCardinality_ = " << this->basisCardinality_ << std::endl;
1422 
1423  INTREPID2_TEST_FOR_EXCEPTION(fieldOrdinalOffset != this->basisCardinality_, std::invalid_argument, "Internal error: basis enumeration is incorrect");
1424  }
1425 
1426  // initialize tags
1427  {
1428  // Intrepid2 vertices:
1429  // 0: (-1,-1, 0)
1430  // 1: ( 1,-1, 0)
1431  // 2: ( 1, 1, 0)
1432  // 3: (-1, 1, 0)
1433  // 4: ( 0, 0, 1)
1434 
1435  // ESEAS vertices:
1436  // 0: ( 0, 0, 0)
1437  // 1: ( 1, 0, 0)
1438  // 2: ( 1, 1, 0)
1439  // 3: ( 0, 1, 0)
1440  // 4: ( 0, 0, 1)
1441 
1442  // The edge numbering appears to match between ESEAS and Intrepid2
1443 
1444  // ESEAS numbers pyramid faces differently from Intrepid2
1445  // See BlendProjectPyraTF in ESEAS.
1446  // See PyramidFaceNodeMap in Shards_BasicTopologies
1447  // ESEAS: 0123, 014, 124, 324, 034
1448  // Intrepid2: 014, 124, 234, 304, 0321
1449 
1450  const int intrepid2FaceOrdinals[5] {4,0,1,2,3}; // index is the ESEAS face ordinal; value is the intrepid2 ordinal
1451 
1452  const auto & cardinality = this->basisCardinality_;
1453 
1454  // Basis-dependent initializations
1455  const ordinal_type tagSize = 4; // size of DoF tag, i.e., number of fields in the tag
1456  const ordinal_type posScDim = 0; // position in the tag, counting from 0, of the subcell dim
1457  const ordinal_type posScOrd = 1; // position in the tag, counting from 0, of the subcell ordinal
1458  const ordinal_type posDfOrd = 2; // position in the tag, counting from 0, of DoF ordinal relative to the subcell
1459 
1460  OrdinalTypeArray1DHost tagView("tag view", cardinality*tagSize);
1461  const int faceDim = 2, volumeDim = 3;
1462 
1463  if (useCGBasis) {
1464  {
1465  int tagNumber = 0;
1466  {
1467  // quad face
1468  const int faceOrdinalESEAS = 0;
1469  const int faceOrdinalIntrepid2 = intrepid2FaceOrdinals[faceOrdinalESEAS];
1470 
1471  for (int functionOrdinal=0; functionOrdinal<numFunctionsPerQuadFace; functionOrdinal++)
1472  {
1473  tagView(tagNumber*tagSize+0) = faceDim; // face dimension
1474  tagView(tagNumber*tagSize+1) = faceOrdinalIntrepid2; // face id
1475  tagView(tagNumber*tagSize+2) = functionOrdinal; // local dof id
1476  tagView(tagNumber*tagSize+3) = numFunctionsPerQuadFace; // total number of dofs on this face
1477  tagNumber++;
1478  }
1479  }
1480  for (int triFaceOrdinalESEAS=0; triFaceOrdinalESEAS<numTriFaces; triFaceOrdinalESEAS++)
1481  {
1482  const int faceOrdinalESEAS = triFaceOrdinalESEAS + 1;
1483  const int faceOrdinalIntrepid2 = intrepid2FaceOrdinals[faceOrdinalESEAS];
1484  for (int functionOrdinal=0; functionOrdinal<numFunctionsPerTriFace; functionOrdinal++)
1485  {
1486  tagView(tagNumber*tagSize+0) = faceDim; // face dimension
1487  tagView(tagNumber*tagSize+1) = faceOrdinalIntrepid2; // face id
1488  tagView(tagNumber*tagSize+2) = functionOrdinal; // local dof id
1489  tagView(tagNumber*tagSize+3) = numFunctionsPerTriFace; // total number of dofs on this face
1490  tagNumber++;
1491  }
1492  }
1493  for (int functionOrdinal=0; functionOrdinal<numFunctionsPerVolume; functionOrdinal++)
1494  {
1495  tagView(tagNumber*tagSize+0) = volumeDim; // volume dimension
1496  tagView(tagNumber*tagSize+1) = 0; // volume id
1497  tagView(tagNumber*tagSize+2) = functionOrdinal; // local dof id
1498  tagView(tagNumber*tagSize+3) = numFunctionsPerVolume; // total number of dofs in this volume
1499  tagNumber++;
1500  }
1501  INTREPID2_TEST_FOR_EXCEPTION(tagNumber != this->basisCardinality_, std::invalid_argument, "Internal error: basis tag enumeration is incorrect");
1502  }
1503  } else {
1504  for (ordinal_type i=0;i<cardinality;++i) {
1505  tagView(i*tagSize+0) = volumeDim; // volume dimension
1506  tagView(i*tagSize+1) = 0; // volume ordinal
1507  tagView(i*tagSize+2) = i; // local dof id
1508  tagView(i*tagSize+3) = cardinality; // total number of dofs on this face
1509  }
1510  }
1511 
1512  // Basis-independent function sets tag and enum data in tagToOrdinal_ and ordinalToTag_ arrays:
1513  // tags are constructed on host
1514  this->setOrdinalTagData(this->tagToOrdinal_,
1515  this->ordinalToTag_,
1516  tagView,
1517  this->basisCardinality_,
1518  tagSize,
1519  posScDim,
1520  posScOrd,
1521  posDfOrd);
1522  }
1523  }
1524 
1529  const char* getName() const override {
1530  return "Intrepid2_HierarchicalBasis_HDIV_PYR";
1531  }
1532 
1535  virtual bool requireOrientation() const override {
1536  return (this->getDegree() > 2);
1537  }
1538 
1539  // since the getValues() below only overrides the FEM variant, we specify that
1540  // we use the base class's getValues(), which implements the FVD variant by throwing an exception.
1541  // (It's an error to use the FVD variant on this basis.)
1542  using BasisBase::getValues;
1543 
1562  virtual void getValues( OutputViewType outputValues, const PointViewType inputPoints,
1563  const EOperator operatorType = OPERATOR_VALUE ) const override
1564  {
1565  auto numPoints = inputPoints.extent_int(0);
1566 
1568 
1569  FunctorType functor(operatorType, outputValues, inputPoints, polyOrder_);
1570 
1571  const int outputVectorSize = getVectorSizeForHierarchicalParallelism<OutputScalar>();
1572  const int pointVectorSize = getVectorSizeForHierarchicalParallelism<PointScalar>();
1573  const int vectorSize = std::max(outputVectorSize,pointVectorSize);
1574  const int teamSize = 1; // because of the way the basis functions are computed, we don't have a second level of parallelism...
1575 
1576  auto policy = Kokkos::TeamPolicy<ExecutionSpace>(numPoints,teamSize,vectorSize);
1577  Kokkos::parallel_for("Hierarchical_HDIV_PYR_Functor", policy, functor);
1578  }
1579 
1588  BasisPtr<DeviceType,OutputScalar,PointScalar>
1589  getSubCellRefBasis(const ordinal_type subCellDim, const ordinal_type subCellOrd) const override{
1590  const auto & p = this->basisDegree_;
1591  if (subCellDim == 2)
1592  {
1593  if (subCellOrd == 4) // quad basis
1594  {
1596  using HVOL_QUAD = Basis_Derived_HVOL_QUAD<HVOL_LINE>;
1597  return Teuchos::rcp(new HVOL_QUAD(p-1));
1598  }
1599  else // tri basis
1600  {
1602  return Teuchos::rcp(new HVOL_Tri(p-1));
1603  }
1604  }
1605  INTREPID2_TEST_FOR_EXCEPTION(true,std::invalid_argument,"Input parameters out of bounds");
1606  }
1607 
1612  virtual BasisPtr<typename Kokkos::HostSpace::device_type, OutputScalar, PointScalar>
1613  getHostBasis() const override {
1614  using HostDeviceType = typename Kokkos::HostSpace::device_type;
1616  return Teuchos::rcp( new HostBasisType(polyOrder_, pointType_) );
1617  }
1618  };
1619 } // end namespace Intrepid2
1620 
1621 // do ETI with default (double) type
1623 
1624 #endif /* Intrepid2_HierarchicalBasis_HDIV_PYR_h */
Kokkos::View< ordinal_type *, typename ExecutionSpace::array_layout, Kokkos::HostSpace > OrdinalTypeArray1DHost
View type for 1d host array.
unsigned basisCellTopologyKey_
Identifier of the base topology of the cells for which the basis is defined. See the Shards package f...
Implementation of H(vol) basis on the quadrilateral that is templated on H(vol) on the line...
BasisPtr< DeviceType, OutputScalar, PointScalar > getSubCellRefBasis(const ordinal_type subCellDim, const ordinal_type subCellOrd) const override
returns the basis associated to a subCell.
Defines several coordinates and their gradients on the pyramid; maps from Intrepid2 (shards) pyramid ...
An abstract base class that defines interface for concrete basis implementations for Finite Element (...
Functor for computing values for the HierarchicalBasis_HDIV_PYR class.
Kokkos::DynRankView< scalarType, Kokkos::LayoutStride, DeviceType > ScalarViewType
View type for scalars.
Free functions, callable from device code, that implement various polynomials useful in basis definit...
const char * getName() const override
Returns basis name.
KOKKOS_INLINE_FUNCTION void E_QUAD(Kokkos::Array< OutputScalar, 3 > &EQUAD, const ordinal_type &i, const ordinal_type &j, const OutputScratchView &HomPi_s01, const PointScalar &s0, const PointScalar &s1, const Kokkos::Array< PointScalar, 3 > &s0_grad, const Kokkos::Array< PointScalar, 3 > &s1_grad, const OutputScratchView &HomLi_t01) const
EFunctionSpace functionSpace_
The function space in which the basis is defined.
ordinal_type basisDegree_
Degree of the largest complete polynomial space that can be represented by the basis.
virtual KOKKOS_INLINE_FUNCTION void getValues(OutputViewType, const PointViewType, const EOperator, const typename Kokkos::TeamPolicy< ExecutionSpace >::member_type &teamMember, const typename ExecutionSpace::scratch_memory_space &scratchStorage, const ordinal_type subcellDim=-1, const ordinal_type subcellOrdinal=-1) const
Team-level evaluation of basis functions on a reference cell.
Header function for Intrepid2::Util class and other utility functions.
KOKKOS_INLINE_FUNCTION void V_LEFT_TRI(Kokkos::Array< OutputScalar, 3 > &VLEFTTRI, const OutputScalar &phi_i, const Kokkos::Array< OutputScalar, 3 > &phi_i_grad, const OutputScalar &phi_j, const Kokkos::Array< OutputScalar, 3 > &phi_j_grad, const OutputScalar &t0, const Kokkos::Array< OutputScalar, 3 > &t0_grad) const
See Fuentes et al. (p. 455), definition of V_{ij}^{}.
HierarchicalBasis_HDIV_PYR(int polyOrder, const EPointType pointType=POINTTYPE_DEFAULT)
Constructor.
KOKKOS_INLINE_FUNCTION void cross(Kokkos::Array< OutputScalar, 3 > &c, const Kokkos::Array< OutputScalar, 3 > &a, const Kokkos::Array< OutputScalar, 3 > &b) const
cross product: c = a x b
Kokkos::View< ordinal_type **, typename ExecutionSpace::array_layout, Kokkos::HostSpace > OrdinalTypeArray2DHost
View type for 2d host array.
Basis defining integrated Legendre basis on the line, a polynomial subspace of H(grad) on the line...
Basis defining Legendre basis on the line, a polynomial subspace of L^2 (a.k.a. H(vol)) on the line...
Kokkos::DynRankView< OutputValueType, Kokkos::LayoutStride, DeviceType > OutputViewType
View type for basis value output.
KOKKOS_INLINE_FUNCTION void V_TRI_B42_DIV(OutputScalar &div_VTRI_mus0_mus1_s2_over_mu, const Kokkos::Array< OutputScalar, 3 > &VTRI_00_s0_s1_s2, const Kokkos::Array< OutputScalar, 3 > &EE_0_s0_s1, const OutputScalar &s2, const Kokkos::Array< OutputScalar, 3 > &s2_grad, const OutputScalar &mu, const Kokkos::Array< OutputScalar, 3 > &mu_grad, const ordinal_type &i, const ordinal_type &j, const OutputScratchView &P_mus0_mus1, const OutputScratchView &P_2ip1_mus0pmus1_s2) const
See Equation (B.42) in Fuentes et al. Computes 1/mu V^{tri}_ij(mu s0, mu s1, s2). ...
H(vol) basis on the line based on Legendre polynomials.
ordinal_type basisCardinality_
Cardinality of the basis, i.e., the number of basis functions/degrees-of-freedom. ...
KOKKOS_INLINE_FUNCTION void V_TRI_B42(Kokkos::Array< OutputScalar, 3 > &VTRI_mus0_mus1_s2_over_mu, const Kokkos::Array< OutputScalar, 3 > &VTRI_00_s0_s1_s2, const Kokkos::Array< OutputScalar, 3 > &EE_0_s0_s1, const OutputScalar &s2, const OutputScalar &mu, const Kokkos::Array< OutputScalar, 3 > &mu_grad, const ordinal_type &i, const ordinal_type &j, const OutputScratchView &P_mus0_mus1, const OutputScratchView &P_2ip1_mus0pmus1_s2) const
See Equation (B.42) in Fuentes et al. Computes 1/mu V^{tri}_ij(mu s0, mu s1, s2). ...
OrdinalTypeArray3DHost tagToOrdinal_
DoF tag to ordinal lookup table.
KOKKOS_INLINE_FUNCTION void dot(OutputScalar &c, const Kokkos::Array< OutputScalar, 3 > &a, const Kokkos::Array< OutputScalar, 3 > &b) const
dot product: c = a b
Implementation of H(vol) basis on the quadrilateral that is templated on H(vol) on the line...
virtual BasisPtr< typename Kokkos::HostSpace::device_type, OutputScalar, PointScalar > getHostBasis() const override
Creates and returns a Basis object whose DeviceType template argument is Kokkos::HostSpace::device_ty...
KOKKOS_INLINE_FUNCTION void V_QUAD(Kokkos::Array< OutputScalar, 3 > &VQUAD, const ordinal_type &i, const ordinal_type &j, const OutputScratchView &PHom_s, const PointScalar &s0, const PointScalar &s1, const Kokkos::Array< PointScalar, 3 > &s0_grad, const Kokkos::Array< PointScalar, 3 > &s1_grad, const OutputScratchView &PHom_t, const PointScalar &t0, const PointScalar &t1, const Kokkos::Array< PointScalar, 3 > &t0_grad, const Kokkos::Array< PointScalar, 3 > &t1_grad) const
OrdinalTypeArray2DHost ordinalToTag_
&quot;true&quot; if tagToOrdinal_ and ordinalToTag_ have been initialized
ordinal_type getDegree() const
Returns the degree of the basis.
ECoordinates basisCoordinates_
The coordinate system for which the basis is defined.
virtual bool requireOrientation() const override
True if orientation is required.
Kokkos::DynRankView< OutputValueType, Kokkos::LayoutStride, DeviceType > OutputViewType
View type for basis value output.
OrdinalTypeArray2DHost fieldOrdinalH1PolynomialDegree_
H^1 polynomial degree for each degree of freedom. Only defined for hierarchical bases right now...
OrdinalTypeArray2DHost fieldOrdinalPolynomialDegree_
Polynomial degree for each degree of freedom. Only defined for hierarchical bases right now...
Basis defining Legendre basis on the line, a polynomial subspace of H(vol) on the line: extension to ...
Kokkos::DynRankView< PointValueType, Kokkos::LayoutStride, DeviceType > PointViewType
View type for input points.
void setOrdinalTagData(OrdinalTypeView3D &tagToOrdinal, OrdinalTypeView2D &ordinalToTag, const OrdinalTypeView1D tags, const ordinal_type basisCard, const ordinal_type tagSize, const ordinal_type posScDim, const ordinal_type posScOrd, const ordinal_type posDfOrd)
Fills ordinalToTag_ and tagToOrdinal_ by basis-specific tag data.
virtual void getValues(OutputViewType outputValues, const PointViewType inputPoints, const EOperator operatorType=OPERATOR_VALUE) const override
Evaluation of a FEM basis on a reference cell.
H(vol) basis on the triangle based on integrated Legendre polynomials.
KOKKOS_INLINE_FUNCTION void V_RIGHT_TRI(Kokkos::Array< OutputScalar, 3 > &VRIGHTTRI, const OutputScalar &mu1, const Kokkos::Array< OutputScalar, 3 > &mu1_grad, const OutputScalar &phi_i, const Kokkos::Array< OutputScalar, 3 > &phi_i_grad, const OutputScalar &t0, const Kokkos::Array< OutputScalar, 3 > &t0_grad) const
See Fuentes et al. (p. 455), definition of V_{ij}^{}.
Kokkos::DynRankView< PointValueType, Kokkos::LayoutStride, DeviceType > PointViewType
View type for input points.
Header file for the abstract base class Intrepid2::Basis.
typename DeviceType::execution_space ExecutionSpace
(Kokkos) Execution space for basis.
Kokkos::View< ordinal_type *, typename ExecutionSpace::array_layout, Kokkos::HostSpace > OrdinalTypeArray1DHost
View type for 1d host array.