Intrepid2
Intrepid2_HierarchicalBasis_HDIV_PYR.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ************************************************************************
3 //
4 // Intrepid2 Package
5 // Copyright (2007) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Mauro Perego (mperego@sandia.gov) or
38 // Nate Roberts (nvrober@sandia.gov)
39 //
40 // ************************************************************************
41 // @HEADER
42 
52 #ifndef Intrepid2_HierarchicalBasis_HDIV_PYR_h
53 #define Intrepid2_HierarchicalBasis_HDIV_PYR_h
54 
55 #include <Kokkos_DynRankView.hpp>
56 
57 #include <Intrepid2_config.h>
58 
59 #include "Intrepid2_Basis.hpp"
65 #include "Intrepid2_Utils.hpp"
66 
67 #include <math.h>
68 
69 #include "Teuchos_RCP.hpp"
70 
71 namespace Intrepid2
72 {
78  template<class DeviceType, class OutputScalar, class PointScalar,
79  class OutputFieldType, class InputPointsType>
81  {
82  using ExecutionSpace = typename DeviceType::execution_space;
83  using ScratchSpace = typename ExecutionSpace::scratch_memory_space;
84  using OutputScratchView = Kokkos::View<OutputScalar*,ScratchSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
85  using OutputScratchView2D = Kokkos::View<OutputScalar**,ScratchSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
86  using PointScratchView = Kokkos::View<PointScalar*, ScratchSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
87 
88  using TeamPolicy = Kokkos::TeamPolicy<ExecutionSpace>;
89  using TeamMember = typename TeamPolicy::member_type;
90 
91  EOperator opType_;
92 
93  OutputFieldType output_; // F,P
94  InputPointsType inputPoints_; // P,D
95 
96  int polyOrder_;
97  bool useCGBasis_;
98  int numFields_, numPoints_;
99 
100  size_t fad_size_output_;
101 
102  static const int numVertices = 5;
103  static const int numMixedEdges = 4;
104  static const int numTriEdges = 4;
105  static const int numEdges = 8;
106  // the following ordering of the edges matches that used by ESEAS
107  // (it *looks* like this is what ESEAS uses; the basis comparison tests will clarify whether I've read their code correctly)
108  // see also PyramidEdgeNodeMap in Shards_BasicTopologies.hpp
109  const int edge_start_[numEdges] = {0,1,2,3,0,1,2,3}; // edge i is from edge_start_[i] to edge_end_[i]
110  const int edge_end_[numEdges] = {1,2,3,0,4,4,4,4}; // edge i is from edge_start_[i] to edge_end_[i]
111 
112  // quadrilateral face comes first
113  static const int numQuadFaces = 1;
114  static const int numTriFaces = 4;
115 
116  // face ordering matches ESEAS. (See BlendProjectPyraTF in ESEAS.)
117  const int tri_face_vertex_0[numTriFaces] = {0,1,3,0}; // faces are abc where 0 ≤ a < b < c ≤ 3
118  const int tri_face_vertex_1[numTriFaces] = {1,2,2,3};
119  const int tri_face_vertex_2[numTriFaces] = {4,4,4,4};
120 
121  Hierarchical_HDIV_PYR_Functor(EOperator opType, OutputFieldType output, InputPointsType inputPoints,
122  int polyOrder)
123  : opType_(opType), output_(output), inputPoints_(inputPoints),
124  polyOrder_(polyOrder),
125  fad_size_output_(getScalarDimensionForView(output))
126  {
127  numFields_ = output.extent_int(0);
128  numPoints_ = output.extent_int(1);
129  const auto & p = polyOrder;
130  const auto basisCardinality = p * p + 2 * p * (p+1) + 3 * p * p * (p-1);
131 
132  INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != inputPoints.extent_int(0), std::invalid_argument, "point counts need to match!");
133  INTREPID2_TEST_FOR_EXCEPTION(numFields_ != basisCardinality, std::invalid_argument, "output field size does not match basis cardinality");
134  }
135 
137  KOKKOS_INLINE_FUNCTION
138  void cross(Kokkos::Array<OutputScalar,3> &c,
139  const Kokkos::Array<OutputScalar,3> &a,
140  const Kokkos::Array<OutputScalar,3> &b) const
141  {
142  c[0] = a[1] * b[2] - a[2] * b[1];
143  c[1] = a[2] * b[0] - a[0] * b[2];
144  c[2] = a[0] * b[1] - a[1] * b[0];
145  }
146 
148  KOKKOS_INLINE_FUNCTION
149  void dot(OutputScalar &c,
150  const Kokkos::Array<OutputScalar,3> &a,
151  const Kokkos::Array<OutputScalar,3> &b) const
152  {
153  c = 0;
154  for (ordinal_type d=0; d<3; d++)
155  {
156  c += a[d] * b[d];
157  }
158  }
159 
160  KOKKOS_INLINE_FUNCTION
161  OutputScalar dot(const Kokkos::Array<OutputScalar,3> &a,
162  const Kokkos::Array<OutputScalar,3> &b) const
163  {
164  OutputScalar c = 0;
165  for (ordinal_type d=0; d<3; d++)
166  {
167  c += a[d] * b[d];
168  }
169  return c;
170  }
171 
172  KOKKOS_INLINE_FUNCTION
173  void E_E(Kokkos::Array<OutputScalar,3> &EE,
174  const ordinal_type &i,
175  const OutputScratchView &PHom,
176  const PointScalar &s0, const PointScalar &s1,
177  const Kokkos::Array<PointScalar,3> &s0_grad,
178  const Kokkos::Array<PointScalar,3> &s1_grad) const
179  {
180  const auto & P_i = PHom(i);
181  for (ordinal_type d=0; d<3; d++)
182  {
183  EE[d] = P_i * (s0 * s1_grad[d] - s1 * s0_grad[d]);
184  }
185  }
186 
187  KOKKOS_INLINE_FUNCTION
188  void E_E_CURL(Kokkos::Array<OutputScalar,3> &curl_EE,
189  const ordinal_type &i,
190  const OutputScratchView &PHom,
191  const PointScalar &s0, const PointScalar &s1,
192  const Kokkos::Array<PointScalar,3> &s0_grad,
193  const Kokkos::Array<PointScalar,3> &s1_grad) const
194  {
195  // curl (E_i^E)(s0,s1) = (i+2) [P_i](s0,s1) (grad s0 x grad s1)
196  OutputScalar ip2_Pi = (i+2) * PHom(i);
197  cross(curl_EE, s0_grad, s1_grad);
198  for (ordinal_type d=0; d<3; d++)
199  {
200  curl_EE[d] *= ip2_Pi;
201  }
202  }
203 
206  KOKKOS_INLINE_FUNCTION
207  void V_QUAD(Kokkos::Array<OutputScalar,3> &VQUAD,
208  const ordinal_type &i, const ordinal_type &j,
209  const OutputScratchView &PHom_s,
210  const PointScalar &s0, const PointScalar &s1,
211  const Kokkos::Array<PointScalar,3> &s0_grad,
212  const Kokkos::Array<PointScalar,3> &s1_grad,
213  const OutputScratchView &PHom_t,
214  const PointScalar &t0, const PointScalar &t1,
215  const Kokkos::Array<PointScalar,3> &t0_grad,
216  const Kokkos::Array<PointScalar,3> &t1_grad) const
217  {
218  Kokkos::Array<OutputScalar,3> EE_i, EE_j;
219 
220  E_E(EE_i, i, PHom_s, s0, s1, s0_grad, s1_grad);
221  E_E(EE_j, j, PHom_t, t0, t1, t0_grad, t1_grad);
222 
223  // VQUAD = EE_i x EE_j:
224  cross(VQUAD, EE_i, EE_j);
225  }
226 
229  KOKKOS_INLINE_FUNCTION
230  void E_QUAD(Kokkos::Array<OutputScalar,3> &EQUAD,
231  const ordinal_type &i, const ordinal_type &j,
232  const OutputScratchView &HomPi_s01,
233  const PointScalar &s0, const PointScalar &s1,
234  const Kokkos::Array<PointScalar,3> &s0_grad,
235  const Kokkos::Array<PointScalar,3> &s1_grad,
236  const OutputScratchView &HomLi_t01) const
237  {
238  const OutputScalar &phiE_j = HomLi_t01(j);
239 
240  Kokkos::Array<OutputScalar,3> EE_i;
241  E_E(EE_i, i, HomPi_s01, s0, s1, s0_grad, s1_grad);
242 
243  for (ordinal_type d=0; d<3; d++)
244  {
245  EQUAD[d] = phiE_j * EE_i[d];
246  }
247  }
248 
249  KOKKOS_INLINE_FUNCTION
250  void E_QUAD_CURL(Kokkos::Array<OutputScalar,3> &EQUAD_CURL,
251  const ordinal_type &i, const ordinal_type &j,
252  const OutputScratchView &HomPi_s01,
253  const PointScalar &s0, const PointScalar &s1,
254  const Kokkos::Array<PointScalar,3> &s0_grad,
255  const Kokkos::Array<PointScalar,3> &s1_grad,
256  const OutputScratchView &HomPj_t01,
257  const OutputScratchView &HomLj_t01,
258  const OutputScratchView &HomLj_dt_t01,
259  const Kokkos::Array<PointScalar,3> &t0_grad,
260  const Kokkos::Array<PointScalar,3> &t1_grad) const
261  {
262  const OutputScalar &phiE_j = HomLj_t01(j);
263 
264  Kokkos::Array<OutputScalar,3> curl_EE_i;
265  E_E_CURL(curl_EE_i, i, HomPi_s01, s0, s1, s0_grad, s1_grad);
266 
267  Kokkos::Array<OutputScalar,3> EE_i;
268  E_E(EE_i, i, HomPi_s01, s0, s1, s0_grad, s1_grad);
269 
270  Kokkos::Array<OutputScalar,3> grad_phiE_j;
271  computeGradHomLi(grad_phiE_j, j, HomPj_t01, HomLj_dt_t01, t0_grad, t1_grad);
272 
273  cross(EQUAD_CURL, grad_phiE_j, EE_i);
274  for (ordinal_type d=0; d<3; d++)
275  {
276  EQUAD_CURL[d] += phiE_j * curl_EE_i[d];
277  }
278  }
279 
281  KOKKOS_INLINE_FUNCTION
282  void V_LEFT_TRI(Kokkos::Array<OutputScalar,3> &VLEFTTRI,
283  const OutputScalar &phi_i, const Kokkos::Array<OutputScalar,3> &phi_i_grad,
284  const OutputScalar &phi_j, const Kokkos::Array<OutputScalar,3> &phi_j_grad,
285  const OutputScalar &t0, const Kokkos::Array<OutputScalar,3> &t0_grad) const {
286  cross(VLEFTTRI, phi_i_grad, phi_j_grad);
287  const OutputScalar t0_2 = t0 * t0;
288  for (ordinal_type d=0; d<3; d++)
289  {
290  VLEFTTRI[d] *= t0_2;
291  }
292 
293  Kokkos::Array<OutputScalar,3> tmp_t0_grad_t0, tmp_diff, tmp_cross;
294  for (ordinal_type d=0; d<3; d++)
295  {
296  tmp_t0_grad_t0[d] = t0 * t0_grad[d];
297  tmp_diff[d] = phi_i * phi_j_grad[d] - phi_j * phi_i_grad[d];
298  }
299  cross(tmp_cross, tmp_t0_grad_t0, tmp_diff);
300 
301  for (ordinal_type d=0; d<3; d++)
302  {
303  VLEFTTRI[d] += tmp_cross[d];
304  }
305  };
306 
307 
309  KOKKOS_INLINE_FUNCTION
310  void V_RIGHT_TRI(Kokkos::Array<OutputScalar,3> &VRIGHTTRI,
311  const OutputScalar &mu1, const Kokkos::Array<OutputScalar,3> &mu1_grad,
312  const OutputScalar &phi_i, const Kokkos::Array<OutputScalar,3> &phi_i_grad,
313  const OutputScalar &t0, const Kokkos::Array<OutputScalar,3> &t0_grad) const {
314  Kokkos::Array<OutputScalar,3> left_vector; // left vector in the cross product we take below.
315 
316  const OutputScalar t0_2 = t0 * t0;
317  for (ordinal_type d=0; d<3; d++)
318  {
319  left_vector[d] = t0_2 * phi_i_grad[d] + 2. * t0 * phi_i * t0_grad[d];
320  }
321  cross(VRIGHTTRI, left_vector, mu1_grad);
322  };
323 
324  // This is the "Ancillary Operator" V^{tri}_{ij} on p. 433 of Fuentes et al.
325  KOKKOS_INLINE_FUNCTION
326  void V_TRI(Kokkos::Array<OutputScalar,3> &VTRI,
327  const ordinal_type &i, // i >= 0
328  const ordinal_type &j, // j >= 0
329  const OutputScratchView &P, // container in which shiftedScaledLegendreValues have been computed for the appropriate face
330  const OutputScratchView &P_2ip1, // container in which shiftedScaledJacobiValues have been computed for (2i+1) for the appropriate face
331  const Kokkos::Array<PointScalar,3> &vectorWeight) const // s0 (grad s1 x grad s2) + s1 (grad s2 x grad s0) + s2 (grad s0 x grad s1) -- see computeFaceVectorWeight()
332  {
333  const auto &P_i = P(i);
334  const auto &P_2ip1_j = P_2ip1(j);
335 
336  for (ordinal_type d=0; d<3; d++)
337  {
338  VTRI[d] = P_i * P_2ip1_j * vectorWeight[d];
339  }
340  }
341 
342  // Divergence of the "Ancillary Operator" V^{tri}_{ij} on p. 433 of Fuentes et al.
343  KOKKOS_INLINE_FUNCTION
344  void V_TRI_DIV(OutputScalar &VTRI_DIV,
345  const ordinal_type &i, // i >= 0
346  const ordinal_type &j, // j >= 0
347  const OutputScratchView &P, // container in which shiftedScaledLegendreValues have been computed for the appropriate face
348  const OutputScratchView &P_2ip1, // container in which shiftedScaledJacobiValues have been computed for (2i+1) for the appropriate face
349  const OutputScalar &divWeight) const // grad s0 \dot (grad s1 x grad s2)
350  {
351  const auto &P_i = P(i);
352  const auto &P_2ip1_j = P_2ip1(j);
353 
354  VTRI_DIV = (i + j + 3.) * P_i * P_2ip1_j * divWeight;
355  }
356 
358  KOKKOS_INLINE_FUNCTION
359  void V_TRI_B42(Kokkos::Array<OutputScalar,3> &VTRI_mus0_mus1_s2_over_mu,
360  const Kokkos::Array<OutputScalar,3> &VTRI_00_s0_s1_s2,
361  const Kokkos::Array<OutputScalar,3> &EE_0_s0_s1,
362  const OutputScalar &s2,
363  const OutputScalar &mu, const Kokkos::Array<OutputScalar,3> &mu_grad,
364  const ordinal_type &i, // i >= 0
365  const ordinal_type &j, // j >= 0
366  const OutputScratchView &P_mus0_mus1, // container in which shiftedScaledLegendreValues have been computed for the appropriate face, with arguments (mu s0, mu s1)
367  const OutputScratchView &P_2ip1_mus0pmus1_s2 // container in which shiftedScaledJacobiValues have been computed for (2i+1) for the appropriate face, with arguments (mu s0 + mu s1, s2)
368  ) const
369  {
370  const auto &Pi_mus0_mus1 = P_mus0_mus1(i);
371  const auto &Pj_2ip1_mus0pmus1_s2 = P_2ip1_mus0pmus1_s2(j);
372 
373  // place s2 (grad mu) x E^E_0 in result container
374  cross(VTRI_mus0_mus1_s2_over_mu, mu_grad, EE_0_s0_s1);
375  for (ordinal_type d=0; d<3; d++)
376  {
377  VTRI_mus0_mus1_s2_over_mu[d] *= s2;
378  }
379 
380  // add mu V^{tri}_00 to it:
381  for (ordinal_type d=0; d<3; d++)
382  {
383  VTRI_mus0_mus1_s2_over_mu[d] += mu * VTRI_00_s0_s1_s2[d];
384  }
385 
386  // multiply by [P_i, P^{2i+1}_j]
387  for (ordinal_type d=0; d<3; d++)
388  {
389  VTRI_mus0_mus1_s2_over_mu[d] *= Pi_mus0_mus1 * Pj_2ip1_mus0pmus1_s2;
390  }
391  }
392 
394  KOKKOS_INLINE_FUNCTION
395  void V_TRI_B42_DIV(OutputScalar &div_VTRI_mus0_mus1_s2_over_mu,
396  const Kokkos::Array<OutputScalar,3> &VTRI_00_s0_s1_s2,
397  const Kokkos::Array<OutputScalar,3> &EE_0_s0_s1,
398  const OutputScalar &s2, const Kokkos::Array<OutputScalar,3> &s2_grad,
399  const OutputScalar &mu, const Kokkos::Array<OutputScalar,3> &mu_grad,
400  const ordinal_type &i, // i >= 0
401  const ordinal_type &j, // j >= 0
402  const OutputScratchView &P_mus0_mus1, // container in which shiftedScaledLegendreValues have been computed for the appropriate face, with arguments (mu s0, mu s1)
403  const OutputScratchView &P_2ip1_mus0pmus1_s2 // container in which shiftedScaledJacobiValues have been computed for (2i+1) for the appropriate face, with arguments (mu s0 + mu s1, s2)
404  ) const
405  {
406  const auto &Pi_mus0_mus1 = P_mus0_mus1(i);
407  const auto &Pj_2ip1_mus0pmus1_s2 = P_2ip1_mus0pmus1_s2(j);
408 
409  Kokkos::Array<OutputScalar,3> vector;
410 
411  // place E^E_0 x grad s2 in scratch vector container
412  cross(vector, EE_0_s0_s1, s2_grad);
413  // multiply by (i+j+3)
414  for (ordinal_type d=0; d<3; d++)
415  {
416  vector[d] *= i+j+3.;
417  }
418  // subtract V_00
419  for (ordinal_type d=0; d<3; d++)
420  {
421  vector[d] -= VTRI_00_s0_s1_s2[d];
422  }
423 
424  OutputScalar dot_product;
425  dot(dot_product, mu_grad, vector);
426 
427  div_VTRI_mus0_mus1_s2_over_mu = Pi_mus0_mus1 * Pj_2ip1_mus0pmus1_s2 * dot_product;
428  }
429 
430  KOKKOS_INLINE_FUNCTION
431  void computeFaceVectorWeight(Kokkos::Array<OutputScalar,3> &vectorWeight,
432  const PointScalar &s0, const Kokkos::Array<PointScalar,3> &s0Grad,
433  const PointScalar &s1, const Kokkos::Array<PointScalar,3> &s1Grad,
434  const PointScalar &s2, const Kokkos::Array<PointScalar,3> &s2Grad) const
435  {
436  // compute s0 (grad s1 x grad s2) + s1 (grad s2 x grad s0) + s2 (grad s0 x grad s1)
437 
438  Kokkos::Array<Kokkos::Array<PointScalar,3>,3> cross_products;
439 
440  cross(cross_products[0], s1Grad, s2Grad);
441  cross(cross_products[1], s2Grad, s0Grad);
442  cross(cross_products[2], s0Grad, s1Grad);
443 
444  Kokkos::Array<PointScalar,3> s {s0,s1,s2};
445 
446  for (ordinal_type d=0; d<3; d++)
447  {
448  OutputScalar v_d = 0;
449  for (ordinal_type i=0; i<3; i++)
450  {
451  v_d += s[i] * cross_products[i][d];
452  }
453  vectorWeight[d] = v_d;
454  }
455  }
456 
457  KOKKOS_INLINE_FUNCTION
458  void computeFaceDivWeight(OutputScalar &divWeight,
459  const Kokkos::Array<PointScalar,3> &s0Grad,
460  const Kokkos::Array<PointScalar,3> &s1Grad,
461  const Kokkos::Array<PointScalar,3> &s2Grad) const
462  {
463  // grad s0 \dot (grad s1 x grad s2)
464 
465  Kokkos::Array<PointScalar,3> grad_s1_cross_grad_s2;
466  cross(grad_s1_cross_grad_s2, s1Grad, s2Grad);
467 
468  dot(divWeight, s0Grad, grad_s1_cross_grad_s2);
469  }
470 
471  KOKKOS_INLINE_FUNCTION
472  void computeGradHomLi(Kokkos::Array<OutputScalar,3> &HomLi_grad, // grad [L_i](s0,s1)
473  const ordinal_type i,
474  const OutputScratchView &HomPi_s0s1, // [P_i](s0,s1)
475  const OutputScratchView &HomLi_dt_s0s1, // [d/dt L_i](s0,s1)
476  const Kokkos::Array<PointScalar,3> &s0Grad,
477  const Kokkos::Array<PointScalar,3> &s1Grad) const
478  {
479 // grad [L_i](s0,s1) = [P_{i-1}](s0,s1) * grad s1 + [R_{i-1}](s0,s1) * grad (s0 + s1)
480  const auto & R_i_minus_1 = HomLi_dt_s0s1(i); // d/dt L_i = R_{i-1}
481  const auto & P_i_minus_1 = HomPi_s0s1(i-1);
482  for (ordinal_type d=0; d<3; d++)
483  {
484  HomLi_grad[d] = P_i_minus_1 * s1Grad[d] + R_i_minus_1 * (s0Grad[d] + s1Grad[d]);
485  }
486  }
487 
488  KOKKOS_INLINE_FUNCTION
489  void operator()( const TeamMember & teamMember ) const
490  {
491  auto pointOrdinal = teamMember.league_rank();
492  OutputScratchView scratch1D_1, scratch1D_2, scratch1D_3;
493  OutputScratchView scratch1D_4, scratch1D_5, scratch1D_6;
494  OutputScratchView scratch1D_7, scratch1D_8, scratch1D_9;
495  if (fad_size_output_ > 0) {
496  scratch1D_1 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
497  scratch1D_2 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
498  scratch1D_3 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
499  scratch1D_4 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
500  scratch1D_5 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
501  scratch1D_6 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
502  scratch1D_7 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
503  scratch1D_8 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
504  scratch1D_9 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
505  }
506  else {
507  scratch1D_1 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
508  scratch1D_2 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
509  scratch1D_3 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
510  scratch1D_4 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
511  scratch1D_5 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
512  scratch1D_6 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
513  scratch1D_7 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
514  scratch1D_8 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
515  scratch1D_9 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
516  }
517 
518  const auto & x = inputPoints_(pointOrdinal,0);
519  const auto & y = inputPoints_(pointOrdinal,1);
520  const auto & z = inputPoints_(pointOrdinal,2);
521 
522  // Intrepid2 uses (-1,1)^2 for x,y
523  // ESEAS uses (0,1)^2
524 
525  Kokkos::Array<PointScalar,3> coords;
526  transformToESEASPyramid<>(coords[0], coords[1], coords[2], x, y, z); // map x,y coordinates from (-z,z)^2 to (0,z)^2
527 
528  // pyramid "affine" coordinates and gradients get stored in lambda, lambdaGrad:
529  using Kokkos::Array;
530  Array<PointScalar,5> lambda;
531  Array<Kokkos::Array<PointScalar,3>,5> lambdaGrad;
532 
533  Array<Array<PointScalar,3>,2> mu;
534  Array<Array<Array<PointScalar,3>,3>,2> muGrad;
535 
536  Array<Array<PointScalar,2>,3> nu;
537  Array<Array<Array<PointScalar,3>,2>,3> nuGrad;
538 
539  affinePyramid(lambda, lambdaGrad, mu, muGrad, nu, nuGrad, coords);
540 
541  switch (opType_)
542  {
543  case OPERATOR_VALUE:
544  {
545  ordinal_type fieldOrdinalOffset = 0;
546  // quadrilateral face
547  {
548  // rename scratch1, scratch2
549  auto & Pi = scratch1D_1;
550  auto & Pj = scratch1D_2;
551 
552  auto & s0 = mu[0][0], & s1 = mu[1][0];
553  auto & s0_grad = muGrad[0][0], & s1_grad = muGrad[1][0];
554  auto & t0 = mu[0][1], & t1 = mu[1][1];
555  auto & t0_grad = muGrad[0][1], & t1_grad = muGrad[1][1];
556 
557  Polynomials::shiftedScaledLegendreValues(Pi, polyOrder_-1, s1, s0 + s1);
558  Polynomials::shiftedScaledLegendreValues(Pj, polyOrder_-1, t1, t0 + t1);
559 
560  const auto & muZ_0 = mu[0][2];
561  OutputScalar mu0_cubed = muZ_0 * muZ_0 * muZ_0;
562 
563  // following the ESEAS ordering: j increments first
564  for (int j=0; j<polyOrder_; j++)
565  {
566  for (int i=0; i<polyOrder_; i++)
567  {
568  Kokkos::Array<OutputScalar,3> VQUAD; // output from V_QUAD
569  V_QUAD(VQUAD, i, j,
570  Pi, s0, s1, s0_grad, s1_grad,
571  Pj, t0, t1, t0_grad, t1_grad);
572 
573  for (ordinal_type d=0; d<3; d++)
574  {
575  output_(fieldOrdinalOffset,pointOrdinal,d) = mu0_cubed * VQUAD[d];
576  }
577  fieldOrdinalOffset++;
578  }
579  }
580  }
581 
582  // triangle faces
583  {
584  /*
585  Face functions for triangular face (d,e,f) given by:
586  1/2 * ( mu_c^{zeta,xi_b} * V_TRI_ij(nu_012^{zeta,xi_a})
587  + 1/mu_c^{zeta,xi_b} * V_TRI_ij(nu_012^{zeta,xi_a} * mu_c^{zeta,xi_b}) )
588  but the division by mu_c^{zeta,xi_b} should ideally be avoided in computations; there is
589  an alternate expression, not implemented by ESEAS. We start with the above expression
590  so that we can get agreement with ESEAS. Hopefully after that we can do the alternative,
591  and confirm that we maintain agreement with ESEAS for points where ESEAS does not generate
592  nans.
593 
594 
595  where s0, s1, s2 are nu[0][a-1],nu[1][a-1],nu[2][a-1] (nu_012 above)
596  and (a,b) = (1,2) for faces 0,2; (a,b) = (2,1) for others;
597  c = 0 for faces 0,3; c = 1 for others
598  */
599  const auto & P = scratch1D_1; // for V_TRI( nu_012)
600  const auto & P_2ip1 = scratch1D_2;
601  const auto & Pmu = scratch1D_3; // for V_TRI( mu * nu_012)
602  const auto & Pmu_2ip1 = scratch1D_4;
603  for (int faceOrdinal=0; faceOrdinal<numTriFaces; faceOrdinal++)
604  {
605  // face 0,2 --> a=1, b=2
606  // face 1,3 --> a=2, b=1
607  int a = (faceOrdinal % 2 == 0) ? 1 : 2;
608  int b = 3 - a;
609  // face 0,3 --> c=0
610  // face 1,2 --> c=1
611  int c = ((faceOrdinal == 0) || (faceOrdinal == 3)) ? 0 : 1;
612 
613  const auto & s0 = nu[0][a-1]; const auto & s0_grad = nuGrad[0][a-1];
614  const auto & s1 = nu[1][a-1]; const auto & s1_grad = nuGrad[1][a-1];
615  const auto & s2 = nu[2][a-1];
616  const PointScalar jacobiScaling = s0 + s1 + s2;
617 
618  const PointScalar legendreScaling = s0 + s1;
619  Polynomials::shiftedScaledLegendreValues(P, polyOrder_-1, s1, legendreScaling);
620 
621  const auto lambda0_index = tri_face_vertex_0[faceOrdinal];
622  const auto lambda1_index = tri_face_vertex_1[faceOrdinal];
623  const auto lambda2_index = tri_face_vertex_2[faceOrdinal];
624 
625  const auto & mu_c_b = mu [c][b-1];
626  const auto & mu_c_b_grad = muGrad[c][b-1];
627  const auto & mu_s0 = lambda[lambda0_index];
628  const auto & mu_s1 = lambda[lambda1_index];
629  const auto & mu_s2 = lambda[lambda2_index];
630 
631  const PointScalar muJacobiScaling = mu_s0 + mu_s1 + mu_s2;
632 
633  const PointScalar muLegendreScaling = mu_s0 + mu_s1;
634  Polynomials::shiftedScaledLegendreValues(Pmu, polyOrder_-1, mu_s1, muLegendreScaling);
635 
636  Kokkos::Array<PointScalar, 3> vectorWeight;
637  computeFaceVectorWeight(vectorWeight, nu[0][a-1], nuGrad[0][a-1],
638  nu[1][a-1], nuGrad[1][a-1],
639  nu[2][a-1], nuGrad[2][a-1]);
640 
641  Kokkos::Array<OutputScalar,3> VTRI_00;
642  V_TRI(VTRI_00,0,0,P,P_2ip1,vectorWeight);
643 
644  Kokkos::Array<OutputScalar,3> EE_0;
645  E_E(EE_0, 0, P, s0, s1, s0_grad, s1_grad);
646 
647  for (int totalPolyOrder=0; totalPolyOrder<polyOrder_; totalPolyOrder++)
648  {
649  for (int i=0; i<=totalPolyOrder; i++)
650  {
651  const int j = totalPolyOrder - i;
652 
653  const double alpha = i*2.0 + 1;
654  Polynomials::shiftedScaledJacobiValues( P_2ip1, alpha, polyOrder_-1, s2, jacobiScaling);
655  Polynomials::shiftedScaledJacobiValues(Pmu_2ip1, alpha, polyOrder_-1, mu_s2, muJacobiScaling);
656 
657  Kokkos::Array<OutputScalar,3> VTRI; // output from V_TRI
658  V_TRI(VTRI, i, j, P, P_2ip1, vectorWeight);
659 
660  Kokkos::Array<OutputScalar,3> one_over_mu_VTRI_mu; // (B.42) result
661  V_TRI_B42(one_over_mu_VTRI_mu, VTRI_00, EE_0, s2, mu_c_b, mu_c_b_grad, i, j, Pmu, Pmu_2ip1);
662 
663  for (ordinal_type d=0; d<3; d++)
664  {
665  output_(fieldOrdinalOffset,pointOrdinal,d) = 0.5 * (VTRI[d] * mu_c_b + one_over_mu_VTRI_mu[d]);
666  }
667 
668  fieldOrdinalOffset++;
669  }
670  }
671  }
672  } // triangle faces block
673 
674  // interior functions
675  {
676  // label scratch
677  const auto & Li_muZ01 = scratch1D_1; // used for phi_k^E values in Family I, II, IV
678  const auto & Li_muX01 = scratch1D_2; // used for E_QUAD computations
679  const auto & Li_muY01 = scratch1D_3; // used for E_QUAD computations
680  const auto & Pi_muX01 = scratch1D_4; // used for E_QUAD computations where xi_1 comes first
681  const auto & Pi_muY01 = scratch1D_5; // used for E_QUAD computations where xi_2 comes first
682  const auto & Pi_muZ01 = scratch1D_6; // used for E_QUAD computations where xi_2 comes first
683  const auto & Li_dt_muX01 = scratch1D_7; // used for E_QUAD computations
684  const auto & Li_dt_muY01 = scratch1D_8; // used for E_QUAD computations
685  const auto & Li_dt_muZ01 = scratch1D_9; // used for E_QUAD computations
686 
687  const auto & muX_0 = mu[0][0]; const auto & muX_0_grad = muGrad[0][0];
688  const auto & muX_1 = mu[1][0]; const auto & muX_1_grad = muGrad[1][0];
689  const auto & muY_0 = mu[0][1]; const auto & muY_0_grad = muGrad[0][1];
690  const auto & muY_1 = mu[1][1]; const auto & muY_1_grad = muGrad[1][1];
691  const auto & muZ_0 = mu[0][2]; const auto & muZ_0_grad = muGrad[0][2];
692  const auto & muZ_1 = mu[1][2]; const auto & muZ_1_grad = muGrad[1][2];
693 
694  Polynomials::shiftedScaledIntegratedLegendreValues(Li_muX01, polyOrder_, muX_1, muX_0 + muX_1);
695  Polynomials::shiftedScaledIntegratedLegendreValues(Li_muY01, polyOrder_, muY_1, muY_0 + muY_1);
696  Polynomials::shiftedScaledIntegratedLegendreValues(Li_muZ01, polyOrder_, muZ_1, muZ_0 + muZ_1);
697 
698  Polynomials::shiftedScaledLegendreValues(Pi_muX01, polyOrder_, muX_1, muX_0 + muX_1);
699  Polynomials::shiftedScaledLegendreValues(Pi_muY01, polyOrder_, muY_1, muY_0 + muY_1);
700  Polynomials::shiftedScaledLegendreValues(Pi_muZ01, polyOrder_, muZ_1, muZ_0 + muZ_1);
701 
702  Polynomials::shiftedScaledIntegratedLegendreValues_dt(Li_dt_muX01, Pi_muX01, polyOrder_, muX_1, muX_0 + muX_1);
703  Polynomials::shiftedScaledIntegratedLegendreValues_dt(Li_dt_muY01, Pi_muY01, polyOrder_, muY_1, muY_0 + muY_1);
704  Polynomials::shiftedScaledIntegratedLegendreValues_dt(Li_dt_muZ01, Pi_muZ01, polyOrder_, muZ_1, muZ_0 + muZ_1);
705 
706  // FAMILIES I & II -- divergence-free families
707  // following the ESEAS ordering: k increments first
708  for (int f=0; f<2; f++)
709  {
710  const auto &s0 = (f==0) ? muX_0 : muY_0; const auto & s0_grad = (f==0) ? muX_0_grad : muY_0_grad;
711  const auto &s1 = (f==0) ? muX_1 : muY_1; const auto & s1_grad = (f==0) ? muX_1_grad : muY_1_grad;
712  const auto & t0_grad = (f==0) ? muY_0_grad : muX_0_grad;
713  const auto & t1_grad = (f==0) ? muY_1_grad : muX_1_grad;
714  const auto & Pi_s01 = (f==0) ? Pi_muX01 : Pi_muY01;
715  const auto & Pi_t01 = (f==0) ? Pi_muY01 : Pi_muX01;
716  const auto & Li_t01 = (f==0) ? Li_muY01 : Li_muX01;
717  const auto & Li_dt_t01 = (f==0) ? Li_dt_muY01 : Li_dt_muX01;
718 
719  for (int k=2; k<=polyOrder_; k++)
720  {
721  const auto & phi_k = Li_muZ01(k);
722  Kokkos::Array<OutputScalar,3> phi_k_grad;
723  computeGradHomLi(phi_k_grad, k, Pi_muZ01, Li_dt_muZ01, muZ_0_grad, muZ_1_grad);
724 
725  Kokkos::Array<OutputScalar,3> muZ0_grad_phi_k_plus_phi_k_grad_muZ0;
726  for (ordinal_type d=0; d<3; d++)
727  {
728  muZ0_grad_phi_k_plus_phi_k_grad_muZ0[d] = muZ_0 * phi_k_grad[d] + phi_k * muZ_0_grad[d];
729  }
730 
731  // for reasons that I don't entirely understand, ESEAS switches whether i or j are the fastest-moving indices depending on whether it's family I or II. Following their code, I'm calling the outer loop variable jg, inner ig.
732  // (Cross-code comparisons are considerably simpler if we number the dofs in the same way.)
733  ordinal_type jg_min = (f==0) ? 2 : 0;
734  ordinal_type jg_max = (f==0) ? polyOrder_ : polyOrder_-1;
735  ordinal_type ig_min = (f==0) ? 0 : 2;
736  ordinal_type ig_max = (f==0) ? polyOrder_ -1 : polyOrder_;
737  for (ordinal_type jg=jg_min; jg<=jg_max; jg++)
738  {
739  for (ordinal_type ig=ig_min; ig<=ig_max; ig++)
740  {
741  const ordinal_type &i = (f==0) ? ig : jg;
742  const ordinal_type &j = (f==0) ? jg : ig;
743  Kokkos::Array<OutputScalar,3> EQUAD_ij;
744  Kokkos::Array<OutputScalar,3> curl_EQUAD_ij;
745 
746  E_QUAD(EQUAD_ij, i, j, Pi_s01, s0, s1, s0_grad, s1_grad, Li_t01);
747 
748  E_QUAD_CURL(curl_EQUAD_ij, i, j, Pi_s01, s0, s1, s0_grad, s1_grad,
749  Pi_t01, Li_t01, Li_dt_t01, t0_grad, t1_grad);
750 
751  // first term: muZ_0 phi^E_k curl EQUAD
752  // we can reuse the memory for curl_EQUAD_ij; we won't need the values there again
753  Kokkos::Array<OutputScalar,3> & firstTerm = curl_EQUAD_ij;
754  for (ordinal_type d=0; d<3; d++)
755  {
756  firstTerm[d] *= muZ_0 * phi_k;
757  }
758 
759  Kokkos::Array<OutputScalar,3> secondTerm; //(muZ0 grad phi + phi grad muZ0) x EQUAD
760 
761  cross(secondTerm, muZ0_grad_phi_k_plus_phi_k_grad_muZ0, EQUAD_ij);
762 
763  for (ordinal_type d=0; d<3; d++)
764  {
765  output_(fieldOrdinalOffset,pointOrdinal,d) = firstTerm[d] + secondTerm[d];
766  }
767 
768  fieldOrdinalOffset++;
769  }
770  }
771  }
772  } // family I, II loop
773 
774  // FAMILY III -- a divergence-free family
775  for (int j=2; j<=polyOrder_; j++)
776  {
777  // phi_ij_QUAD: phi_i(mu_X01) * phi_j(mu_Y01) // (following the ESEAS *implementation*; Fuentes et al. (p. 454) actually have the arguments reversed, which leads to a different basis ordering)
778  const auto & phi_j = Li_muY01(j);
779  Kokkos::Array<OutputScalar,3> phi_j_grad;
780  computeGradHomLi(phi_j_grad, j, Pi_muY01, Li_dt_muY01, muY_0_grad, muY_1_grad);
781  for (int i=2; i<=polyOrder_; i++)
782  {
783  const auto & phi_i = Li_muX01(i);
784  Kokkos::Array<OutputScalar,3> phi_i_grad;
785  computeGradHomLi(phi_i_grad, i, Pi_muX01, Li_dt_muX01, muX_0_grad, muX_1_grad);
786 
787  Kokkos::Array<OutputScalar,3> phi_ij_grad;
788  for (ordinal_type d=0; d<3; d++)
789  {
790  phi_ij_grad[d] = phi_i * phi_j_grad[d] + phi_j * phi_i_grad[d];
791  }
792 
793  Kokkos::Array<OutputScalar,3> cross_product; // phi_ij_grad x grad_muZ0
794  cross(cross_product, phi_ij_grad, muZ_0_grad);
795 
796  ordinal_type n = max(i,j);
797  OutputScalar weight = n * pow(muZ_0,n-1);
798  for (ordinal_type d=0; d<3; d++)
799  {
800  output_(fieldOrdinalOffset,pointOrdinal,d) = weight * cross_product[d];
801  }
802  fieldOrdinalOffset++;
803  }
804  }
805 
806  // FAMILY IV (non-trivial divergences)
807  {
808  const auto muZ_0_squared = muZ_0 * muZ_0;
809  const auto &s0 = muX_0; const auto & s0_grad = muX_0_grad;
810  const auto &s1 = muX_1; const auto & s1_grad = muX_1_grad;
811  const auto &t0 = muY_0; const auto & t0_grad = muY_0_grad;
812  const auto &t1 = muY_1; const auto & t1_grad = muY_1_grad;
813  const auto &Pi = Pi_muX01;
814  const auto &Pj = Pi_muY01;
815  for (int k=2; k<=polyOrder_; k++)
816  {
817  const auto & phi_k = Li_muZ01(k);
818  for (int j=0; j<polyOrder_; j++)
819  {
820  for (int i=0; i<polyOrder_; i++)
821  {
822  Kokkos::Array<OutputScalar,3> VQUAD; // output from V_QUAD
823  V_QUAD(VQUAD, i, j,
824  Pi, s0, s1, s0_grad, s1_grad,
825  Pj, t0, t1, t0_grad, t1_grad);
826 
827  for (int d=0; d<3; d++)
828  {
829  output_(fieldOrdinalOffset,pointOrdinal,d) = muZ_0_squared * phi_k * VQUAD[d];
830  }
831 
832  fieldOrdinalOffset++;
833  }
834  }
835  }
836  }
837 
838  // FAMILY V (non-trivial divergences)
839  {
840  for (int j=2; j<=polyOrder_; j++)
841  {
842  const auto & phi_j = Li_muY01(j);
843  Kokkos::Array<OutputScalar,3> phi_j_grad;
844  computeGradHomLi(phi_j_grad, j, Pi_muY01, Li_dt_muY01, muY_0_grad, muY_1_grad);
845 
846  for (int i=2; i<=polyOrder_; i++)
847  {
848  const auto & phi_i = Li_muX01(i);
849  Kokkos::Array<OutputScalar,3> phi_i_grad;
850  computeGradHomLi(phi_i_grad, i, Pi_muX01, Li_dt_muX01, muX_0_grad, muX_1_grad);
851 
852  const int n = max(i,j);
853  const OutputScalar muZ_1_nm1 = pow(muZ_1,n-1);
854 
855  Kokkos::Array<OutputScalar,3> VLEFTTRI;
856  V_LEFT_TRI(VLEFTTRI, phi_i, phi_i_grad, phi_j, phi_j_grad, muZ_0, muZ_0_grad);
857 
858  for (int d=0; d<3; d++)
859  {
860  output_(fieldOrdinalOffset,pointOrdinal,d) = muZ_1_nm1 * VLEFTTRI[d];
861  }
862 
863  fieldOrdinalOffset++;
864  }
865  }
866  }
867 
868  // FAMILY VI (non-trivial divergences)
869  for (int i=2; i<=polyOrder_; i++)
870  {
871  const auto & phi_i = Li_muX01(i);
872  Kokkos::Array<OutputScalar,3> phi_i_grad;
873  computeGradHomLi(phi_i_grad, i, Pi_muX01, Li_dt_muX01, muX_0_grad, muX_1_grad);
874 
875  Kokkos::Array<OutputScalar,3> VRIGHTTRI;
876  V_RIGHT_TRI(VRIGHTTRI, muY_1, muY_1_grad, phi_i, phi_i_grad, muZ_0, muZ_0_grad);
877 
878  const OutputScalar muZ_1_im1 = pow(muZ_1,i-1);
879 
880  for (int d=0; d<3; d++)
881  {
882  output_(fieldOrdinalOffset,pointOrdinal,d) = muZ_1_im1 * VRIGHTTRI[d];
883  }
884 
885  fieldOrdinalOffset++;
886  }
887 
888  // FAMILY VII (non-trivial divergences)
889  for (int j=2; j<=polyOrder_; j++)
890  {
891  const auto & phi_j = Li_muY01(j);
892  Kokkos::Array<OutputScalar,3> phi_j_grad;
893  computeGradHomLi(phi_j_grad, j, Pi_muY01, Li_dt_muY01, muY_0_grad, muY_1_grad);
894 
895  Kokkos::Array<OutputScalar,3> VRIGHTTRI;
896  V_RIGHT_TRI(VRIGHTTRI, muX_1, muX_1_grad, phi_j, phi_j_grad, muZ_0, muZ_0_grad);
897 
898  const OutputScalar muZ_1_jm1 = pow(muZ_1,j-1);
899 
900  for (int d=0; d<3; d++)
901  {
902  output_(fieldOrdinalOffset,pointOrdinal,d) = muZ_1_jm1 * VRIGHTTRI[d];
903  }
904 
905  fieldOrdinalOffset++;
906  }
907  }
908  } // end OPERATOR_VALUE
909  break;
910  case OPERATOR_DIV:
911  {
912  ordinal_type fieldOrdinalOffset = 0;
913  // quadrilateral face
914  {
915  // rename scratch1, scratch2
916  auto & Pi = scratch1D_1;
917  auto & Pj = scratch1D_2;
918 
919  auto & s0 = mu[0][0], s1 = mu[1][0];
920  auto & s0_grad = muGrad[0][0], s1_grad = muGrad[1][0];
921  auto & t0 = mu[0][1], t1 = mu[1][1];
922  auto & t0_grad = muGrad[0][1], t1_grad = muGrad[1][1];
923 
924  Polynomials::shiftedScaledLegendreValues(Pi, polyOrder_-1, s1, s0 + s1);
925  Polynomials::shiftedScaledLegendreValues(Pj, polyOrder_-1, t1, t0 + t1);
926 
927  const auto & muZ0 = mu[0][2];
928  const auto & muZ0_grad = muGrad[0][2];
929  OutputScalar three_mu0_squared = 3.0 * muZ0 * muZ0;
930 
931  // following the ESEAS ordering: j increments first
932  for (int j=0; j<polyOrder_; j++)
933  {
934  for (int i=0; i<polyOrder_; i++)
935  {
936  Kokkos::Array<OutputScalar,3> VQUAD; // output from V_QUAD
937  V_QUAD(VQUAD, i, j,
938  Pi, s0, s1, s0_grad, s1_grad,
939  Pj, t0, t1, t0_grad, t1_grad);
940 
941  OutputScalar grad_muZ0_dot_VQUAD;
942  dot(grad_muZ0_dot_VQUAD, muZ0_grad, VQUAD);
943 
944  output_(fieldOrdinalOffset,pointOrdinal) = three_mu0_squared * grad_muZ0_dot_VQUAD;
945  fieldOrdinalOffset++;
946  }
947  }
948  } // end quad face block
949 
950  // triangle faces
951  {
952  const auto & P = scratch1D_1; // for V_TRI( nu_012)
953  const auto & P_2ip1 = scratch1D_2;
954  const auto & Pmu = scratch1D_3; // for V_TRI( mu * nu_012)
955  const auto & Pmu_2ip1 = scratch1D_4;
956  for (int faceOrdinal=0; faceOrdinal<numTriFaces; faceOrdinal++)
957  {
958  // face 0,2 --> a=1, b=2
959  // face 1,3 --> a=2, b=1
960  int a = (faceOrdinal % 2 == 0) ? 1 : 2;
961  int b = 3 - a;
962  // face 0,3 --> c=0
963  // face 1,2 --> c=1
964  int c = ((faceOrdinal == 0) || (faceOrdinal == 3)) ? 0 : 1;
965 
966  const auto & s0 = nu[0][a-1]; const auto & s0_grad = nuGrad[0][a-1];
967  const auto & s1 = nu[1][a-1]; const auto & s1_grad = nuGrad[1][a-1];
968  const auto & s2 = nu[2][a-1]; const auto & s2_grad = nuGrad[2][a-1];
969  const PointScalar jacobiScaling = s0 + s1 + s2; // we can actually assume that this is 1; see comment at bottom of p. 425 of Fuentes et al.
970 
971  const PointScalar legendreScaling = s0 + s1;
972  Polynomials::shiftedScaledLegendreValues(P, polyOrder_-1, s1, legendreScaling);
973 
974  const auto lambda0_index = tri_face_vertex_0[faceOrdinal];
975  const auto lambda1_index = tri_face_vertex_1[faceOrdinal];
976  const auto lambda2_index = tri_face_vertex_2[faceOrdinal];
977 
978  const auto & mu_c_b = mu[c][b-1];
979  const auto & mu_c_b_grad = muGrad[c][b-1];
980 
981  const auto & mu_s0 = lambda[lambda0_index];
982  const auto & mu_s1 = lambda[lambda1_index];
983  const auto & mu_s2 = lambda[lambda2_index]; // == s2
984 
985  const PointScalar muJacobiScaling = mu_s0 + mu_s1 + mu_s2;
986 
987  const PointScalar muLegendreScaling = mu_s0 + mu_s1;
988  Polynomials::shiftedScaledLegendreValues(Pmu, polyOrder_-1, mu_s1, muLegendreScaling);
989 
990  Kokkos::Array<PointScalar, 3> vectorWeight;
991  computeFaceVectorWeight(vectorWeight, nu[0][a-1], nuGrad[0][a-1],
992  nu[1][a-1], nuGrad[1][a-1],
993  nu[2][a-1], nuGrad[2][a-1]);
994 
995  Kokkos::Array<PointScalar,3> & mu_s0_grad = lambdaGrad[lambda0_index];
996  Kokkos::Array<PointScalar,3> & mu_s1_grad = lambdaGrad[lambda1_index];
997  Kokkos::Array<PointScalar,3> & mu_s2_grad = lambdaGrad[lambda2_index]; // == s2_grad
998 
999  Kokkos::Array<PointScalar, 3> muVectorWeight;
1000  computeFaceVectorWeight(muVectorWeight, mu_s0, mu_s0_grad,
1001  mu_s1, mu_s1_grad,
1002  mu_s2, mu_s2_grad);
1003 
1004  OutputScalar muDivWeight;
1005  computeFaceDivWeight(muDivWeight, mu_s0_grad, mu_s1_grad, mu_s2_grad);
1006 
1007  Kokkos::Array<OutputScalar,3> VTRI_00;
1008  V_TRI(VTRI_00,0,0,P,P_2ip1,vectorWeight);
1009 
1010  Kokkos::Array<OutputScalar,3> EE_0;
1011  E_E(EE_0, 0, P, s0, s1, s0_grad, s1_grad);
1012 
1013  for (int totalPolyOrder=0; totalPolyOrder<polyOrder_; totalPolyOrder++)
1014  {
1015  for (int i=0; i<=totalPolyOrder; i++)
1016  {
1017  const int j = totalPolyOrder - i;
1018 
1019  const double alpha = i*2.0 + 1;
1020  Polynomials::shiftedScaledJacobiValues( P_2ip1, alpha, polyOrder_-1, s2, jacobiScaling);
1021  Polynomials::shiftedScaledJacobiValues(Pmu_2ip1, alpha, polyOrder_-1, mu_s2, muJacobiScaling);
1022 
1023  Kokkos::Array<OutputScalar,3> VTRI; // output from V_TRI
1024 
1025  V_TRI(VTRI, i, j, P, P_2ip1, vectorWeight);
1026 
1027  OutputScalar div_one_over_mu_VTRI_mu;
1028  V_TRI_B42_DIV(div_one_over_mu_VTRI_mu, VTRI_00, EE_0, s2, s2_grad, mu_c_b, mu_c_b_grad, i, j, Pmu, Pmu_2ip1);
1029 
1030  output_(fieldOrdinalOffset,pointOrdinal) = 0.5 * (dot(mu_c_b_grad, VTRI) + div_one_over_mu_VTRI_mu);
1031 
1032  fieldOrdinalOffset++;
1033  }
1034  }
1035  }
1036  } // end triangle face block
1037 
1038  {
1039  // FAMILY I -- divergence free
1040  // following the ESEAS ordering: k increments first
1041  for (int k=2; k<=polyOrder_; k++)
1042  {
1043  for (int j=2; j<=polyOrder_; j++)
1044  {
1045  for (int i=0; i<polyOrder_; i++)
1046  {
1047  output_(fieldOrdinalOffset,pointOrdinal) = 0.0;
1048  fieldOrdinalOffset++;
1049  }
1050  }
1051  }
1052 
1053  // FAMILY II -- divergence free
1054  // following the ESEAS ordering: k increments first
1055  for (int k=2; k<=polyOrder_; k++)
1056  {
1057  for (int j=2; j<=polyOrder_; j++)
1058  {
1059  for (int i=0; i<polyOrder_; i++)
1060  {
1061  output_(fieldOrdinalOffset,pointOrdinal) = 0.0;
1062  fieldOrdinalOffset++;
1063  }
1064  }
1065  }
1066 
1067  // FAMILY III -- divergence free
1068  for (int j=2; j<=polyOrder_; j++)
1069  {
1070  for (int i=2; i<=polyOrder_; i++)
1071  {
1072  output_(fieldOrdinalOffset,pointOrdinal) = 0.0;
1073  fieldOrdinalOffset++;
1074  }
1075  }
1076 
1077  const auto & Li_muZ01 = scratch1D_1; // used in Family IV
1078  const auto & Li_muX01 = scratch1D_2; // used in Family V
1079  const auto & Li_muY01 = scratch1D_3; // used in Family V
1080  const auto & Pi_muX01 = scratch1D_4; // used in Family IV
1081  const auto & Pi_muY01 = scratch1D_5; // used in Family IV
1082  const auto & Pi_muZ01 = scratch1D_6; // used in Family IV
1083  const auto & Li_dt_muX01 = scratch1D_7; // used in Family V
1084  const auto & Li_dt_muY01 = scratch1D_8; // used in Family V
1085  const auto & Li_dt_muZ01 = scratch1D_9; // used in Family IV
1086 
1087  const auto & muX_0 = mu[0][0]; const auto & muX_0_grad = muGrad[0][0];
1088  const auto & muX_1 = mu[1][0]; const auto & muX_1_grad = muGrad[1][0];
1089  const auto & muY_0 = mu[0][1]; const auto & muY_0_grad = muGrad[0][1];
1090  const auto & muY_1 = mu[1][1]; const auto & muY_1_grad = muGrad[1][1];
1091  const auto & muZ_0 = mu[0][2]; const auto & muZ_0_grad = muGrad[0][2];
1092  const auto & muZ_1 = mu[1][2]; const auto & muZ_1_grad = muGrad[1][2];
1093 
1094  Polynomials::shiftedScaledIntegratedLegendreValues(Li_muX01, polyOrder_, muX_1, muX_0 + muX_1);
1095  Polynomials::shiftedScaledIntegratedLegendreValues(Li_muY01, polyOrder_, muY_1, muY_0 + muY_1);
1096  Polynomials::shiftedScaledIntegratedLegendreValues(Li_muZ01, polyOrder_, muZ_1, muZ_0 + muZ_1);
1097 
1098  Polynomials::shiftedScaledLegendreValues(Pi_muX01, polyOrder_, muX_1, muX_0 + muX_1);
1099  Polynomials::shiftedScaledLegendreValues(Pi_muY01, polyOrder_, muY_1, muY_0 + muY_1);
1100  Polynomials::shiftedScaledLegendreValues(Pi_muZ01, polyOrder_, muZ_1, muZ_0 + muZ_1);
1101 
1102  Polynomials::shiftedScaledIntegratedLegendreValues_dt(Li_dt_muX01, Pi_muX01, polyOrder_, muX_1, muX_0 + muX_1);
1103  Polynomials::shiftedScaledIntegratedLegendreValues_dt(Li_dt_muY01, Pi_muY01, polyOrder_, muY_1, muY_0 + muY_1);
1104  Polynomials::shiftedScaledIntegratedLegendreValues_dt(Li_dt_muZ01, Pi_muZ01, polyOrder_, muZ_1, muZ_0 + muZ_1);
1105 
1106  // FAMILY IV -- non-trivial divergences
1107  {
1108  const auto muZ_0_squared = muZ_0 * muZ_0;
1109  const auto &s0 = muX_0; const auto & s0_grad = muX_0_grad;
1110  const auto &s1 = muX_1; const auto & s1_grad = muX_1_grad;
1111  const auto &t0 = muY_0; const auto & t0_grad = muY_0_grad;
1112  const auto &t1 = muY_1; const auto & t1_grad = muY_1_grad;
1113  const auto &Pi = Pi_muX01;
1114  const auto &Pj = Pi_muY01;
1115 
1116  for (int k=2; k<=polyOrder_; k++)
1117  {
1118  const auto & phi_k = Li_muZ01(k);
1119  Kokkos::Array<OutputScalar,3> phi_k_grad;
1120  computeGradHomLi(phi_k_grad, k, Pi_muZ01, Li_dt_muZ01, muZ_0_grad, muZ_1_grad);
1121  for (int j=0; j<polyOrder_; j++)
1122  {
1123  for (int i=0; i<polyOrder_; i++)
1124  {
1125  Kokkos::Array<OutputScalar,3> firstTerm{0,0,0}; // muZ_0_squared * grad phi_k + 2 muZ_0 * phi_k * grad muZ_0
1126  for (int d=0; d<3; d++)
1127  {
1128  firstTerm[d] += muZ_0_squared * phi_k_grad[d] + 2. * muZ_0 * phi_k * muZ_0_grad[d];
1129  }
1130  Kokkos::Array<OutputScalar,3> VQUAD; // output from V_QUAD
1131  V_QUAD(VQUAD, i, j,
1132  Pi, s0, s1, s0_grad, s1_grad,
1133  Pj, t0, t1, t0_grad, t1_grad);
1134 
1135  OutputScalar divValue;
1136  dot(divValue, firstTerm, VQUAD);
1137  output_(fieldOrdinalOffset,pointOrdinal) = divValue;
1138 
1139  fieldOrdinalOffset++;
1140  }
1141  }
1142  }
1143  }
1144 
1145  // FAMILY V -- non-trivial divergences
1146  {
1147  for (int j=2; j<=polyOrder_; j++)
1148  {
1149  const auto & phi_j = Li_muX01(j);
1150  Kokkos::Array<OutputScalar,3> phi_j_grad;
1151  computeGradHomLi(phi_j_grad, j, Pi_muY01, Li_dt_muY01, muY_0_grad, muY_1_grad);
1152 
1153  for (int i=2; i<=polyOrder_; i++)
1154  {
1155  const auto & phi_i = Li_muY01(i);
1156  Kokkos::Array<OutputScalar,3> phi_i_grad;
1157  computeGradHomLi(phi_i_grad, i, Pi_muX01, Li_dt_muX01, muX_0_grad, muX_1_grad);
1158 
1159  const int n = max(i,j);
1160  const OutputScalar muZ_1_nm2 = pow(muZ_1,n-2);
1161 
1162  Kokkos::Array<OutputScalar,3> VLEFTTRI;
1163  V_LEFT_TRI(VLEFTTRI, phi_i, phi_i_grad, phi_j, phi_j_grad, muZ_0, muZ_0_grad);
1164 
1165  OutputScalar dot_product;
1166  dot(dot_product, muZ_1_grad, VLEFTTRI);
1167  output_(fieldOrdinalOffset,pointOrdinal) = (n-1) * muZ_1_nm2 * dot_product;
1168 
1169  fieldOrdinalOffset++;
1170  }
1171  }
1172  }
1173 
1174  // FAMILY VI (non-trivial divergences)
1175  for (int i=2; i<=polyOrder_; i++)
1176  {
1177  const auto & phi_i = Li_muX01(i);
1178  Kokkos::Array<OutputScalar,3> phi_i_grad;
1179  computeGradHomLi(phi_i_grad, i, Pi_muX01, Li_dt_muX01, muX_0_grad, muX_1_grad);
1180 
1181  Kokkos::Array<OutputScalar,3> VRIGHTTRI;
1182  V_RIGHT_TRI(VRIGHTTRI, muY_1, muY_1_grad, phi_i, phi_i_grad, muZ_0, muZ_0_grad);
1183 
1184  OutputScalar dot_product;
1185  dot(dot_product, muZ_1_grad, VRIGHTTRI);
1186 
1187  const OutputScalar muZ_1_im2 = pow(muZ_1,i-2);
1188  output_(fieldOrdinalOffset,pointOrdinal) = (i-1) * muZ_1_im2 * dot_product;
1189 
1190  fieldOrdinalOffset++;
1191  }
1192 
1193  // FAMILY VII (non-trivial divergences)
1194  for (int j=2; j<=polyOrder_; j++)
1195  {
1196  const auto & phi_j = Li_muY01(j);
1197  Kokkos::Array<OutputScalar,3> phi_j_grad;
1198  computeGradHomLi(phi_j_grad, j, Pi_muY01, Li_dt_muY01, muY_0_grad, muY_1_grad);
1199 
1200  Kokkos::Array<OutputScalar,3> VRIGHTTRI;
1201  V_RIGHT_TRI(VRIGHTTRI, muX_1, muX_1_grad, phi_j, phi_j_grad, muZ_0, muZ_0_grad);
1202 
1203  OutputScalar dot_product;
1204  dot(dot_product, muZ_1_grad, VRIGHTTRI);
1205 
1206  const OutputScalar muZ_1_jm2 = pow(muZ_1,j-2);
1207  output_(fieldOrdinalOffset,pointOrdinal) = (j-1) * muZ_1_jm2 * dot_product;
1208 
1209  fieldOrdinalOffset++;
1210  }
1211 
1212  } // end interior function block
1213 
1214  } // end OPERATOR_DIV block
1215  break;
1216  case OPERATOR_GRAD:
1217  case OPERATOR_D1:
1218  case OPERATOR_D2:
1219  case OPERATOR_D3:
1220  case OPERATOR_D4:
1221  case OPERATOR_D5:
1222  case OPERATOR_D6:
1223  case OPERATOR_D7:
1224  case OPERATOR_D8:
1225  case OPERATOR_D9:
1226  case OPERATOR_D10:
1227  INTREPID2_TEST_FOR_ABORT( true,
1228  ">>> ERROR: (Intrepid2::Hierarchical_HDIV_PYR_Functor) Computing of second and higher-order derivatives is not currently supported");
1229  default:
1230  // unsupported operator type
1231  device_assert(false);
1232  }
1233  }
1234 
1235  // Provide the shared memory capacity.
1236  // This function takes the team_size as an argument,
1237  // which allows team_size-dependent allocations.
1238  size_t team_shmem_size (int team_size) const
1239  {
1240  // we use shared memory to create a fast buffer for basis computations
1241  size_t shmem_size = 0;
1242  if (fad_size_output_ > 0)
1243  {
1244  // 1D scratch views (we have up to 9 of them):
1245  shmem_size += 9 * OutputScratchView::shmem_size(polyOrder_ + 1, fad_size_output_);
1246  }
1247  else
1248  {
1249  // 1D scratch views (we have up to 9 of them):
1250  shmem_size += 9 * OutputScratchView::shmem_size(polyOrder_ + 1);
1251  }
1252 
1253  return shmem_size;
1254  }
1255  };
1256 
1268  template<typename DeviceType,
1269  typename OutputScalar = double,
1270  typename PointScalar = double,
1271  bool useCGBasis = true> // if useCGBasis is true, basis functions are associated with either faces or the interior. If false, basis functions are all associated with interior
1273  : public Basis<DeviceType,OutputScalar,PointScalar>
1274  {
1275  public:
1277 
1278  using typename BasisBase::OrdinalTypeArray1DHost;
1279  using typename BasisBase::OrdinalTypeArray2DHost;
1280 
1281  using typename BasisBase::OutputViewType;
1282  using typename BasisBase::PointViewType;
1283  using typename BasisBase::ScalarViewType;
1284 
1285  using typename BasisBase::ExecutionSpace;
1286 
1287  protected:
1288  int polyOrder_; // the maximum order of the polynomial
1289  EPointType pointType_;
1290  public:
1301  HierarchicalBasis_HDIV_PYR(int polyOrder, const EPointType pointType=POINTTYPE_DEFAULT)
1302  :
1303  polyOrder_(polyOrder),
1304  pointType_(pointType)
1305  {
1306  INTREPID2_TEST_FOR_EXCEPTION(pointType!=POINTTYPE_DEFAULT,std::invalid_argument,"PointType not supported");
1307  const auto & p = polyOrder;
1308  this->basisCardinality_ = p * p + 2 * p * (p+1) + 3 * p * p * (p-1);
1309  this->basisDegree_ = p;
1310  this->basisCellTopology_ = shards::CellTopology(shards::getCellTopologyData<shards::Pyramid<> >() );
1311  this->basisType_ = BASIS_FEM_HIERARCHICAL;
1312  this->basisCoordinates_ = COORDINATES_CARTESIAN;
1313  this->functionSpace_ = FUNCTION_SPACE_HDIV;
1314 
1315  const int degreeLength = 1;
1316  this->fieldOrdinalPolynomialDegree_ = OrdinalTypeArray2DHost("Integrated Legendre H(div) pyramid polynomial degree lookup", this->basisCardinality_, degreeLength);
1317  this->fieldOrdinalH1PolynomialDegree_ = OrdinalTypeArray2DHost("Integrated Legendre H(div) pyramid polynomial H^1 degree lookup", this->basisCardinality_, degreeLength);
1318 
1319  int fieldOrdinalOffset = 0;
1320 
1321  // **** face functions **** //
1322  // one quad face
1323  const int numFunctionsPerQuadFace = p*p;
1324 
1325  // following the ESEAS ordering: j increments first
1326  for (int j=0; j<p; j++)
1327  {
1328  for (int i=0; i<p; i++)
1329  {
1330  this->fieldOrdinalPolynomialDegree_ (fieldOrdinalOffset,0) = std::max(i,j);
1331  this->fieldOrdinalH1PolynomialDegree_(fieldOrdinalOffset,0) = std::max(i,j)+1;
1332  fieldOrdinalOffset++;
1333  }
1334  }
1335 
1336  const int numFunctionsPerTriFace = 2 * p * (p+1) / 4;
1337  const int numTriFaces = 4;
1338  for (int faceOrdinal=0; faceOrdinal<numTriFaces; faceOrdinal++)
1339  {
1340  for (int totalPolyOrder=0; totalPolyOrder<polyOrder_; totalPolyOrder++)
1341  {
1342  const int totalFaceDofs = (totalPolyOrder+1) * (totalPolyOrder+2) / 2; // number of dofs for which i+j <= totalPolyOrder
1343  const int totalFaceDofsPrevious = totalPolyOrder * (totalPolyOrder+1) / 2; // number of dofs for which i+j <= totalPolyOrder-1
1344  const int faceDofsForPolyOrder = totalFaceDofs - totalFaceDofsPrevious; // number of dofs for which i+j == totalPolyOrder
1345  for (int i=0; i<faceDofsForPolyOrder; i++)
1346  {
1347  this->fieldOrdinalPolynomialDegree_ (fieldOrdinalOffset,0) = totalPolyOrder;
1348  this->fieldOrdinalH1PolynomialDegree_(fieldOrdinalOffset,0) = totalPolyOrder+1;
1349  fieldOrdinalOffset++;
1350  }
1351  }
1352  }
1353 
1354  // **** interior functions **** //
1355  const int numFunctionsPerVolume = 3 * p * p * (p-1);
1356 
1357  // FAMILY I
1358  // following the ESEAS ordering: k increments first
1359  for (int k=2; k<=polyOrder_; k++)
1360  {
1361  for (int j=2; j<=polyOrder_; j++)
1362  {
1363  for (int i=0; i<polyOrder_; i++)
1364  {
1365  const int max_jk = std::max(j,k);
1366  const int max_ijk = std::max(max_jk,i);
1367  const int max_ip1jk = std::max(max_jk,i+1);
1368  this->fieldOrdinalPolynomialDegree_ (fieldOrdinalOffset,0) = max_ijk;
1369  this->fieldOrdinalH1PolynomialDegree_(fieldOrdinalOffset,0) = max_ip1jk;
1370  fieldOrdinalOffset++;
1371  }
1372  }
1373  }
1374 
1375  // FAMILY II
1376  for (int k=2; k<=polyOrder_; k++)
1377  {
1378  // ESEAS reverses i,j loop ordering for family II, relative to family I.
1379  // We follow ESEAS for convenience of cross-code comparison.
1380  for (int i=0; i<polyOrder_; i++)
1381  {
1382  for (int j=2; j<=polyOrder_; j++)
1383  {
1384  const int max_jk = std::max(j,k);
1385  const int max_ijk = std::max(max_jk,i);
1386  const int max_ip1jk = std::max(max_jk,i+1);
1387  this->fieldOrdinalPolynomialDegree_ (fieldOrdinalOffset,0) = max_ijk;
1388  this->fieldOrdinalH1PolynomialDegree_(fieldOrdinalOffset,0) = max_ip1jk;
1389  fieldOrdinalOffset++;
1390  }
1391  }
1392  }
1393 
1394  // FAMILY III
1395  for (int j=2; j<=polyOrder_; j++)
1396  {
1397  for (int i=2; i<=polyOrder_; i++)
1398  {
1399  const int max_ij = std::max(i,j);
1400  this->fieldOrdinalPolynomialDegree_ (fieldOrdinalOffset,0) = max_ij;
1401  this->fieldOrdinalH1PolynomialDegree_(fieldOrdinalOffset,0) = max_ij;
1402  fieldOrdinalOffset++;
1403  }
1404  }
1405 
1406  // FAMILY IV
1407  for (int k=2; k<=polyOrder_; k++)
1408  {
1409  for (int j=0; j<polyOrder_; j++)
1410  {
1411  for (int i=0; i<polyOrder_; i++)
1412  {
1413  const int max_jk = std::max(j,k);
1414  const int max_ijk = std::max(max_jk,i);
1415  const int max_ip1jp1k = std::max(std::max(j+1,k),i+1);
1416  this->fieldOrdinalPolynomialDegree_ (fieldOrdinalOffset,0) = max_ijk;
1417  this->fieldOrdinalH1PolynomialDegree_(fieldOrdinalOffset,0) = max_ip1jp1k;
1418  fieldOrdinalOffset++;
1419  }
1420  }
1421  }
1422 
1423  // FAMILY V
1424  for (int j=2; j<=polyOrder_; j++)
1425  {
1426  for (int i=2; i<=polyOrder_; i++)
1427  {
1428  const int max_ij = std::max(i,j);
1429  this->fieldOrdinalPolynomialDegree_ (fieldOrdinalOffset,0) = max_ij;
1430  this->fieldOrdinalH1PolynomialDegree_(fieldOrdinalOffset,0) = max_ij;
1431  fieldOrdinalOffset++;
1432  }
1433  }
1434 
1435  // FAMILY VI
1436  for (int i=2; i<=polyOrder_; i++)
1437  {
1438  this->fieldOrdinalPolynomialDegree_ (fieldOrdinalOffset,0) = i;
1439  this->fieldOrdinalH1PolynomialDegree_(fieldOrdinalOffset,0) = i;
1440  fieldOrdinalOffset++;
1441  }
1442 
1443  // FAMILY VII
1444  for (int j=2; j<=polyOrder_; j++)
1445  {
1446  this->fieldOrdinalPolynomialDegree_ (fieldOrdinalOffset,0) = j;
1447  this->fieldOrdinalH1PolynomialDegree_(fieldOrdinalOffset,0) = j;
1448  fieldOrdinalOffset++;
1449  }
1450 
1451  if (fieldOrdinalOffset != this->basisCardinality_)
1452  {
1453  std::cout << "Internal error: basis enumeration is incorrect; fieldOrdinalOffset = " << fieldOrdinalOffset;
1454  std::cout << "; basisCardinality_ = " << this->basisCardinality_ << std::endl;
1455 
1456  INTREPID2_TEST_FOR_EXCEPTION(fieldOrdinalOffset != this->basisCardinality_, std::invalid_argument, "Internal error: basis enumeration is incorrect");
1457  }
1458 
1459  // initialize tags
1460  {
1461  // Intrepid2 vertices:
1462  // 0: (-1,-1, 0)
1463  // 1: ( 1,-1, 0)
1464  // 2: ( 1, 1, 0)
1465  // 3: (-1, 1, 0)
1466  // 4: ( 0, 0, 1)
1467 
1468  // ESEAS vertices:
1469  // 0: ( 0, 0, 0)
1470  // 1: ( 1, 0, 0)
1471  // 2: ( 1, 1, 0)
1472  // 3: ( 0, 1, 0)
1473  // 4: ( 0, 0, 1)
1474 
1475  // The edge numbering appears to match between ESEAS and Intrepid2
1476 
1477  // ESEAS numbers pyramid faces differently from Intrepid2
1478  // See BlendProjectPyraTF in ESEAS.
1479  // See PyramidFaceNodeMap in Shards_BasicTopologies
1480  // ESEAS: 0123, 014, 124, 324, 034
1481  // Intrepid2: 014, 124, 234, 304, 0321
1482 
1483  const int intrepid2FaceOrdinals[5] {4,0,1,2,3}; // index is the ESEAS face ordinal; value is the intrepid2 ordinal
1484 
1485  const auto & cardinality = this->basisCardinality_;
1486 
1487  // Basis-dependent initializations
1488  const ordinal_type tagSize = 4; // size of DoF tag, i.e., number of fields in the tag
1489  const ordinal_type posScDim = 0; // position in the tag, counting from 0, of the subcell dim
1490  const ordinal_type posScOrd = 1; // position in the tag, counting from 0, of the subcell ordinal
1491  const ordinal_type posDfOrd = 2; // position in the tag, counting from 0, of DoF ordinal relative to the subcell
1492 
1493  OrdinalTypeArray1DHost tagView("tag view", cardinality*tagSize);
1494  const int faceDim = 2, volumeDim = 3;
1495 
1496  if (useCGBasis) {
1497  {
1498  int tagNumber = 0;
1499  {
1500  // quad face
1501  const int faceOrdinalESEAS = 0;
1502  const int faceOrdinalIntrepid2 = intrepid2FaceOrdinals[faceOrdinalESEAS];
1503 
1504  for (int functionOrdinal=0; functionOrdinal<numFunctionsPerQuadFace; functionOrdinal++)
1505  {
1506  tagView(tagNumber*tagSize+0) = faceDim; // face dimension
1507  tagView(tagNumber*tagSize+1) = faceOrdinalIntrepid2; // face id
1508  tagView(tagNumber*tagSize+2) = functionOrdinal; // local dof id
1509  tagView(tagNumber*tagSize+3) = numFunctionsPerQuadFace; // total number of dofs on this face
1510  tagNumber++;
1511  }
1512  }
1513  for (int triFaceOrdinalESEAS=0; triFaceOrdinalESEAS<numTriFaces; triFaceOrdinalESEAS++)
1514  {
1515  const int faceOrdinalESEAS = triFaceOrdinalESEAS + 1;
1516  const int faceOrdinalIntrepid2 = intrepid2FaceOrdinals[faceOrdinalESEAS];
1517  for (int functionOrdinal=0; functionOrdinal<numFunctionsPerTriFace; functionOrdinal++)
1518  {
1519  tagView(tagNumber*tagSize+0) = faceDim; // face dimension
1520  tagView(tagNumber*tagSize+1) = faceOrdinalIntrepid2; // face id
1521  tagView(tagNumber*tagSize+2) = functionOrdinal; // local dof id
1522  tagView(tagNumber*tagSize+3) = numFunctionsPerTriFace; // total number of dofs on this face
1523  tagNumber++;
1524  }
1525  }
1526  for (int functionOrdinal=0; functionOrdinal<numFunctionsPerVolume; functionOrdinal++)
1527  {
1528  tagView(tagNumber*tagSize+0) = volumeDim; // volume dimension
1529  tagView(tagNumber*tagSize+1) = 0; // volume id
1530  tagView(tagNumber*tagSize+2) = functionOrdinal; // local dof id
1531  tagView(tagNumber*tagSize+3) = numFunctionsPerVolume; // total number of dofs in this volume
1532  tagNumber++;
1533  }
1534  INTREPID2_TEST_FOR_EXCEPTION(tagNumber != this->basisCardinality_, std::invalid_argument, "Internal error: basis tag enumeration is incorrect");
1535  }
1536  } else {
1537  for (ordinal_type i=0;i<cardinality;++i) {
1538  tagView(i*tagSize+0) = volumeDim; // volume dimension
1539  tagView(i*tagSize+1) = 0; // volume ordinal
1540  tagView(i*tagSize+2) = i; // local dof id
1541  tagView(i*tagSize+3) = cardinality; // total number of dofs on this face
1542  }
1543  }
1544 
1545  // Basis-independent function sets tag and enum data in tagToOrdinal_ and ordinalToTag_ arrays:
1546  // tags are constructed on host
1547  this->setOrdinalTagData(this->tagToOrdinal_,
1548  this->ordinalToTag_,
1549  tagView,
1550  this->basisCardinality_,
1551  tagSize,
1552  posScDim,
1553  posScOrd,
1554  posDfOrd);
1555  }
1556  }
1557 
1562  const char* getName() const override {
1563  return "Intrepid2_HierarchicalBasis_HDIV_PYR";
1564  }
1565 
1568  virtual bool requireOrientation() const override {
1569  return (this->getDegree() > 2);
1570  }
1571 
1572  // since the getValues() below only overrides the FEM variant, we specify that
1573  // we use the base class's getValues(), which implements the FVD variant by throwing an exception.
1574  // (It's an error to use the FVD variant on this basis.)
1575  using BasisBase::getValues;
1576 
1595  virtual void getValues( OutputViewType outputValues, const PointViewType inputPoints,
1596  const EOperator operatorType = OPERATOR_VALUE ) const override
1597  {
1598  auto numPoints = inputPoints.extent_int(0);
1599 
1601 
1602  FunctorType functor(operatorType, outputValues, inputPoints, polyOrder_);
1603 
1604  const int outputVectorSize = getVectorSizeForHierarchicalParallelism<OutputScalar>();
1605  const int pointVectorSize = getVectorSizeForHierarchicalParallelism<PointScalar>();
1606  const int vectorSize = std::max(outputVectorSize,pointVectorSize);
1607  const int teamSize = 1; // because of the way the basis functions are computed, we don't have a second level of parallelism...
1608 
1609  auto policy = Kokkos::TeamPolicy<ExecutionSpace>(numPoints,teamSize,vectorSize);
1610  Kokkos::parallel_for("Hierarchical_HDIV_PYR_Functor", policy, functor);
1611  }
1612 
1621  BasisPtr<DeviceType,OutputScalar,PointScalar>
1622  getSubCellRefBasis(const ordinal_type subCellDim, const ordinal_type subCellOrd) const override{
1623  const auto & p = this->basisDegree_;
1624  if (subCellDim == 2)
1625  {
1626  if (subCellOrd == 4) // quad basis
1627  {
1629  using HVOL_QUAD = Basis_Derived_HVOL_QUAD<HVOL_LINE>;
1630  return Teuchos::rcp(new HVOL_QUAD(p-1));
1631  }
1632  else // tri basis
1633  {
1635  return Teuchos::rcp(new HVOL_Tri(p-1));
1636  }
1637  }
1638  INTREPID2_TEST_FOR_EXCEPTION(true,std::invalid_argument,"Input parameters out of bounds");
1639  }
1640 
1645  virtual BasisPtr<typename Kokkos::HostSpace::device_type, OutputScalar, PointScalar>
1646  getHostBasis() const override {
1647  using HostDeviceType = typename Kokkos::HostSpace::device_type;
1649  return Teuchos::rcp( new HostBasisType(polyOrder_, pointType_) );
1650  }
1651  };
1652 } // end namespace Intrepid2
1653 
1654 // do ETI with default (double) type
1656 
1657 #endif /* Intrepid2_HierarchicalBasis_HDIV_PYR_h */
Kokkos::View< ordinal_type *, typename ExecutionSpace::array_layout, Kokkos::HostSpace > OrdinalTypeArray1DHost
View type for 1d host array.
virtual void getValues(const ExecutionSpace &, OutputViewType, const PointViewType, const EOperator=OPERATOR_VALUE) const
Evaluation of a FEM basis on a reference cell.
Implementation of H(vol) basis on the quadrilateral that is templated on H(vol) on the line...
BasisPtr< DeviceType, OutputScalar, PointScalar > getSubCellRefBasis(const ordinal_type subCellDim, const ordinal_type subCellOrd) const override
returns the basis associated to a subCell.
Defines several coordinates and their gradients on the pyramid; maps from Intrepid2 (shards) pyramid ...
An abstract base class that defines interface for concrete basis implementations for Finite Element (...
Functor for computing values for the HierarchicalBasis_HDIV_PYR class.
Kokkos::DynRankView< scalarType, Kokkos::LayoutStride, DeviceType > ScalarViewType
View type for scalars.
Free functions, callable from device code, that implement various polynomials useful in basis definit...
const char * getName() const override
Returns basis name.
KOKKOS_INLINE_FUNCTION void E_QUAD(Kokkos::Array< OutputScalar, 3 > &EQUAD, const ordinal_type &i, const ordinal_type &j, const OutputScratchView &HomPi_s01, const PointScalar &s0, const PointScalar &s1, const Kokkos::Array< PointScalar, 3 > &s0_grad, const Kokkos::Array< PointScalar, 3 > &s1_grad, const OutputScratchView &HomLi_t01) const
EFunctionSpace functionSpace_
The function space in which the basis is defined.
ordinal_type basisDegree_
Degree of the largest complete polynomial space that can be represented by the basis.
Header function for Intrepid2::Util class and other utility functions.
KOKKOS_INLINE_FUNCTION void V_LEFT_TRI(Kokkos::Array< OutputScalar, 3 > &VLEFTTRI, const OutputScalar &phi_i, const Kokkos::Array< OutputScalar, 3 > &phi_i_grad, const OutputScalar &phi_j, const Kokkos::Array< OutputScalar, 3 > &phi_j_grad, const OutputScalar &t0, const Kokkos::Array< OutputScalar, 3 > &t0_grad) const
See Fuentes et al. (p. 455), definition of V_{ij}^{}.
HierarchicalBasis_HDIV_PYR(int polyOrder, const EPointType pointType=POINTTYPE_DEFAULT)
Constructor.
KOKKOS_INLINE_FUNCTION void cross(Kokkos::Array< OutputScalar, 3 > &c, const Kokkos::Array< OutputScalar, 3 > &a, const Kokkos::Array< OutputScalar, 3 > &b) const
cross product: c = a x b
Kokkos::View< ordinal_type **, typename ExecutionSpace::array_layout, Kokkos::HostSpace > OrdinalTypeArray2DHost
View type for 2d host array.
Basis defining integrated Legendre basis on the line, a polynomial subspace of H(grad) on the line...
Basis defining Legendre basis on the line, a polynomial subspace of L^2 (a.k.a. H(vol)) on the line...
Kokkos::DynRankView< OutputValueType, Kokkos::LayoutStride, DeviceType > OutputViewType
View type for basis value output.
KOKKOS_INLINE_FUNCTION void V_TRI_B42_DIV(OutputScalar &div_VTRI_mus0_mus1_s2_over_mu, const Kokkos::Array< OutputScalar, 3 > &VTRI_00_s0_s1_s2, const Kokkos::Array< OutputScalar, 3 > &EE_0_s0_s1, const OutputScalar &s2, const Kokkos::Array< OutputScalar, 3 > &s2_grad, const OutputScalar &mu, const Kokkos::Array< OutputScalar, 3 > &mu_grad, const ordinal_type &i, const ordinal_type &j, const OutputScratchView &P_mus0_mus1, const OutputScratchView &P_2ip1_mus0pmus1_s2) const
See Equation (B.42) in Fuentes et al. Computes 1/mu V^{tri}_ij(mu s0, mu s1, s2). ...
H(vol) basis on the line based on Legendre polynomials.
ordinal_type basisCardinality_
Cardinality of the basis, i.e., the number of basis functions/degrees-of-freedom. ...
KOKKOS_INLINE_FUNCTION void V_TRI_B42(Kokkos::Array< OutputScalar, 3 > &VTRI_mus0_mus1_s2_over_mu, const Kokkos::Array< OutputScalar, 3 > &VTRI_00_s0_s1_s2, const Kokkos::Array< OutputScalar, 3 > &EE_0_s0_s1, const OutputScalar &s2, const OutputScalar &mu, const Kokkos::Array< OutputScalar, 3 > &mu_grad, const ordinal_type &i, const ordinal_type &j, const OutputScratchView &P_mus0_mus1, const OutputScratchView &P_2ip1_mus0pmus1_s2) const
See Equation (B.42) in Fuentes et al. Computes 1/mu V^{tri}_ij(mu s0, mu s1, s2). ...
OrdinalTypeArray3DHost tagToOrdinal_
DoF tag to ordinal lookup table.
KOKKOS_INLINE_FUNCTION void dot(OutputScalar &c, const Kokkos::Array< OutputScalar, 3 > &a, const Kokkos::Array< OutputScalar, 3 > &b) const
dot product: c = a b
Implementation of H(vol) basis on the quadrilateral that is templated on H(vol) on the line...
virtual BasisPtr< typename Kokkos::HostSpace::device_type, OutputScalar, PointScalar > getHostBasis() const override
Creates and returns a Basis object whose DeviceType template argument is Kokkos::HostSpace::device_ty...
KOKKOS_INLINE_FUNCTION void V_QUAD(Kokkos::Array< OutputScalar, 3 > &VQUAD, const ordinal_type &i, const ordinal_type &j, const OutputScratchView &PHom_s, const PointScalar &s0, const PointScalar &s1, const Kokkos::Array< PointScalar, 3 > &s0_grad, const Kokkos::Array< PointScalar, 3 > &s1_grad, const OutputScratchView &PHom_t, const PointScalar &t0, const PointScalar &t1, const Kokkos::Array< PointScalar, 3 > &t0_grad, const Kokkos::Array< PointScalar, 3 > &t1_grad) const
OrdinalTypeArray2DHost ordinalToTag_
&quot;true&quot; if tagToOrdinal_ and ordinalToTag_ have been initialized
ordinal_type getDegree() const
Returns the degree of the basis.
ECoordinates basisCoordinates_
The coordinate system for which the basis is defined.
virtual bool requireOrientation() const override
True if orientation is required.
Kokkos::DynRankView< OutputValueType, Kokkos::LayoutStride, DeviceType > OutputViewType
View type for basis value output.
shards::CellTopology basisCellTopology_
Base topology of the cells for which the basis is defined. See the Shards package for definition of b...
OrdinalTypeArray2DHost fieldOrdinalH1PolynomialDegree_
H^1 polynomial degree for each degree of freedom. Only defined for hierarchical bases right now...
OrdinalTypeArray2DHost fieldOrdinalPolynomialDegree_
Polynomial degree for each degree of freedom. Only defined for hierarchical bases right now...
Basis defining Legendre basis on the line, a polynomial subspace of H(vol) on the line: extension to ...
Kokkos::DynRankView< PointValueType, Kokkos::LayoutStride, DeviceType > PointViewType
View type for input points.
void setOrdinalTagData(OrdinalTypeView3D &tagToOrdinal, OrdinalTypeView2D &ordinalToTag, const OrdinalTypeView1D tags, const ordinal_type basisCard, const ordinal_type tagSize, const ordinal_type posScDim, const ordinal_type posScOrd, const ordinal_type posDfOrd)
Fills ordinalToTag_ and tagToOrdinal_ by basis-specific tag data.
virtual void getValues(OutputViewType outputValues, const PointViewType inputPoints, const EOperator operatorType=OPERATOR_VALUE) const override
Evaluation of a FEM basis on a reference cell.
H(vol) basis on the triangle based on integrated Legendre polynomials.
KOKKOS_INLINE_FUNCTION void V_RIGHT_TRI(Kokkos::Array< OutputScalar, 3 > &VRIGHTTRI, const OutputScalar &mu1, const Kokkos::Array< OutputScalar, 3 > &mu1_grad, const OutputScalar &phi_i, const Kokkos::Array< OutputScalar, 3 > &phi_i_grad, const OutputScalar &t0, const Kokkos::Array< OutputScalar, 3 > &t0_grad) const
See Fuentes et al. (p. 455), definition of V_{ij}^{}.
Kokkos::DynRankView< PointValueType, Kokkos::LayoutStride, DeviceType > PointViewType
View type for input points.
Header file for the abstract base class Intrepid2::Basis.
typename DeviceType::execution_space ExecutionSpace
(Kokkos) Execution space for basis.
Kokkos::View< ordinal_type *, typename ExecutionSpace::array_layout, Kokkos::HostSpace > OrdinalTypeArray1DHost
View type for 1d host array.