Intrepid2
Intrepid2_HierarchicalBasis_HDIV_TET.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Intrepid2 Package
4 //
5 // Copyright 2007 NTESS and the Intrepid2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
15 #ifndef Intrepid2_HierarchicalBasis_HDIV_TET_h
16 #define Intrepid2_HierarchicalBasis_HDIV_TET_h
17 
18 #include <Kokkos_DynRankView.hpp>
19 
20 #include <Intrepid2_config.h>
21 
22 #include "Intrepid2_Basis.hpp"
25 #include "Intrepid2_Utils.hpp"
26 
27 namespace Intrepid2
28 {
34  template<class DeviceType, class OutputScalar, class PointScalar,
35  class OutputFieldType, class InputPointsType>
37  {
38  using ExecutionSpace = typename DeviceType::execution_space;
39  using ScratchSpace = typename ExecutionSpace::scratch_memory_space;
40  using OutputScratchView = Kokkos::View<OutputScalar*,ScratchSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
41  using PointScratchView = Kokkos::View<PointScalar*, ScratchSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
42 
43  using TeamPolicy = Kokkos::TeamPolicy<ExecutionSpace>;
44  using TeamMember = typename TeamPolicy::member_type;
45 
46  EOperator opType_;
47 
48  OutputFieldType output_; // F,P
49  InputPointsType inputPoints_; // P,D
50 
51  ordinal_type polyOrder_;
52  ordinal_type numFields_, numPoints_;
53 
54  size_t fad_size_output_;
55 
56  static constexpr ordinal_type numVertices = 4;
57  static constexpr ordinal_type numFaces = 4;
58  static constexpr ordinal_type numVerticesPerFace = 3;
59  static constexpr ordinal_type numInteriorFamilies = 3;
60 
61  // index into face_vertices with faceOrdinal * numVerticesPerFace + vertexNumber
62  const ordinal_type face_vertices[numFaces*numVerticesPerFace] = {0,1,2, // face 0
63  0,1,3, // face 1
64  1,2,3, // face 2
65  0,2,3 // face 3
66  };
67 
68  const ordinal_type numFaceFunctionsPerFace_;
69  const ordinal_type numFaceFunctions_;
70  const ordinal_type numInteriorFunctionsPerFamily_;
71  const ordinal_type numInteriorFunctions_;
72 
73  // interior basis functions are computed in terms of certain face basis functions.
74  const ordinal_type faceOrdinalForInterior_[numInteriorFamilies] = {0,2,3};
75  const ordinal_type faceFamilyForInterior_[numInteriorFamilies] = {0,0,1};
76  const ordinal_type interiorCoordinateOrdinal_[numInteriorFamilies] = {3,0,1}; // m, where V^b_{ijk} is computed in terms of [L^{2(i+j)}_k](1-lambda_m, lambda_m)
77 
78  //
79  const ordinal_type interior_face_family_start_ [numInteriorFamilies] = {0,0,1};
80  const ordinal_type interior_face_family_middle_[numInteriorFamilies] = {1,1,2};
81  const ordinal_type interior_face_family_end_ [numInteriorFamilies] = {2,2,0};
82 
83  KOKKOS_INLINE_FUNCTION
84  ordinal_type dofOrdinalForFace(const ordinal_type &faceOrdinal,
85  const ordinal_type &zeroBasedFaceFamily,
86  const ordinal_type &i,
87  const ordinal_type &j) const
88  {
89  // determine where the functions for this face start
90  const ordinal_type faceDofOffset = faceOrdinal * numFaceFunctionsPerFace_;
91 
92  // rather than deriving a closed formula in terms of i and j (which is potentially error-prone),
93  // we simply step through a for loop much as we do in the basis computations themselves. (This method
94  // is not expected to be called so much as to be worth optimizing.)
95 
96  const ordinal_type max_ij_sum = polyOrder_ - 1;
97 
98  ordinal_type fieldOrdinal = faceDofOffset + zeroBasedFaceFamily; // families are interleaved on the face.
99 
100  for (ordinal_type ij_sum=1; ij_sum <= max_ij_sum; ij_sum++)
101  {
102  for (ordinal_type ii=0; ii<ij_sum; ii++)
103  {
104  // j will be ij_sum - i; j >= 1.
105  const ordinal_type jj = ij_sum - ii; // jj >= 1
106  if ( (ii == i) && (jj == j))
107  {
108  // have reached the (i,j) we're looking for
109  return fieldOrdinal;
110  }
111  fieldOrdinal++;
112  }
113  }
114  return -1; // error: not found.
115  }
116 
117  Hierarchical_HDIV_TET_Functor(EOperator opType, OutputFieldType output, InputPointsType inputPoints, int polyOrder)
118  : opType_(opType), output_(output), inputPoints_(inputPoints),
119  polyOrder_(polyOrder),
120  fad_size_output_(getScalarDimensionForView(output)),
121  numFaceFunctionsPerFace_(polyOrder * (polyOrder+1)/2), // p*(p+1)/2 functions per face
122  numFaceFunctions_(numFaceFunctionsPerFace_*numFaces), // 4 faces
123  numInteriorFunctionsPerFamily_((polyOrder-1)*polyOrder*(polyOrder+1)/6), // (p+1) choose 3
124  numInteriorFunctions_(numInteriorFunctionsPerFamily_ * numInteriorFamilies) // 3 families of interior functions
125  {
126  numFields_ = output.extent_int(0);
127  numPoints_ = output.extent_int(1);
128 
129  const ordinal_type expectedCardinality = numFaceFunctions_ + numInteriorFunctions_;
130 
131  // interior family I: computed in terms of face 012 (face ordinal 0), ordinal 0 in face family I.
132  // interior family II: computed in terms of face 123 (face ordinal 2), ordinal 2 in face family I.
133  // interior family III: computed in terms of face 230 (face ordinal 3), ordinal 3 in face family II.
134 
135  INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != inputPoints.extent_int(0), std::invalid_argument, "point counts need to match!");
136  INTREPID2_TEST_FOR_EXCEPTION(numFields_ != expectedCardinality, std::invalid_argument, "output field size does not match basis cardinality");
137  }
138 
139  KOKKOS_INLINE_FUNCTION
140  void computeFaceJacobi(OutputScratchView &P_2ip1,
141  const ordinal_type &zeroBasedFaceOrdinal,
142  const ordinal_type &i,
143  const PointScalar* lambda) const
144  {
145  // index into face_vertices with faceOrdinal * numVerticesPerFace + vertexNumber
146  const auto &s0_index = face_vertices[zeroBasedFaceOrdinal * numVerticesPerFace + 0];
147  const auto &s1_index = face_vertices[zeroBasedFaceOrdinal * numVerticesPerFace + 1];
148  const auto &s2_index = face_vertices[zeroBasedFaceOrdinal * numVerticesPerFace + 2];
149 
150  const auto & s0 = lambda[s0_index];
151  const auto & s1 = lambda[s1_index];
152  const auto & s2 = lambda[s2_index];
153  const PointScalar jacobiScaling = s0 + s1 + s2;
154 
155  const double alpha = i*2.0 + 1;
156  Polynomials::shiftedScaledJacobiValues(P_2ip1, alpha, polyOrder_-1, s2, jacobiScaling);
157  }
158 
160  KOKKOS_INLINE_FUNCTION
161  void computeFaceJacobiForInterior(OutputScratchView &P_2ip1,
162  const ordinal_type &zeroBasedInteriorFamilyOrdinal,
163  const ordinal_type &i,
164  const PointScalar* lambda) const
165  {
166  const ordinal_type & relatedFaceOrdinal = faceOrdinalForInterior_[zeroBasedInteriorFamilyOrdinal];
167  const auto &s0_vertex_number = interior_face_family_start_ [zeroBasedInteriorFamilyOrdinal];
168  const auto &s1_vertex_number = interior_face_family_middle_[zeroBasedInteriorFamilyOrdinal];
169  const auto &s2_vertex_number = interior_face_family_end_ [zeroBasedInteriorFamilyOrdinal];
170 
171  // index into face_vertices with faceOrdinal * numVerticesPerFace + vertexNumber
172  const auto &s0_index = face_vertices[relatedFaceOrdinal * numVerticesPerFace + s0_vertex_number];
173  const auto &s1_index = face_vertices[relatedFaceOrdinal * numVerticesPerFace + s1_vertex_number];
174  const auto &s2_index = face_vertices[relatedFaceOrdinal * numVerticesPerFace + s2_vertex_number];
175 
176  const auto & s0 = lambda[s0_index];
177  const auto & s1 = lambda[s1_index];
178  const auto & s2 = lambda[s2_index];
179  const PointScalar jacobiScaling = s0 + s1 + s2;
180 
181  const double alpha = i*2.0 + 1;
182  Polynomials::shiftedScaledJacobiValues(P_2ip1, alpha, polyOrder_-1, s2, jacobiScaling);
183  }
184 
185  KOKKOS_INLINE_FUNCTION
186  void computeFaceLegendre(OutputScratchView &P,
187  const ordinal_type &zeroBasedFaceOrdinal,
188  const PointScalar* lambda) const
189  {
190  // index into face_vertices with faceOrdinal * numVerticesPerFace + vertexNumber
191  const auto &s0_index = face_vertices[zeroBasedFaceOrdinal * numVerticesPerFace + 0];
192  const auto &s1_index = face_vertices[zeroBasedFaceOrdinal * numVerticesPerFace + 1];
193 
194  const auto & s0 = lambda[s0_index];
195  const auto & s1 = lambda[s1_index];
196  const PointScalar legendreScaling = s0 + s1;
197 
198  Polynomials::shiftedScaledLegendreValues(P, polyOrder_-1, s1, legendreScaling);
199  }
200 
201  KOKKOS_INLINE_FUNCTION
202  void computeFaceLegendreForInterior(OutputScratchView &P,
203  const ordinal_type &zeroBasedInteriorFamilyOrdinal,
204  const PointScalar* lambda) const
205  {
206  const ordinal_type & relatedFaceOrdinal = faceOrdinalForInterior_[zeroBasedInteriorFamilyOrdinal];
207  const auto &s0_vertex_number = interior_face_family_start_ [zeroBasedInteriorFamilyOrdinal];
208  const auto &s1_vertex_number = interior_face_family_middle_[zeroBasedInteriorFamilyOrdinal];
209 
210  // index into face_vertices with faceOrdinal * numVerticesPerFace + vertexNumber
211  const auto &s0_index = face_vertices[relatedFaceOrdinal * numVerticesPerFace + s0_vertex_number];
212  const auto &s1_index = face_vertices[relatedFaceOrdinal * numVerticesPerFace + s1_vertex_number];
213 
214  const auto & s0 = lambda[s0_index];
215  const auto & s1 = lambda[s1_index];
216  const PointScalar legendreScaling = s0 + s1;
217 
218  Polynomials::shiftedScaledLegendreValues(P, polyOrder_-1, s1, legendreScaling);
219  }
220 
221  KOKKOS_INLINE_FUNCTION
222  void computeFaceVectorWeight(OutputScalar &vectorWeight_x,
223  OutputScalar &vectorWeight_y,
224  OutputScalar &vectorWeight_z,
225  const ordinal_type &zeroBasedFaceOrdinal,
226  const PointScalar* lambda,
227  const PointScalar* lambda_dx,
228  const PointScalar* lambda_dy,
229  const PointScalar* lambda_dz) const
230  {
231  // compute s0 (grad s1 x grad s2) + s1 (grad s2 x grad s0) + s2 (grad s0 x grad s1)
232 
233  const auto &s0_index = face_vertices[zeroBasedFaceOrdinal * numVerticesPerFace + 0];
234  const auto &s1_index = face_vertices[zeroBasedFaceOrdinal * numVerticesPerFace + 1];
235  const auto &s2_index = face_vertices[zeroBasedFaceOrdinal * numVerticesPerFace + 2];
236 
237  const auto & s0 = lambda [s0_index];
238  const auto & s0_dx = lambda_dx[s0_index];
239  const auto & s0_dy = lambda_dy[s0_index];
240  const auto & s0_dz = lambda_dz[s0_index];
241 
242  const auto & s1 = lambda [s1_index];
243  const auto & s1_dx = lambda_dx[s1_index];
244  const auto & s1_dy = lambda_dy[s1_index];
245  const auto & s1_dz = lambda_dz[s1_index];
246 
247  const auto & s2 = lambda [s2_index];
248  const auto & s2_dx = lambda_dx[s2_index];
249  const auto & s2_dy = lambda_dy[s2_index];
250  const auto & s2_dz = lambda_dz[s2_index];
251 
252  vectorWeight_x = s0 * (s1_dy * s2_dz - s1_dz * s2_dy)
253  + s1 * (s2_dy * s0_dz - s2_dz * s0_dy)
254  + s2 * (s0_dy * s1_dz - s0_dz * s1_dy);
255 
256  vectorWeight_y = s0 * (s1_dz * s2_dx - s1_dx * s2_dz)
257  + s1 * (s2_dz * s0_dx - s2_dx * s0_dz)
258  + s2 * (s0_dz * s1_dx - s0_dx * s1_dz);
259 
260  vectorWeight_z = s0 * (s1_dx * s2_dy - s1_dy * s2_dx)
261  + s1 * (s2_dx * s0_dy - s2_dy * s0_dx)
262  + s2 * (s0_dx * s1_dy - s0_dy * s1_dx);
263  }
264 
265  // This is the "Ancillary Operator" V^{tri}_{ij} on p. 433 of Fuentes et al.
266  KOKKOS_INLINE_FUNCTION
267  void faceFunctionValue(OutputScalar &value_x,
268  OutputScalar &value_y,
269  OutputScalar &value_z,
270  const ordinal_type &i, // i >= 0
271  const ordinal_type &j, // j >= 0
272  const OutputScratchView &P, // container in which shiftedScaledLegendreValues have been computed for the appropriate face
273  const OutputScratchView &P_2ip1, // container in which shiftedScaledJacobiValues have been computed for (2i+1) for the appropriate face
274  const OutputScalar &vectorWeight_x, // x component of s0 (grad s1 x grad s2) + s1 (grad s2 x grad s0) + s2 (grad s0 x grad s1)
275  const OutputScalar &vectorWeight_y, // y component
276  const OutputScalar &vectorWeight_z, // z component
277  const PointScalar* lambda) const
278  {
279  const auto &P_i = P(i);
280  const auto &P_2ip1_j = P_2ip1(j);
281 
282  value_x = P_i * P_2ip1_j * vectorWeight_x;
283  value_y = P_i * P_2ip1_j * vectorWeight_y;
284  value_z = P_i * P_2ip1_j * vectorWeight_z;
285  }
286 
287  KOKKOS_INLINE_FUNCTION
288  void computeFaceDivWeight(OutputScalar &divWeight,
289  const ordinal_type &zeroBasedFaceOrdinal,
290  const PointScalar* lambda_dx,
291  const PointScalar* lambda_dy,
292  const PointScalar* lambda_dz) const
293  {
294  // grad s0 \dot (grad s1 x grad s2)
295 
296  const auto &s0_index = face_vertices[zeroBasedFaceOrdinal * numVerticesPerFace + 0];
297  const auto &s1_index = face_vertices[zeroBasedFaceOrdinal * numVerticesPerFace + 1];
298  const auto &s2_index = face_vertices[zeroBasedFaceOrdinal * numVerticesPerFace + 2];
299 
300  const auto & s0_dx = lambda_dx[s0_index];
301  const auto & s0_dy = lambda_dy[s0_index];
302  const auto & s0_dz = lambda_dz[s0_index];
303 
304  const auto & s1_dx = lambda_dx[s1_index];
305  const auto & s1_dy = lambda_dy[s1_index];
306  const auto & s1_dz = lambda_dz[s1_index];
307 
308  const auto & s2_dx = lambda_dx[s2_index];
309  const auto & s2_dy = lambda_dy[s2_index];
310  const auto & s2_dz = lambda_dz[s2_index];
311 
312  divWeight = s0_dx * (s1_dy * s2_dz - s1_dz * s2_dy)
313  + s0_dy * (s1_dz * s2_dx - s1_dx * s2_dz)
314  + s0_dz * (s1_dx * s2_dy - s1_dy * s2_dx);
315  }
316 
317  KOKKOS_INLINE_FUNCTION
318  void computeInteriorIntegratedJacobi(OutputScratchView &L_2ipjp1,
319  const ordinal_type &i,
320  const ordinal_type &j,
321  const ordinal_type &zeroBasedFamilyOrdinal,
322  const PointScalar* lambda) const
323  {
324  const auto &lambda_m = lambda[interiorCoordinateOrdinal_[zeroBasedFamilyOrdinal]];
325 
326  const double alpha = 2 * (i + j + 1);
327 
328  const PointScalar jacobiScaling = 1.0;
329  Polynomials::shiftedScaledIntegratedJacobiValues(L_2ipjp1, alpha, polyOrder_-1, lambda_m, jacobiScaling);
330  }
331 
332  KOKKOS_INLINE_FUNCTION
333  void computeInteriorJacobi(OutputScratchView &P_2ipjp1,
334  const ordinal_type &i,
335  const ordinal_type &j,
336  const ordinal_type &zeroBasedFamilyOrdinal,
337  const PointScalar* lambda) const
338  {
339  const auto &lambda_m = lambda[interiorCoordinateOrdinal_[zeroBasedFamilyOrdinal]];
340 
341  const double alpha = 2 * (i + j + 1);
342 
343  const PointScalar jacobiScaling = 1.0;
344  Polynomials::shiftedScaledJacobiValues(P_2ipjp1, alpha, polyOrder_-1, lambda_m, jacobiScaling);
345  }
346 
347  // Divergence of the "Ancillary Operator" V^{tri}_{ij} on p. 433 of Fuentes et al.
348  KOKKOS_INLINE_FUNCTION
349  void faceFunctionDiv(OutputScalar &divValue,
350  const ordinal_type &i, // i >= 0
351  const ordinal_type &j, // j >= 0
352  const OutputScratchView &P, // container in which shiftedScaledLegendreValues have been computed for the appropriate face
353  const OutputScratchView &P_2ip1, // container in which shiftedScaledJacobiValues have been computed for (2i+1) for the appropriate face
354  const OutputScalar &divWeight, // grad s0 \dot (grad s1 x grad s2)
355  const PointScalar* lambda) const
356  {
357  const auto &P_i = P(i);
358  const auto &P_2ip1_j = P_2ip1(j);
359 
360  divValue = (i + j + 3.) * P_i * P_2ip1_j * divWeight;
361  }
362 
363  // grad ([L^{2(i+j+1)}_k](1-lambda_m,lambda_m)), used in divergence of interior basis functions
364  KOKKOS_INLINE_FUNCTION
365  void gradInteriorIntegratedJacobi(OutputScalar &L_2ipjp1_dx,
366  OutputScalar &L_2ipjp1_dy,
367  OutputScalar &L_2ipjp1_dz,
368  const ordinal_type &zeroBasedFamilyOrdinal,
369  const ordinal_type &j,
370  const ordinal_type &k,
371  const OutputScratchView &P_2ipjp1, // container in which shiftedScaledJacobiValues have been computed for alpha=2(i+j+1), t0=1-lambda_m, t1=lambda_m
372  const PointScalar* lambda,
373  const PointScalar* lambda_dx,
374  const PointScalar* lambda_dy,
375  const PointScalar* lambda_dz) const
376  {
377  // grad [L^alpha_k](t0,t1) = [P^alpha_{k-1}](t0,t1) grad(t1) + [R^alpha_{k-1}](t0,t1) grad(t1+t0)
378  // here, t0 = 1-lambda_m, t1 = lambda_m ==> t1 + t0 = 1 ==> grad(t1+t0) = 0 ==> the R term vanishes.
379 
380  const ordinal_type &m = interiorCoordinateOrdinal_[zeroBasedFamilyOrdinal];
381 
382  L_2ipjp1_dx = P_2ipjp1(k-1) * lambda_dx[m];
383  L_2ipjp1_dy = P_2ipjp1(k-1) * lambda_dy[m];
384  L_2ipjp1_dz = P_2ipjp1(k-1) * lambda_dz[m];
385  }
386 
387  KOKKOS_INLINE_FUNCTION
388  void interiorFunctionDiv(OutputScalar &outputDiv,
389  OutputScalar &L_2ipjp1_k,
390  OutputScalar &faceDiv,
391  OutputScalar &L_2ipjp1_k_dx,
392  OutputScalar &L_2ipjp1_k_dy,
393  OutputScalar &L_2ipjp1_k_dz,
394  OutputScalar &faceValue_x,
395  OutputScalar &faceValue_y,
396  OutputScalar &faceValue_z) const
397  {
398  outputDiv = L_2ipjp1_k * faceDiv + L_2ipjp1_k_dx * faceValue_x + L_2ipjp1_k_dy * faceValue_y + L_2ipjp1_k_dz * faceValue_z;
399  }
400 
401  KOKKOS_INLINE_FUNCTION
402  void operator()(const TeamMember &teamMember) const {
403  auto pointOrdinal = teamMember.league_rank();
404  OutputScratchView scratch0, scratch1, scratch2, scratch3;
405  if (fad_size_output_ > 0) {
406  scratch0 = OutputScratchView(teamMember.team_shmem(), polyOrder_, fad_size_output_);
407  scratch1 = OutputScratchView(teamMember.team_shmem(), polyOrder_, fad_size_output_);
408  scratch2 = OutputScratchView(teamMember.team_shmem(), polyOrder_, fad_size_output_);
409  scratch3 = OutputScratchView(teamMember.team_shmem(), polyOrder_, fad_size_output_);
410  }
411  else {
412  scratch0 = OutputScratchView(teamMember.team_shmem(), polyOrder_);
413  scratch1 = OutputScratchView(teamMember.team_shmem(), polyOrder_);
414  scratch2 = OutputScratchView(teamMember.team_shmem(), polyOrder_);
415  scratch3 = OutputScratchView(teamMember.team_shmem(), polyOrder_);
416  }
417 
418  const auto & x = inputPoints_(pointOrdinal,0);
419  const auto & y = inputPoints_(pointOrdinal,1);
420  const auto & z = inputPoints_(pointOrdinal,2);
421 
422  // write as barycentric coordinates:
423  const PointScalar lambda[4] = {1. - x - y - z, x, y, z};
424  const PointScalar lambda_dx[4] = {-1., 1., 0., 0.};
425  const PointScalar lambda_dy[4] = {-1., 0., 1., 0.};
426  const PointScalar lambda_dz[4] = {-1., 0., 0., 1.};
427 
428  switch (opType_)
429  {
430  case OPERATOR_VALUE:
431  {
432  // face functions
433  {
434  // relabel scratch views
435  auto &scratchP = scratch0;
436  auto &scratchP_2ip1 = scratch1;
437 
438  const ordinal_type max_ij_sum = polyOrder_ - 1;
439 
440  for (ordinal_type faceOrdinal=0; faceOrdinal<numFaces; faceOrdinal++)
441  {
442  OutputScalar divWeight;
443  computeFaceDivWeight(divWeight, faceOrdinal, lambda_dx, lambda_dy, lambda_dz);
444 
445  OutputScalar vectorWeight_x, vectorWeight_y, vectorWeight_z;
446  computeFaceVectorWeight(vectorWeight_x, vectorWeight_y, vectorWeight_z, faceOrdinal, lambda, lambda_dx, lambda_dy, lambda_dz);
447 
448  ordinal_type fieldOrdinal = faceOrdinal * numFaceFunctionsPerFace_;
449  computeFaceLegendre(scratchP, faceOrdinal, lambda);
450 
451  for (int ij_sum=0; ij_sum <= max_ij_sum; ij_sum++)
452  {
453  for (int i=0; i<=ij_sum; i++)
454  {
455  computeFaceJacobi(scratchP_2ip1, faceOrdinal, i, lambda);
456 
457  const int j = ij_sum - i; // j >= 1
458 
459  auto & output_x = output_(fieldOrdinal,pointOrdinal,0);
460  auto & output_y = output_(fieldOrdinal,pointOrdinal,1);
461  auto & output_z = output_(fieldOrdinal,pointOrdinal,2);
462 
463  faceFunctionValue(output_x, output_y, output_z, i, j,
464  scratchP, scratchP_2ip1, vectorWeight_x,
465  vectorWeight_y, vectorWeight_z, lambda);
466 
467  fieldOrdinal++;
468  } // i
469  } // ij_sum
470  } // faceOrdinal
471  } // face functions block
472 
473  // interior functions
474  {
475  // relabel scratch views
476  auto &scratchP = scratch0;
477  auto &scratchP_2ip1 = scratch1;
478  auto &scratchL_2ipjp1 = scratch2; // L^{2(i+j+1)}, integrated Jacobi
479 
480  const ordinal_type min_ijk_sum = 1;
481  const ordinal_type max_ijk_sum = polyOrder_-1;
482  const ordinal_type min_ij_sum = 0;
483  const ordinal_type min_k = 1;
484  const ordinal_type min_j = 0;
485  const ordinal_type min_i = 0;
486 
487  OutputScalar vectorWeight_x, vectorWeight_y, vectorWeight_z;
488 
489  for (int interiorFamilyOrdinal=1; interiorFamilyOrdinal<=numInteriorFamilies; interiorFamilyOrdinal++)
490  {
491  // following ESEAS, we interleave the interior families. This groups all the interior dofs of a given degree together.
492 
493  ordinal_type interiorFamilyFieldOrdinal =
494  numFaceFunctions_ + interiorFamilyOrdinal - 1;
495 
496  const ordinal_type relatedFaceOrdinal = faceOrdinalForInterior_[interiorFamilyOrdinal-1];
497 
498  computeFaceLegendreForInterior(scratchP,
499  interiorFamilyOrdinal - 1, lambda);
500  computeFaceVectorWeight(vectorWeight_x, vectorWeight_y, vectorWeight_z, relatedFaceOrdinal, lambda, lambda_dx, lambda_dy, lambda_dz);
501 
502  for (int ijk_sum=min_ijk_sum; ijk_sum <= max_ijk_sum; ijk_sum++)
503  {
504  for (int ij_sum=min_ij_sum; ij_sum<=ijk_sum-min_k; ij_sum++)
505  {
506  for (int i=min_i; i<=ij_sum-min_j; i++)
507  {
508  const ordinal_type j = ij_sum-i;
509  const ordinal_type k = ijk_sum - ij_sum;
510 
512  scratchP_2ip1, interiorFamilyOrdinal - 1, i, lambda);
513  computeInteriorIntegratedJacobi(scratchL_2ipjp1, i, j,
514  interiorFamilyOrdinal - 1,
515  lambda);
516 
517  OutputScalar V_x, V_y, V_z;
518 
519  faceFunctionValue(V_x, V_y, V_z, i, j, scratchP,
520  scratchP_2ip1, vectorWeight_x,
521  vectorWeight_y, vectorWeight_z, lambda);
522 
523  auto &output_x =
524  output_(interiorFamilyFieldOrdinal, pointOrdinal, 0);
525  auto &output_y =
526  output_(interiorFamilyFieldOrdinal, pointOrdinal, 1);
527  auto &output_z =
528  output_(interiorFamilyFieldOrdinal, pointOrdinal, 2);
529 
530  output_x = V_x * scratchL_2ipjp1(k);
531  output_y = V_y * scratchL_2ipjp1(k);
532  output_z = V_z * scratchL_2ipjp1(k);
533 
534  interiorFamilyFieldOrdinal +=
535  numInteriorFamilies; // increment due to the
536  // interleaving.
537  }
538  }
539  }
540  }
541  } // interior functions block
542 
543  } // end OPERATOR_VALUE
544  break;
545  case OPERATOR_DIV:
546  {
547  // rename the scratch memory to match our usage here:
548  auto &scratchP = scratch0;
549  auto &scratchP_2ip1 = scratch1;
550 
551  // following ESEAS, we interleave the face families. This groups all the face dofs of a given degree together.
552  ordinal_type fieldOrdinal = 0;
553  for (int faceOrdinal=0; faceOrdinal<numFaces; faceOrdinal++)
554  {
555  const int max_ij_sum = polyOrder_ - 1;
556  computeFaceLegendre(scratchP, faceOrdinal, lambda);
557  OutputScalar divWeight;
558  computeFaceDivWeight(divWeight, faceOrdinal, lambda_dx, lambda_dy, lambda_dz);
559  for (int ij_sum=0; ij_sum <= max_ij_sum; ij_sum++)
560  {
561  for (int i=0; i<=ij_sum; i++)
562  {
563  const int j = ij_sum - i; // j >= 0
564 
565  computeFaceJacobi(scratchP_2ip1, faceOrdinal, i, lambda);
566  auto &outputValue = output_(fieldOrdinal,pointOrdinal);
567  faceFunctionDiv(outputValue, i, j, scratchP, scratchP_2ip1,
568  divWeight, lambda);
569 
570  fieldOrdinal++;
571  } // i
572  } // ij_sum
573  } // faceOrdinal
574 
575  // interior functions
576  {
577  // rename the scratch memory to match our usage here:
578  auto &scratchL_2ipjp1 = scratch2;
579  auto &scratchP_2ipjp1 = scratch3;
580 
581  const int interiorFieldOrdinalOffset = numFaceFunctions_;
582  for (int interiorFamilyOrdinal=1; interiorFamilyOrdinal<=numInteriorFamilies; interiorFamilyOrdinal++)
583  {
584  // following ESEAS, we interleave the interior families. This groups all the interior dofs of a given degree together.
585 
586  const ordinal_type relatedFaceOrdinal = faceOrdinalForInterior_[interiorFamilyOrdinal-1];
587 
588  computeFaceLegendreForInterior(scratchP,
589  interiorFamilyOrdinal - 1, lambda);
590  OutputScalar divWeight;
591  computeFaceDivWeight(divWeight, relatedFaceOrdinal, lambda_dx, lambda_dy, lambda_dz);
592 
593  OutputScalar vectorWeight_x, vectorWeight_y, vectorWeight_z;
594  computeFaceVectorWeight(vectorWeight_x, vectorWeight_y, vectorWeight_z, relatedFaceOrdinal, lambda, lambda_dx, lambda_dy, lambda_dz);
595 
596  ordinal_type interiorFieldOrdinal = interiorFieldOrdinalOffset + interiorFamilyOrdinal - 1;
597 
598  const ordinal_type min_ijk_sum = 1;
599  const ordinal_type max_ijk_sum = polyOrder_-1;
600  const ordinal_type min_ij_sum = 0;
601  const ordinal_type min_k = 1;
602  const ordinal_type min_j = 0;
603  const ordinal_type min_i = 0;
604  for (int ijk_sum=min_ijk_sum; ijk_sum <= max_ijk_sum; ijk_sum++)
605  {
606  for (int ij_sum=min_ij_sum; ij_sum<=ijk_sum-min_k; ij_sum++)
607  {
608  for (int i=min_i; i<=ij_sum-min_j; i++)
609  {
610  const ordinal_type j = ij_sum-i;
611  const ordinal_type k = ijk_sum - ij_sum;
613  scratchP_2ip1, interiorFamilyOrdinal - 1, i, lambda);
614 
615  OutputScalar faceDiv;
616  faceFunctionDiv(faceDiv, i, j, scratchP, scratchP_2ip1,
617  divWeight, lambda);
618 
619  OutputScalar faceValue_x, faceValue_y, faceValue_z;
620 
621  faceFunctionValue(faceValue_x, faceValue_y, faceValue_z, i,
622  j, scratchP, scratchP_2ip1,
623  vectorWeight_x, vectorWeight_y,
624  vectorWeight_z, lambda);
625  computeInteriorJacobi(scratchP_2ipjp1, i, j,
626  interiorFamilyOrdinal - 1, lambda);
627 
628  computeInteriorIntegratedJacobi(scratchL_2ipjp1, i, j,
629  interiorFamilyOrdinal - 1,
630  lambda);
631 
632  OutputScalar L_2ipjp1_k_dx, L_2ipjp1_k_dy, L_2ipjp1_k_dz;
633  gradInteriorIntegratedJacobi(
634  L_2ipjp1_k_dx, L_2ipjp1_k_dy, L_2ipjp1_k_dz,
635  interiorFamilyOrdinal - 1, j, k, scratchP_2ipjp1,
636  lambda, lambda_dx, lambda_dy, lambda_dz);
637 
638  auto &outputDiv = output_(interiorFieldOrdinal, pointOrdinal);
639  interiorFunctionDiv(outputDiv, scratchL_2ipjp1(k), faceDiv,
640  L_2ipjp1_k_dx, L_2ipjp1_k_dy,
641  L_2ipjp1_k_dz, faceValue_x, faceValue_y,
642  faceValue_z);
643 
644  interiorFieldOrdinal += numInteriorFamilies; // increment due to the interleaving.
645  }
646  }
647  }
648  }
649  } // interior functions block
650  } // OPERATOR_DIV
651  break;
652  case OPERATOR_GRAD:
653  case OPERATOR_D1:
654  case OPERATOR_D2:
655  case OPERATOR_D3:
656  case OPERATOR_D4:
657  case OPERATOR_D5:
658  case OPERATOR_D6:
659  case OPERATOR_D7:
660  case OPERATOR_D8:
661  case OPERATOR_D9:
662  case OPERATOR_D10:
663  INTREPID2_TEST_FOR_ABORT( true,
664  ">>> ERROR: (Intrepid2::Hierarchical_HDIV_TET_Functor) Unsupported differential operator");
665  default:
666  // unsupported operator type
667  device_assert(false);
668  }
669  }
670 
671  // Provide the shared memory capacity.
672  // This function takes the team_size as an argument,
673  // which allows team_size-dependent allocations.
674  size_t team_shmem_size (int team_size) const
675  {
676  // we will use shared memory to create a fast buffer for basis computations
677  size_t shmem_size = 0;
678  if (fad_size_output_ > 0)
679  shmem_size += 4 * OutputScratchView::shmem_size(polyOrder_ + 1, fad_size_output_);
680  else
681  shmem_size += 4 * OutputScratchView::shmem_size(polyOrder_ + 1);
682 
683  return shmem_size;
684  }
685  };
686 
695  template<typename DeviceType,
696  typename OutputScalar = double,
697  typename PointScalar = double,
698  bool useCGBasis = true> // if useCGBasis is false, all basis functions will be associated with the interior
700  : public Basis<DeviceType,OutputScalar,PointScalar>
701  {
702  public:
704 
705  using typename BasisBase::OrdinalTypeArray1DHost;
706  using typename BasisBase::OrdinalTypeArray2DHost;
707 
708  using typename BasisBase::OutputViewType;
709  using typename BasisBase::PointViewType;
710  using typename BasisBase::ScalarViewType;
711 
712  using typename BasisBase::ExecutionSpace;
713 
714  protected:
715  int polyOrder_; // the maximum order of the polynomial
716  public:
721  HierarchicalBasis_HDIV_TET(int polyOrder, const EPointType pointType=POINTTYPE_DEFAULT)
722  :
723  polyOrder_(polyOrder)
724  {
725  const shards::CellTopology cellTopo(shards::getCellTopologyData<shards::Tetrahedron<> >());
726  this->basisCellTopologyKey_ = shards::Tetrahedron<>::key;
727  const int numFaces = cellTopo.getFaceCount();
728 
729  const int numVertexFunctions = 0;
730  const int numEdgeFunctions = 0;
731  const int numFaceFunctions = numFaces * polyOrder * (polyOrder+1) / 2; // 4 faces; 2 families, each with p*(p-1)/2 functions per face
732  const int numInteriorFunctionsPerFamily = (polyOrder-1)*polyOrder*(polyOrder+1)/6;
733  const int numInteriorFunctions = numInteriorFunctionsPerFamily * 3; // 3 families of interior functions
734  this->basisCardinality_ = numVertexFunctions + numEdgeFunctions + numFaceFunctions + numInteriorFunctions;
735  this->basisDegree_ = polyOrder;
736 
737  this->basisType_ = BASIS_FEM_HIERARCHICAL;
738  this->basisCoordinates_ = COORDINATES_CARTESIAN;
739  this->functionSpace_ = FUNCTION_SPACE_HDIV;
740 
741  const int degreeLength = 1;
742  this->fieldOrdinalPolynomialDegree_ = OrdinalTypeArray2DHost("Hierarchical H(div) triangle polynomial degree lookup", this->basisCardinality_, degreeLength);
743 
744  // **** vertex functions **** //
745  // no vertex functions in H(div)
746 
747  // **** edge functions **** //
748  // no edge functions in H(div)
749 
750  // **** face functions **** //
751  const int max_ij_sum = polyOrder-1;
752  ordinal_type fieldOrdinal = 0;
753  for (int faceOrdinal=0; faceOrdinal<numFaces; faceOrdinal++)
754  {
755  for (int ij_sum=0; ij_sum <= max_ij_sum; ij_sum++)
756  {
757  for (int i=0; i<=ij_sum; i++)
758  {
759  this->fieldOrdinalPolynomialDegree_(fieldOrdinal,0) = ij_sum+1;
760  fieldOrdinal++;
761  }
762  }
763  }
764  INTREPID2_TEST_FOR_EXCEPTION(fieldOrdinal != numEdgeFunctions + numFaceFunctions, std::invalid_argument, "Internal error: basis enumeration is incorrect");
765 
766  const int numInteriorFamilies = 3;
767  const int interiorFieldOrdinalOffset = fieldOrdinal;
768  const ordinal_type min_ijk_sum = 1;
769  const ordinal_type max_ijk_sum = polyOrder_-1;
770  const ordinal_type min_ij_sum = 0;
771  const ordinal_type min_k = 1;
772  const ordinal_type min_j = 0;
773  const ordinal_type min_i = 0;
774  for (int interiorFamilyOrdinal=1; interiorFamilyOrdinal<=numInteriorFamilies; interiorFamilyOrdinal++)
775  {
776  // following ESEAS, we interleave the interior families. This groups all the interior dofs of a given degree together.
777  fieldOrdinal = interiorFieldOrdinalOffset + interiorFamilyOrdinal - 1;
778  for (int ijk_sum=min_ijk_sum; ijk_sum <= max_ijk_sum; ijk_sum++)
779  {
780  for (int ij_sum=min_ij_sum; ij_sum<=ijk_sum-min_k; ij_sum++)
781  {
782  for (int i=min_i; i<=ij_sum-min_j; i++)
783  {
784  this->fieldOrdinalPolynomialDegree_(fieldOrdinal,0) = ijk_sum+1;
785  fieldOrdinal += numInteriorFamilies; // increment due to the interleaving.
786  }
787  }
788  }
789  fieldOrdinal = fieldOrdinal - numInteriorFamilies + 1; // due to the interleaving increment, we've gone numInteriorFamilies past the last interior ordinal. Set fieldOrdinal to be one past.
790  }
791 
792  INTREPID2_TEST_FOR_EXCEPTION(fieldOrdinal != this->basisCardinality_, std::invalid_argument, "Internal error: basis enumeration is incorrect");
793 
794  // initialize tags
795  {
796  // ESEAS numbers tetrahedron faces differently from Intrepid2
797  // ESEAS: 012, 013, 123, 023
798  // Intrepid2: 013, 123, 032, 021
799  const int intrepid2FaceOrdinals[4] {3,0,1,2}; // index is the ESEAS face ordinal; value is the intrepid2 ordinal
800 
801  const auto & cardinality = this->basisCardinality_;
802 
803  // Basis-dependent initializations
804  const ordinal_type tagSize = 4; // size of DoF tag, i.e., number of fields in the tag
805  const ordinal_type posScDim = 0; // position in the tag, counting from 0, of the subcell dim
806  const ordinal_type posScOrd = 1; // position in the tag, counting from 0, of the subcell ordinal
807  const ordinal_type posDfOrd = 2; // position in the tag, counting from 0, of DoF ordinal relative to the subcell
808 
809  OrdinalTypeArray1DHost tagView("tag view", cardinality*tagSize);
810  const ordinal_type faceDim = 2, volumeDim = 3;
811 
812  if (useCGBasis) {
813  {
814  int tagNumber = 0;
815  const int numFunctionsPerFace = numFaceFunctions / numFaces;
816  for (int faceOrdinalESEAS=0; faceOrdinalESEAS<numFaces; faceOrdinalESEAS++)
817  {
818  int faceOrdinalIntrepid2 = intrepid2FaceOrdinals[faceOrdinalESEAS];
819  for (int functionOrdinal=0; functionOrdinal<numFunctionsPerFace; functionOrdinal++)
820  {
821  tagView(tagNumber*tagSize+0) = faceDim; // face dimension
822  tagView(tagNumber*tagSize+1) = faceOrdinalIntrepid2; // face id
823  tagView(tagNumber*tagSize+2) = functionOrdinal; // local dof id
824  tagView(tagNumber*tagSize+3) = numFunctionsPerFace; // total number of dofs on this face
825  tagNumber++;
826  }
827  }
828 
829  // interior
830  for (int functionOrdinal=0; functionOrdinal<numInteriorFunctions; functionOrdinal++)
831  {
832  tagView(tagNumber*tagSize+0) = volumeDim; // interior dimension
833  tagView(tagNumber*tagSize+1) = 0; // volume id
834  tagView(tagNumber*tagSize+2) = functionOrdinal; // local dof id
835  tagView(tagNumber*tagSize+3) = numInteriorFunctions; // total number of interior dofs
836  tagNumber++;
837  }
838  }
839  }
840  else
841  {
842  // DG basis: all functions are associated with interior
843  for (ordinal_type i=0;i<cardinality;++i) {
844  tagView(i*tagSize+0) = volumeDim; // face dimension
845  tagView(i*tagSize+1) = 0; // interior/volume id
846  tagView(i*tagSize+2) = i; // local dof id
847  tagView(i*tagSize+3) = cardinality; // total number of dofs on this face
848  }
849  }
850 
851  // Basis-independent function sets tag and enum data in tagToOrdinal_ and ordinalToTag_ arrays:
852  // tags are constructed on host
853  this->setOrdinalTagData(this->tagToOrdinal_,
854  this->ordinalToTag_,
855  tagView,
856  this->basisCardinality_,
857  tagSize,
858  posScDim,
859  posScOrd,
860  posDfOrd);
861  }
862  }
863 
868  const char* getName() const override {
869  return "Intrepid2_HierarchicalBasis_HDIV_TET";
870  }
871 
874  virtual bool requireOrientation() const override {
875  return (this->getDegree() > 2);
876  }
877 
878  // since the getValues() below only overrides the FEM variant, we specify that
879  // we use the base class's getValues(), which implements the FVD variant by throwing an exception.
880  // (It's an error to use the FVD variant on this basis.)
881  using BasisBase::getValues;
882 
901  virtual void getValues( OutputViewType outputValues, const PointViewType inputPoints,
902  const EOperator operatorType = OPERATOR_VALUE ) const override
903  {
904  auto numPoints = inputPoints.extent_int(0);
905 
907 
908  FunctorType functor(operatorType, outputValues, inputPoints, polyOrder_);
909 
910  const int outputVectorSize = getVectorSizeForHierarchicalParallelism<OutputScalar>();
911  const int pointVectorSize = getVectorSizeForHierarchicalParallelism<PointScalar>();
912  const int vectorSize = std::max(outputVectorSize,pointVectorSize);
913  const int teamSize = 1; // because of the way the basis functions are computed, we don't have a second level of parallelism...
914 
915  auto policy = Kokkos::TeamPolicy<ExecutionSpace>(numPoints,teamSize,vectorSize);
916  Kokkos::parallel_for("Hierarchical_HDIV_TET_Functor", policy , functor);
917  }
918 
927  BasisPtr<DeviceType,OutputScalar,PointScalar>
928  getSubCellRefBasis(const ordinal_type subCellDim, const ordinal_type subCellOrd) const override
929  {
931  if (subCellDim == 2)
932  {
933  return Teuchos::rcp(new HVOL_Tri(this->basisDegree_-1));
934  }
935  INTREPID2_TEST_FOR_EXCEPTION(true,std::invalid_argument,"Input parameters out of bounds");
936  }
937 
942  virtual BasisPtr<typename Kokkos::HostSpace::device_type, OutputScalar, PointScalar>
943  getHostBasis() const override {
944  using HostDeviceType = typename Kokkos::HostSpace::device_type;
946  return Teuchos::rcp( new HostBasisType(polyOrder_) );
947  }
948  };
949 } // end namespace Intrepid2
950 
951 #endif /* Intrepid2_HierarchicalBasis_HDIV_TET_h */
HierarchicalBasis_HDIV_TET(int polyOrder, const EPointType pointType=POINTTYPE_DEFAULT)
Constructor.
Kokkos::View< ordinal_type *, typename ExecutionSpace::array_layout, Kokkos::HostSpace > OrdinalTypeArray1DHost
View type for 1d host array.
unsigned basisCellTopologyKey_
Identifier of the base topology of the cells for which the basis is defined. See the Shards package f...
KOKKOS_INLINE_FUNCTION void computeFaceJacobiForInterior(OutputScratchView &P_2ip1, const ordinal_type &zeroBasedInteriorFamilyOrdinal, const ordinal_type &i, const PointScalar *lambda) const
The face functions we compute for interior blending can have a different orientation than the ones us...
Kokkos::View< ordinal_type *, typename ExecutionSpace::array_layout, Kokkos::HostSpace > OrdinalTypeArray1DHost
View type for 1d host array.
Functor for computing values for the HierarchicalBasis_HDIV_TET class.
An abstract base class that defines interface for concrete basis implementations for Finite Element (...
virtual bool requireOrientation() const override
True if orientation is required.
Kokkos::DynRankView< scalarType, Kokkos::LayoutStride, DeviceType > ScalarViewType
View type for scalars.
Free functions, callable from device code, that implement various polynomials useful in basis definit...
EFunctionSpace functionSpace_
The function space in which the basis is defined.
ordinal_type basisDegree_
Degree of the largest complete polynomial space that can be represented by the basis.
virtual KOKKOS_INLINE_FUNCTION void getValues(OutputViewType, const PointViewType, const EOperator, const typename Kokkos::TeamPolicy< ExecutionSpace >::member_type &teamMember, const typename ExecutionSpace::scratch_memory_space &scratchStorage, const ordinal_type subcellDim=-1, const ordinal_type subcellOrdinal=-1) const
Team-level evaluation of basis functions on a reference cell.
Header function for Intrepid2::Util class and other utility functions.
Kokkos::View< ordinal_type **, typename ExecutionSpace::array_layout, Kokkos::HostSpace > OrdinalTypeArray2DHost
View type for 2d host array.
Kokkos::DynRankView< PointValueType, Kokkos::LayoutStride, DeviceType > PointViewType
View type for input points.
BasisPtr< DeviceType, OutputScalar, PointScalar > getSubCellRefBasis(const ordinal_type subCellDim, const ordinal_type subCellOrd) const override
returns the basis associated to a subCell.
Kokkos::DynRankView< OutputValueType, Kokkos::LayoutStride, DeviceType > OutputViewType
View type for basis value output.
ordinal_type basisCardinality_
Cardinality of the basis, i.e., the number of basis functions/degrees-of-freedom. ...
virtual void getValues(OutputViewType outputValues, const PointViewType inputPoints, const EOperator operatorType=OPERATOR_VALUE) const override
Evaluation of a FEM basis on a reference cell.
For mathematical details of the construction, see:
OrdinalTypeArray3DHost tagToOrdinal_
DoF tag to ordinal lookup table.
const char * getName() const override
Returns basis name.
OrdinalTypeArray2DHost ordinalToTag_
&quot;true&quot; if tagToOrdinal_ and ordinalToTag_ have been initialized
ordinal_type getDegree() const
Returns the degree of the basis.
ECoordinates basisCoordinates_
The coordinate system for which the basis is defined.
OrdinalTypeArray2DHost fieldOrdinalPolynomialDegree_
Polynomial degree for each degree of freedom. Only defined for hierarchical bases right now...
Kokkos::DynRankView< OutputValueType, Kokkos::LayoutStride, DeviceType > OutputViewType
View type for basis value output.
Basis defining Legendre basis on the line, a polynomial subspace of H(vol) on the line: extension to ...
void setOrdinalTagData(OrdinalTypeView3D &tagToOrdinal, OrdinalTypeView2D &ordinalToTag, const OrdinalTypeView1D tags, const ordinal_type basisCard, const ordinal_type tagSize, const ordinal_type posScDim, const ordinal_type posScOrd, const ordinal_type posDfOrd)
Fills ordinalToTag_ and tagToOrdinal_ by basis-specific tag data.
H(vol) basis on the triangle based on integrated Legendre polynomials.
Kokkos::DynRankView< PointValueType, Kokkos::LayoutStride, DeviceType > PointViewType
View type for input points.
Header file for the abstract base class Intrepid2::Basis.
typename DeviceType::execution_space ExecutionSpace
(Kokkos) Execution space for basis.
virtual BasisPtr< typename Kokkos::HostSpace::device_type, OutputScalar, PointScalar > getHostBasis() const override
Creates and returns a Basis object whose DeviceType template argument is Kokkos::HostSpace::device_ty...