Intrepid2_TensorBasis.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Intrepid2 Package
4 //
5 // Copyright 2007 NTESS and the Intrepid2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
15 #ifndef Intrepid2_TensorBasis_h
16 #define Intrepid2_TensorBasis_h
17 
18 #include <Kokkos_DynRankView.hpp>
19 
20 #include <Teuchos_RCP.hpp>
21 
22 #include <Intrepid2_config.h>
23 
24 #include <map>
25 #include <set>
26 #include <vector>
27 
28 #include "Intrepid2_Basis.hpp"
32 #include "Intrepid2_Utils.hpp" // defines FAD_VECTOR_SIZE, VECTOR_SIZE
33 
35 
36 namespace Intrepid2
37 {
38  template<ordinal_type spaceDim>
39  KOKKOS_INLINE_FUNCTION
40  ordinal_type getDkEnumeration(const Kokkos::Array<int,spaceDim> &entries);
41 
42  template<ordinal_type spaceDim>
43  KOKKOS_INLINE_FUNCTION
44  void getDkEnumerationInverse(Kokkos::Array<int,spaceDim> &entries, const ordinal_type dkEnum, const ordinal_type operatorOrder);
45 
46  template<>
47  KOKKOS_INLINE_FUNCTION
48  void getDkEnumerationInverse<1>(Kokkos::Array<int,1> &entries, const ordinal_type dkEnum, const ordinal_type operatorOrder)
49  {
50  entries[0] = operatorOrder;
51  }
52 
53  template<>
54  KOKKOS_INLINE_FUNCTION
55  void getDkEnumerationInverse<2>(Kokkos::Array<int,2> &entries, const ordinal_type dkEnum, const ordinal_type operatorOrder)
56  {
57  entries[0] = operatorOrder - dkEnum;
58  entries[1] = dkEnum;
59  }
60 
61  template<>
62  KOKKOS_INLINE_FUNCTION
63  void getDkEnumerationInverse<3>(Kokkos::Array<int,3> &entries, const ordinal_type dkEnum, const ordinal_type operatorOrder)
64  {
65  // formula is zMult + (yMult+zMult)*(yMult+zMult+1)/2; where xMult+yMult+zMult = operatorOrder
66  // it seems complicated to work out a formula that will invert this. For the present we just take a brute force approach,
67  // using getDkEnumeration() to check each possibility
68  for (ordinal_type yMult=0; yMult<=operatorOrder; yMult++)
69  {
70  for (ordinal_type zMult=0; zMult<=operatorOrder-yMult; zMult++)
71  {
72  const ordinal_type xMult = operatorOrder-(zMult+yMult);
73  if (dkEnum == getDkEnumeration<3>(xMult,yMult,zMult))
74  {
75  entries[0] = xMult;
76  entries[1] = yMult;
77  entries[2] = zMult;
78  return;
79  }
80  }
81  }
82  }
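 
 // Worked example (illustrative): for operatorOrder = 2 in 3D, the formula above enumerates
 // the multiplicities (xMult,yMult,zMult) as
 //   (2,0,0)->0, (1,1,0)->1, (1,0,1)->2, (0,2,0)->3, (0,1,1)->4, (0,0,2)->5,
 // so, e.g., the mixed y/z second derivative is recovered from its enumeration value by
 //   Kokkos::Array<int,3> entries;
 //   getDkEnumerationInverse<3>(entries, 4, 2); // entries == {0,1,1}
 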
83 
84  template<ordinal_type spaceDim>
85  KOKKOS_INLINE_FUNCTION
86  void getDkEnumerationInverse(Kokkos::Array<int,spaceDim> &entries, const ordinal_type dkEnum, const ordinal_type operatorOrder)
87  {
88  // for operator order k, the recursive formula defining getDkEnumeration is:
89  // getDkEnumeration(k0,k1,…,k_{n-1}) = getDkCardinality(k - k0) + getDkEnumeration(k1,…,k_{n-1})
 90 // The entries are in reverse lexicographic order. We search for k0 by incrementing it until getDkEnumeration(k0,0,…,0) <= dkEnum
91  // Then we recursively call getDkEnumerationInverse<spaceDim-1>({k1,…,k_{n-1}}, dkEnum - getDkEnumeration(k0,0,…,0) - 1)
92 
93  for (int k0=0; k0<=operatorOrder; k0++)
94  {
95  entries[0] = k0;
96  for (int d=1; d<spaceDim-1; d++)
97  {
98  entries[d] = 0;
99  }
100  // sum of entries must be equal to operatorOrder
101  if (spaceDim > 1) entries[spaceDim-1] = operatorOrder - k0;
102  else if (k0 != operatorOrder) continue; // if spaceDim == 1, then the only way the sum of the entries is operatorOrder is if k0 == operatorOrder
103  const ordinal_type dkEnumFor_k0 = getDkEnumeration<spaceDim>(entries);
104 
105  if (dkEnumFor_k0 > dkEnum) continue; // next k0
106  else if (dkEnumFor_k0 == dkEnum) return; // entries has (k0,0,…,0), and this has dkEnum as its enumeration value
107  else
108  {
109  // (k0,0,…,0) is prior to the dkEnum entry, which means that the dkEnum entry starts with k0-1.
110  entries[0] = k0 - 1;
111 
112  // We determine the rest of the entries through a recursive call to getDkEnumerationInverse<spaceDim - 1>().
113 
114  // ensure that we don't try to allocate an empty array…
115  constexpr ordinal_type sizeForSubArray = (spaceDim > 2) ? spaceDim - 1 : 1;
116  Kokkos::Array<int,sizeForSubArray> subEntries = {};
117 
118  // the -1 in sub-entry enumeration value accounts for the fact that the entry is the one *after* (k0,0,…,0)
119  getDkEnumerationInverse<spaceDim-1>(subEntries, dkEnum - dkEnumFor_k0 - 1, operatorOrder - entries[0]);
120 
121  for (int i=1; i<spaceDim; i++)
122  {
123  entries[i] = subEntries[i-1];
124  }
125  return;
126  }
127  }
128  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(true, std::invalid_argument, "entries corresponding to dkEnum not found");
129  }
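 
 // Example (illustrative): first derivatives enumerate in coordinate order, so in 4D the value
 // dkEnum = 2 with operatorOrder = 1 corresponds to the first derivative in the third coordinate:
 //   Kokkos::Array<int,4> entries;
 //   getDkEnumerationInverse<4>(entries, 2, 1); // entries == {0,0,1,0}
 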
130 
131  template<>
132  KOKKOS_INLINE_FUNCTION
133  ordinal_type getDkEnumeration<1>(const Kokkos::Array<int,1> &entries)
134  {
135  return getDkEnumeration<1>(entries[0]);
136  }
137 
138  template<ordinal_type spaceDim>
139  KOKKOS_INLINE_FUNCTION
140  ordinal_type getDkEnumeration(const Kokkos::Array<int,spaceDim> &entries)
141  {
142  ordinal_type k_minus_k0 = 0; // sum of all the entries but the first
143 
144  // recursive formula in general is: getDkEnumeration(k0,k1,…,k_{n-1}) = getDkCardinality(k - k0) + getDkEnumeration(k1,…,k_{n-1})
145  // ensure that we don't try to allocate an empty array…
146  constexpr ordinal_type sizeForSubArray = (spaceDim > 2) ? spaceDim - 1 : 1;
147  Kokkos::Array<int,sizeForSubArray> remainingEntries;
148  for (int i=1; i<spaceDim; i++)
149  {
150  k_minus_k0 += entries[i];
151  remainingEntries[i-1] = entries[i];
152  }
153 
154  if (k_minus_k0 == 0)
155  {
156  return 0;
157  }
158  else
159  {
160  EOperator opFor_k_minus_k0_minus_1 = (k_minus_k0 > 1) ? EOperator(OPERATOR_D1 + k_minus_k0 - 2) : EOperator(OPERATOR_VALUE);
161  const ordinal_type dkCardinality = getDkCardinality(opFor_k_minus_k0_minus_1, spaceDim);
162  const ordinal_type dkEnum = dkCardinality + getDkEnumeration<sizeForSubArray>(remainingEntries);
163  return dkEnum;
164  }
165  }
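 
 // Worked example (illustrative) of the recursion: getDkEnumeration<3>({0,1,1}) has k_minus_k0 = 2,
 // so it adds getDkCardinality(OPERATOR_D1, 3) = 3 to getDkEnumeration<2>({1,1}) = 1, giving 4,
 // consistent with the 3D enumeration (0,1,1) -> 4 noted above.
 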
166 
167  template<ordinal_type spaceDim1, ordinal_type spaceDim2>
168  KOKKOS_INLINE_FUNCTION
169  ordinal_type getDkTensorIndex(const ordinal_type dkEnum1, const ordinal_type operatorOrder1,
170  const ordinal_type dkEnum2, const ordinal_type operatorOrder2)
171  {
172  Kokkos::Array<int,spaceDim1> entries1 = {};
173  getDkEnumerationInverse<spaceDim1>(entries1, dkEnum1, operatorOrder1);
174 
175  Kokkos::Array<int,spaceDim2> entries2 = {};
176  getDkEnumerationInverse<spaceDim2>(entries2, dkEnum2, operatorOrder2);
177 
178  const int spaceDim = spaceDim1 + spaceDim2;
179  Kokkos::Array<int,spaceDim> entries;
180 
181  for (ordinal_type d=0; d<spaceDim1; d++)
182  {
183  entries[d] = entries1[d];
184  }
185 
186  for (ordinal_type d=0; d<spaceDim2; d++)
187  {
188  entries[d+spaceDim1] = entries2[d];
189  }
190 
191  return getDkEnumeration<spaceDim>(entries);
192  }
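 
 // Worked example (illustrative): combining d/dy on a 2D component (dkEnum1 = 1, operatorOrder1 = 1,
 // i.e. multiplicities (0,1)) with d/dz on a 1D component (dkEnum2 = 0, operatorOrder2 = 1) yields the
 // 3D multiplicities (0,1,1), so getDkTensorIndex<2,1>(1, 1, 0, 1) == 4, the d^2/(dy dz) slot of D2 in 3D.
 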
193 
 template<typename BasisBase>
 class Basis_TensorBasis;
 
 struct OperatorTensorDecomposition
 {
202  // if we want to make this usable on device, we could switch to Kokkos::Array instead of std::vector. But this is not our immediate use case.
203  std::vector< std::vector<EOperator> > ops; // outer index: vector entry ordinal; inner index: basis component ordinal. (scalar-valued operators have a single entry in outer vector)
204  std::vector<double> weights; // weights for each vector entry
205  ordinal_type numBasisComponents_;
206  bool rotateXYNinetyDegrees_ = false; // if true, indicates that something that otherwise would have values (f_x, f_y, …) should be mapped to (-f_y, f_x, …). This is used for H(curl) wedges (specifically, for OPERATOR_CURL).
207 
208  OperatorTensorDecomposition(const std::vector<EOperator> &opsBasis1, const std::vector<EOperator> &opsBasis2, const std::vector<double> vectorComponentWeights)
209  :
210  weights(vectorComponentWeights),
211  numBasisComponents_(2)
212  {
213  const ordinal_type size = opsBasis1.size();
214  const ordinal_type opsBasis2Size = opsBasis2.size();
215  const ordinal_type weightsSize = weights.size();
216  INTREPID2_TEST_FOR_EXCEPTION(size != opsBasis2Size, std::invalid_argument, "opsBasis1.size() != opsBasis2.size()");
217  INTREPID2_TEST_FOR_EXCEPTION(size != weightsSize, std::invalid_argument, "opsBasis1.size() != weights.size()");
218 
219  for (ordinal_type i=0; i<size; i++)
220  {
221  ops.push_back(std::vector<EOperator>{opsBasis1[i],opsBasis2[i]});
222  }
223  }
224 
225  OperatorTensorDecomposition(const std::vector< std::vector<EOperator> > &vectorEntryOps, const std::vector<double> &vectorComponentWeights)
226  :
227  ops(vectorEntryOps),
228  weights(vectorComponentWeights)
229  {
230  const ordinal_type numVectorComponents = ops.size();
231  const ordinal_type weightsSize = weights.size();
 232 INTREPID2_TEST_FOR_EXCEPTION(numVectorComponents != weightsSize, std::invalid_argument, "vectorEntryOps.size() != vectorComponentWeights.size()");
233 
234  INTREPID2_TEST_FOR_EXCEPTION(numVectorComponents == 0, std::invalid_argument, "must have at least one entry!");
235 
236  ordinal_type numBases = 0;
237  for (ordinal_type i=0; i<numVectorComponents; i++)
238  {
239  if (numBases == 0)
240  {
241  numBases = ops[i].size();
242  }
243  else if (ops[i].size() != 0)
244  {
245  const ordinal_type opsiSize = ops[i].size();
246  INTREPID2_TEST_FOR_EXCEPTION(numBases != opsiSize, std::invalid_argument, "must have one operator for each basis in each nontrivial entry in vectorEntryOps");
247  }
248  }
249  INTREPID2_TEST_FOR_EXCEPTION(numBases == 0, std::invalid_argument, "at least one vectorEntryOps entry must be non-trivial");
250  numBasisComponents_ = numBases;
251  }
252 
253  OperatorTensorDecomposition(const std::vector<EOperator> &basisOps, const double weight = 1.0)
254  :
255  ops({basisOps}),
256  weights({weight}),
257  numBasisComponents_(basisOps.size())
258  {}
259 
260  OperatorTensorDecomposition(const EOperator &opBasis1, const EOperator &opBasis2, double weight = 1.0)
261  :
262  ops({ std::vector<EOperator>{opBasis1, opBasis2} }),
263  weights({weight}),
264  numBasisComponents_(2)
265  {}
266 
267  OperatorTensorDecomposition(const EOperator &opBasis1, const EOperator &opBasis2, const EOperator &opBasis3, double weight = 1.0)
268  :
269  ops({ std::vector<EOperator>{opBasis1, opBasis2, opBasis3} }),
270  weights({weight}),
271  numBasisComponents_(3)
272  {}
273 
274  ordinal_type numVectorComponents() const
275  {
276  return ops.size(); // will match weights.size()
277  }
278 
279  ordinal_type numBasisComponents() const
280  {
281  return numBasisComponents_;
282  }
283 
284  double weight(const ordinal_type &vectorComponentOrdinal) const
285  {
286  return weights[vectorComponentOrdinal];
287  }
288 
289  bool identicallyZeroComponent(const ordinal_type &vectorComponentOrdinal) const
290  {
291  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(vectorComponentOrdinal < 0, std::invalid_argument, "vectorComponentOrdinal is out of bounds");
292  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(vectorComponentOrdinal >= numVectorComponents(), std::invalid_argument, "vectorComponentOrdinal is out of bounds");
293  return ops[vectorComponentOrdinal].size() == 0;
294  }
295 
296  EOperator op(const ordinal_type &vectorComponentOrdinal, const ordinal_type &basisOrdinal) const
297  {
298  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(vectorComponentOrdinal < 0, std::invalid_argument, "vectorComponentOrdinal is out of bounds");
299  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(vectorComponentOrdinal >= numVectorComponents(), std::invalid_argument, "vectorComponentOrdinal is out of bounds");
300  if (identicallyZeroComponent(vectorComponentOrdinal))
301  {
302  return OPERATOR_MAX; // by convention: zero in this component
303  }
304  else
305  {
306  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(basisOrdinal < 0, std::invalid_argument, "basisOrdinal is out of bounds");
307  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(basisOrdinal >= numBasisComponents_, std::invalid_argument, "basisOrdinal is out of bounds");
308  return ops[vectorComponentOrdinal][basisOrdinal];
309  }
310  }
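 
 // Usage sketch (illustrative): the gradient of a 2D tensor-product scalar basis can be described by
 // two vector components, each a product of one operator per component basis: (D1 x VALUE, VALUE x D1).
 //   std::vector<EOperator> opsBasis1 {OPERATOR_D1, OPERATOR_VALUE}; // operator applied to basis1, per vector component
 //   std::vector<EOperator> opsBasis2 {OPERATOR_VALUE, OPERATOR_D1}; // operator applied to basis2, per vector component
 //   std::vector<double> weights {1.0, 1.0};
 //   OperatorTensorDecomposition gradDecomposition(opsBasis1, opsBasis2, weights);
 //   // gradDecomposition.numVectorComponents() == 2; gradDecomposition.op(1,0) == OPERATOR_VALUE
 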
311 
 template<typename DeviceType, typename OutputValueType, class PointValueType>
 OperatorTensorDecomposition expandedDecomposition(std::vector< Teuchos::RCP< Basis<DeviceType,OutputValueType,PointValueType> > > &bases)
 {
316  const ordinal_type basesSize = bases.size();
317  INTREPID2_TEST_FOR_EXCEPTION(basesSize != numBasisComponents_, std::invalid_argument, "The number of bases provided must match the number of basis components in this decomposition");
318 
319  ordinal_type numExpandedBasisComponents = 0;
 using BasisBase = Basis<DeviceType,OutputValueType,PointValueType>;
 using TensorBasis = Basis_TensorBasis<BasisBase>;
322  std::vector<TensorBasis*> basesAsTensorBasis(numBasisComponents_);
323  for (ordinal_type basisComponentOrdinal=0; basisComponentOrdinal<numBasisComponents_; basisComponentOrdinal++)
324  {
325  TensorBasis* basisAsTensorBasis = dynamic_cast<TensorBasis*>(bases[basisComponentOrdinal].get());
326  basesAsTensorBasis[basisComponentOrdinal] = basisAsTensorBasis;
327  if (basisAsTensorBasis)
328  {
329  numExpandedBasisComponents += basisAsTensorBasis->getTensorBasisComponents().size();
330  }
331  else
332  {
333  numExpandedBasisComponents += 1;
334  }
335  }
336 
337  std::vector< std::vector<EOperator> > expandedOps; // outer index: vector entry ordinal; inner index: basis component ordinal.
338  std::vector<double> expandedWeights;
339  const ordinal_type opsSize = ops.size();
340  for (ordinal_type simpleVectorEntryOrdinal=0; simpleVectorEntryOrdinal<opsSize; simpleVectorEntryOrdinal++)
341  {
342  if (identicallyZeroComponent(simpleVectorEntryOrdinal))
343  {
344  expandedOps.push_back(std::vector<EOperator>{});
345  expandedWeights.push_back(0.0);
346  continue;
347  }
348 
349  std::vector< std::vector<EOperator> > expandedBasisOpsForSimpleVectorEntry(1); // start out with one outer entry; expands if a component is vector-valued
350 
351  // this lambda appends an op to each of the vector components
352  auto addExpandedOp = [&expandedBasisOpsForSimpleVectorEntry](const EOperator &op)
353  {
354  const ordinal_type size = expandedBasisOpsForSimpleVectorEntry.size();
355  for (ordinal_type i=0; i<size; i++)
356  {
357  expandedBasisOpsForSimpleVectorEntry[i].push_back(op);
358  }
359  };
360 
361  // this lambda takes a scalar-valued (single outer entry) expandedBasisOps and expands it
362  // according to the number of vector entries coming from the vector-valued component basis
363  auto vectorizeExpandedOps = [&expandedBasisOpsForSimpleVectorEntry](const int &numSubVectors)
364  {
365  // we require that this only gets called once per simpleVectorEntryOrdinal -- i.e., only one basis component gets to be vector-valued.
366  INTREPID2_TEST_FOR_EXCEPTION(expandedBasisOpsForSimpleVectorEntry.size() != 1, std::invalid_argument, "multiple basis components may not be vector-valued!");
367  for (ordinal_type i=1; i<numSubVectors; i++)
368  {
369  expandedBasisOpsForSimpleVectorEntry.push_back(expandedBasisOpsForSimpleVectorEntry[0]);
370  }
371  };
372 
373  std::vector<EOperator> subVectorOps; // only used if one of the components is vector-valued
374  std::vector<double> subVectorWeights {weights[simpleVectorEntryOrdinal]};
375  for (ordinal_type basisComponentOrdinal=0; basisComponentOrdinal<numBasisComponents_; basisComponentOrdinal++)
376  {
377  const auto &op = ops[simpleVectorEntryOrdinal][basisComponentOrdinal];
378 
379  if (! basesAsTensorBasis[basisComponentOrdinal])
380  {
381  addExpandedOp(op);
382  }
383  else
384  {
385  OperatorTensorDecomposition basisOpDecomposition = basesAsTensorBasis[basisComponentOrdinal]->getOperatorDecomposition(op);
386  if (basisOpDecomposition.numVectorComponents() > 1)
387  {
388  // We don't currently support a use case where we have multiple component bases that are vector-valued:
389  INTREPID2_TEST_FOR_EXCEPTION(subVectorWeights.size() > 1, std::invalid_argument, "Unhandled case: multiple component bases are vector-valued");
390  // We do support a single vector-valued case, though; this splits the current simpleVectorEntryOrdinal into an appropriate number of components that come in order in the expanded vector
391  ordinal_type numSubVectors = basisOpDecomposition.numVectorComponents();
392  vectorizeExpandedOps(numSubVectors);
393 
394  double weightSoFar = subVectorWeights[0];
395  for (ordinal_type subVectorEntryOrdinal=1; subVectorEntryOrdinal<numSubVectors; subVectorEntryOrdinal++)
396  {
397  subVectorWeights.push_back(weightSoFar * basisOpDecomposition.weight(subVectorEntryOrdinal));
398  }
399  subVectorWeights[0] *= basisOpDecomposition.weight(0);
400  for (ordinal_type subVectorEntryOrdinal=0; subVectorEntryOrdinal<numSubVectors; subVectorEntryOrdinal++)
401  {
402  for (ordinal_type subComponentBasis=0; subComponentBasis<basisOpDecomposition.numBasisComponents(); subComponentBasis++)
403  {
404  const auto &basisOp = basisOpDecomposition.op(subVectorEntryOrdinal, subComponentBasis);
405  expandedBasisOpsForSimpleVectorEntry[subVectorEntryOrdinal].push_back(basisOp);
406  }
407  }
408  }
409  else
410  {
411  double componentWeight = basisOpDecomposition.weight(0);
412  const ordinal_type size = subVectorWeights.size();
413  for (ordinal_type i=0; i<size; i++)
414  {
415  subVectorWeights[i] *= componentWeight;
416  }
417  ordinal_type subVectorEntryOrdinal = 0;
418  const ordinal_type numBasisComponents = basisOpDecomposition.numBasisComponents();
419  for (ordinal_type subComponentBasis=0; subComponentBasis<numBasisComponents; subComponentBasis++)
420  {
 421 const auto &basisOp = basisOpDecomposition.op(subVectorEntryOrdinal, subComponentBasis);
422  addExpandedOp( basisOp );
423  }
424  }
425  }
426  }
427 
428  // sanity check on the new expandedOps entries:
429  for (ordinal_type i=0; i<static_cast<ordinal_type>(expandedBasisOpsForSimpleVectorEntry.size()); i++)
430  {
431  const ordinal_type size = expandedBasisOpsForSimpleVectorEntry[i].size();
432  INTREPID2_TEST_FOR_EXCEPTION(size != numExpandedBasisComponents, std::logic_error, "each vector in expandedBasisOpsForSimpleVectorEntry should have as many entries as there are expanded basis components");
433  }
434 
435  expandedOps.insert(expandedOps.end(), expandedBasisOpsForSimpleVectorEntry.begin(), expandedBasisOpsForSimpleVectorEntry.end());
436  expandedWeights.insert(expandedWeights.end(), subVectorWeights.begin(), subVectorWeights.end());
437  }
438  // check that vector lengths agree:
439  INTREPID2_TEST_FOR_EXCEPTION(expandedOps.size() != expandedWeights.size(), std::logic_error, "expandedWeights and expandedOps do not agree on the number of vector components");
440 
441  OperatorTensorDecomposition result(expandedOps, expandedWeights);
442  result.setRotateXYNinetyDegrees(rotateXYNinetyDegrees_);
443  return result;
444  }
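 
 // Illustrative sketch of the expansion: if this decomposition is {(D1,VALUE),(VALUE,D1)} (a 3D gradient
 // written against two components), bases[0] is itself a tensor basis with two line components, and
 // bases[1] is a line basis, then the D1 applied to bases[0] decomposes into two vector components and
 // the fully expanded result is {(D1,VALUE,VALUE),(VALUE,D1,VALUE),(VALUE,VALUE,D1)} with unit weights.
 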
445 
 bool rotateXYNinetyDegrees() const
 {
449  return rotateXYNinetyDegrees_;
450  }
451 
452  void setRotateXYNinetyDegrees(const bool &value)
453  {
454  rotateXYNinetyDegrees_ = value;
455  }
456 };
457 
463  template<class ExecutionSpace, class OutputScalar, class OutputFieldType>
 class TensorViewFunctor
 {
466  using ScratchSpace = typename ExecutionSpace::scratch_memory_space;
467  using OutputScratchView = Kokkos::View<OutputScalar*,ScratchSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
468 
469  using TeamPolicy = Kokkos::TeamPolicy<ExecutionSpace>;
470  using TeamMember = typename TeamPolicy::member_type;
471 
 using TensorViewIteratorType = Intrepid2::TensorViewIterator<OutputFieldType, OutputFieldType, OutputFieldType, OutputScalar>; // iterator that traverses the output view together with the two input views, producing OutputScalar entries
 using RankCombinationType = typename TensorViewIteratorType::RankCombinationType;
474 
475  OutputFieldType output_; // F,P[,D…]
476  OutputFieldType input1_; // F1,P[,D…] or F1,P1[,D…]
477  OutputFieldType input2_; // F2,P[,D…] or F2,P2[,D…]
478 
479  int numFields_, numPoints_;
480  int numFields1_, numPoints1_;
481  int numFields2_, numPoints2_;
482 
483  bool tensorPoints_; // if true, input1 and input2 refer to values at decomposed points, and P = P1 * P2. If false, then the two inputs refer to points in the full-dimensional space, and their point lengths are the same as that of the final output.
484 
485  using RankCombinationViewType = typename TensorViewIteratorType::RankCombinationViewType;
486  RankCombinationViewType rank_combinations_;// indicates the policy by which the input views will be combined in output view
487 
488  double weight_;
489 
490  public:
491 
492  TensorViewFunctor(OutputFieldType output, OutputFieldType inputValues1, OutputFieldType inputValues2,
493  bool tensorPoints, double weight)
494  : output_(output), input1_(inputValues1), input2_(inputValues2), tensorPoints_(tensorPoints), weight_(weight)
495  {
496  numFields_ = output.extent_int(0);
497  numPoints_ = output.extent_int(1);
498 
499  numFields1_ = inputValues1.extent_int(0);
500  numPoints1_ = inputValues1.extent_int(1);
501 
502  numFields2_ = inputValues2.extent_int(0);
503  numPoints2_ = inputValues2.extent_int(1);
504 
505  if (!tensorPoints_)
506  {
507  // then the point counts should all match
508  INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != numPoints1_, std::invalid_argument, "incompatible point counts");
509  INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != numPoints2_, std::invalid_argument, "incompatible point counts");
510  }
511  else
512  {
513  INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != numPoints1_ * numPoints2_, std::invalid_argument, "incompatible point counts");
514  }
515 
516  INTREPID2_TEST_FOR_EXCEPTION(numFields_ != numFields1_ * numFields2_, std::invalid_argument, "incompatible field sizes");
517 
518  const ordinal_type max_rank = std::max(inputValues1.rank(),inputValues2.rank());
519  // at present, no supported case will result in an output rank greater than both input ranks
520 
521  const ordinal_type outputRank = output.rank();
522  INTREPID2_TEST_FOR_EXCEPTION(outputRank > max_rank, std::invalid_argument, "Unsupported view combination.");
523  rank_combinations_ = RankCombinationViewType("Rank_combinations_", max_rank);
524  auto rank_combinations_host = Kokkos::create_mirror_view(rank_combinations_);
525 
526  rank_combinations_host[0] = TensorViewIteratorType::TENSOR_PRODUCT; // field combination is always tensor product
527  rank_combinations_host[1] = tensorPoints ? TensorViewIteratorType::TENSOR_PRODUCT : TensorViewIteratorType::DIMENSION_MATCH; // tensorPoints controls interpretation of the point dimension
528  for (ordinal_type d=2; d<max_rank; d++)
529  {
530  // d >= 2 have the interpretation of spatial dimensions (gradients, etc.)
531  // we let the extents of the containers determine what we're doing here
532  if ((inputValues1.extent_int(d) == inputValues2.extent_int(d)) && (output.extent_int(d) == 1))
533  {
534  rank_combinations_host[d] = TensorViewIteratorType::TENSOR_CONTRACTION;
535  }
536  else if (((inputValues1.extent_int(d) == output.extent_int(d)) && (inputValues2.extent_int(d) == 1))
537  || ((inputValues2.extent_int(d) == output.extent_int(d)) && (inputValues1.extent_int(d) == 1))
538  )
539  {
540  // this looks like multiplication of a vector by a scalar, resulting in a vector
541  // this can be understood as a tensor product
542  rank_combinations_host[d] = TensorViewIteratorType::TENSOR_PRODUCT;
543  }
544  else if ((inputValues1.extent_int(d) == inputValues2.extent_int(d)) && (output.extent_int(d) == inputValues1.extent_int(d) * inputValues2.extent_int(d)))
545  {
546  // this is actually a generalization of the above case: a tensor product, something like a vector outer product
547  rank_combinations_host[d] = TensorViewIteratorType::TENSOR_PRODUCT;
548  }
549  else if ((inputValues1.extent_int(d) == inputValues2.extent_int(d)) && (output.extent_int(d) == inputValues1.extent_int(d)))
550  {
551  // it's a bit weird (I'm not aware of the use case, in the present context), but we can handle this case by adopting DIMENSION_MATCH here
552  // this is something like MATLAB's .* and .+ operators, which operate entry-wise
553  rank_combinations_host[d] = TensorViewIteratorType::DIMENSION_MATCH;
554  }
555  else
556  {
557  std::cout << "inputValues1.extent_int(" << d << ") = " << inputValues1.extent_int(d) << std::endl;
558  std::cout << "inputValues2.extent_int(" << d << ") = " << inputValues2.extent_int(d) << std::endl;
559  std::cout << "output.extent_int(" << d << ") = " << output.extent_int(d) << std::endl;
560  INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "unable to find an interpretation for this combination of views");
561  }
562  }
563  Kokkos::deep_copy(rank_combinations_,rank_combinations_host);
564  }
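 
 // Examples of the rank-combination choices above (illustrative): for scalar-valued inputs,
 // output(F,P), input1(F1,P), input2(F2,P) with tensorPoints == false, rank_combinations_ is
 // {TENSOR_PRODUCT, DIMENSION_MATCH}: fields combine as a tensor product (F = F1*F2) and points match.
 // If instead input1 and input2 both carry a spatial extent D in some rank while output has extent 1
 // there, that rank becomes TENSOR_CONTRACTION, i.e. a dot product over D.
 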
565 
566  KOKKOS_INLINE_FUNCTION
567  void operator()( const TeamMember & teamMember ) const
568  {
569  auto fieldOrdinal1 = teamMember.league_rank();
570 
571  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
572  TensorViewIteratorType it(output_,input1_,input2_,rank_combinations_);
573  const int FIELD_ORDINAL_DIMENSION = 0;
574  it.setLocation({fieldOrdinal1,0,0,0,0,0,0},{fieldOrdinal2,0,0,0,0,0,0});
575  int next_increment_rank = FIELD_ORDINAL_DIMENSION; // used to initialize prev_increment_rank at the start of the do/while loop. Notionally, we last incremented in the field ordinal rank to get to the {fieldOrdinal1,0,0,0,0,0,0},{fieldOrdinal2,0,0,0,0,0,0} location.
576  OutputScalar accumulator = 0;
577 
578  do
579  {
580  accumulator += weight_ * it.getView1Entry() * it.getView2Entry();
581  next_increment_rank = it.nextIncrementRank();
582 
583  if ((next_increment_rank < 0) || (rank_combinations_[next_increment_rank] != TensorViewIteratorType::TENSOR_CONTRACTION))
584  {
585  // then we've finished the accumulation and should set the value
586  it.set(accumulator);
587  // reset the accumulator:
588  accumulator = 0;
589  }
590  } while (it.increment() > FIELD_ORDINAL_DIMENSION);
591  });
592  }
593  };
594 
608  template<typename BasisBaseClass = void>
609  class Basis_TensorBasis
610  :
611  public BasisBaseClass
612  {
613  public:
614  using BasisBase = BasisBaseClass;
615  using BasisPtr = Teuchos::RCP<BasisBase>;
616 
617  protected:
618  BasisPtr basis1_;
619  BasisPtr basis2_;
620 
621  std::vector<BasisPtr> tensorComponents_;
622 
623  std::string name_; // name of the basis
624 
625  int numTensorialExtrusions_; // relative to cell topo returned by getBaseCellTopology().
626  public:
627  using DeviceType = typename BasisBase::DeviceType;
628  using ExecutionSpace = typename BasisBase::ExecutionSpace;
629  using OutputValueType = typename BasisBase::OutputValueType;
630  using PointValueType = typename BasisBase::PointValueType;
631 
632  using OrdinalTypeArray1DHost = typename BasisBase::OrdinalTypeArray1DHost;
633  using OrdinalTypeArray2DHost = typename BasisBase::OrdinalTypeArray2DHost;
634  using OutputViewType = typename BasisBase::OutputViewType;
635  using PointViewType = typename BasisBase::PointViewType;
636  using TensorBasis = Basis_TensorBasis<BasisBaseClass>;
637  public:
644  Basis_TensorBasis(BasisPtr basis1, BasisPtr basis2, EFunctionSpace functionSpace = FUNCTION_SPACE_MAX,
645  const bool useShardsCellTopologyAndTags = false)
646  :
647  basis1_(basis1),basis2_(basis2)
648  {
649  this->functionSpace_ = functionSpace;
650 
651  Basis_TensorBasis* basis1AsTensor = dynamic_cast<Basis_TensorBasis*>(basis1_.get());
652  if (basis1AsTensor)
653  {
654  auto basis1Components = basis1AsTensor->getTensorBasisComponents();
655  tensorComponents_.insert(tensorComponents_.end(), basis1Components.begin(), basis1Components.end());
656  }
657  else
658  {
659  tensorComponents_.push_back(basis1_);
660  }
661 
662  Basis_TensorBasis* basis2AsTensor = dynamic_cast<Basis_TensorBasis*>(basis2_.get());
663  if (basis2AsTensor)
664  {
665  auto basis2Components = basis2AsTensor->getTensorBasisComponents();
666  tensorComponents_.insert(tensorComponents_.end(), basis2Components.begin(), basis2Components.end());
667  }
668  else
669  {
670  tensorComponents_.push_back(basis2_);
671  }
672 
673  this->basisCardinality_ = basis1->getCardinality() * basis2->getCardinality();
674  this->basisDegree_ = std::max(basis1->getDegree(), basis2->getDegree());
675 
676  {
677  std::ostringstream basisName;
678  basisName << basis1->getName() << " x " << basis2->getName();
679  name_ = basisName.str();
680  }
681 
682  // set cell topology
683  this->basisCellTopologyKey_ = tensorComponents_[0]->getBaseCellTopology().getKey();
684  this->numTensorialExtrusions_ = tensorComponents_.size() - 1;
685 
686  this->basisType_ = basis1_->getBasisType();
687  this->basisCoordinates_ = COORDINATES_CARTESIAN;
688 
689  ordinal_type spaceDim1 = basis1_->getDomainDimension();
690  ordinal_type spaceDim2 = basis2_->getDomainDimension();
691 
692  INTREPID2_TEST_FOR_EXCEPTION(spaceDim2 != 1, std::invalid_argument, "TensorBasis only supports 1D bases in basis2_ position");
693 
694  if (this->getBasisType() == BASIS_FEM_HIERARCHICAL)
695  {
696  // fill in degree lookup:
697  int degreeSize = basis1_->getPolynomialDegreeLength() + basis2_->getPolynomialDegreeLength();
698  this->fieldOrdinalPolynomialDegree_ = OrdinalTypeArray2DHost("TensorBasis - field ordinal polynomial degree", this->basisCardinality_, degreeSize);
699  this->fieldOrdinalH1PolynomialDegree_ = OrdinalTypeArray2DHost("TensorBasis - field ordinal polynomial H^1 degree", this->basisCardinality_, degreeSize);
700 
701  const ordinal_type basis1Cardinality = basis1_->getCardinality();
702  const ordinal_type basis2Cardinality = basis2_->getCardinality();
703 
704  int degreeLengthField1 = basis1_->getPolynomialDegreeLength();
705  int degreeLengthField2 = basis2_->getPolynomialDegreeLength();
706 
707  for (ordinal_type fieldOrdinal1 = 0; fieldOrdinal1 < basis1Cardinality; fieldOrdinal1++)
708  {
709  OrdinalTypeArray1DHost degreesField1 = basis1_->getPolynomialDegreeOfField(fieldOrdinal1);
710  OrdinalTypeArray1DHost h1DegreesField1 = basis1_->getH1PolynomialDegreeOfField(fieldOrdinal1);
711  for (ordinal_type fieldOrdinal2 = 0; fieldOrdinal2 < basis2Cardinality; fieldOrdinal2++)
712  {
713  OrdinalTypeArray1DHost degreesField2 = basis2_->getPolynomialDegreeOfField(fieldOrdinal2);
714  OrdinalTypeArray1DHost h1DegreesField2 = basis2_->getH1PolynomialDegreeOfField(fieldOrdinal2);
715  const ordinal_type tensorFieldOrdinal = fieldOrdinal2 * basis1Cardinality + fieldOrdinal1;
716 
717  for (int d3=0; d3<degreeLengthField1; d3++)
718  {
719  this->fieldOrdinalPolynomialDegree_ (tensorFieldOrdinal,d3) = degreesField1(d3);
720  this->fieldOrdinalH1PolynomialDegree_(tensorFieldOrdinal,d3) = h1DegreesField1(d3);
721  }
722  for (int d3=0; d3<degreeLengthField2; d3++)
723  {
724  this->fieldOrdinalPolynomialDegree_ (tensorFieldOrdinal,d3+degreeLengthField1) = degreesField2(d3);
725  this->fieldOrdinalH1PolynomialDegree_(tensorFieldOrdinal,d3+degreeLengthField1) = h1DegreesField2(d3);
726  }
727  }
728  }
729  }
730 
731  if (useShardsCellTopologyAndTags)
732  {
733  setShardsTopologyAndTags();
734  }
735  else
736  {
737  // we build tags recursively, making reference to basis1_ and basis2_'s tags to produce the tensor product tags.
738  // // initialize tags
739  const auto & cardinality = this->basisCardinality_;
740 
741  // Basis-dependent initializations
742  const ordinal_type tagSize = 4; // size of DoF tag, i.e., number of fields in the tag
743  const ordinal_type posScDim = 0; // position in the tag, counting from 0, of the subcell dim
744  const ordinal_type posScOrd = 1; // position in the tag, counting from 0, of the subcell ordinal
745  const ordinal_type posDfOrd = 2; // position in the tag, counting from 0, of DoF ordinal relative to the subcell
746  const ordinal_type posDfCnt = 3; // position in the tag, counting from 0, of DoF count for the subcell
747 
748  OrdinalTypeArray1DHost tagView("tag view", cardinality*tagSize);
749 
 750 // we assume that basis2_ is defined on a line, and that basis1_ is defined on a domain that is extruded once by that line.
751  auto cellTopo = CellTopology::cellTopology(tensorComponents_[0]->getBaseCellTopology(), numTensorialExtrusions_);
752  auto basis1Topo = cellTopo->getTensorialComponent();
753 
754  const ordinal_type spaceDim = spaceDim1 + spaceDim2;
755  const ordinal_type sideDim = spaceDim - 1;
756 
757  const OrdinalTypeArray2DHost ordinalToTag1 = basis1_->getAllDofTags();
758  const OrdinalTypeArray2DHost ordinalToTag2 = basis2_->getAllDofTags();
759 
760  for (int fieldOrdinal1=0; fieldOrdinal1<basis1_->getCardinality(); fieldOrdinal1++)
761  {
762  ordinal_type subcellDim1 = ordinalToTag1(fieldOrdinal1,posScDim);
763  ordinal_type subcellOrd1 = ordinalToTag1(fieldOrdinal1,posScOrd);
764  ordinal_type subcellDfCnt1 = ordinalToTag1(fieldOrdinal1,posDfCnt);
765  for (int fieldOrdinal2=0; fieldOrdinal2<basis2_->getCardinality(); fieldOrdinal2++)
766  {
767  ordinal_type subcellDim2 = ordinalToTag2(fieldOrdinal2,posScDim);
768  ordinal_type subcellOrd2 = ordinalToTag2(fieldOrdinal2,posScOrd);
769  ordinal_type subcellDfCnt2 = ordinalToTag2(fieldOrdinal2,posDfCnt);
770 
771  ordinal_type subcellDim = subcellDim1 + subcellDim2;
772  ordinal_type subcellOrd;
773  if (subcellDim2 == 0)
774  {
775  // vertex node in extrusion; the subcell is not extruded but belongs to one of the two "copies"
776  // of the basis1 topology
777  ordinal_type sideOrdinal = cellTopo->getTensorialComponentSideOrdinal(subcellOrd2); // subcellOrd2 is a "side" of the line topology
778  subcellOrd = CellTopology::getSubcellOrdinalMap(cellTopo, sideDim, sideOrdinal,
779  subcellDim1, subcellOrd1);
780  }
781  else
782  {
783  // line subcell in time; the subcell *is* extruded in final dimension
784  subcellOrd = cellTopo->getExtrudedSubcellOrdinal(subcellDim1, subcellOrd1);
785  if (subcellOrd == -1)
786  {
787  std::cout << "ERROR: -1 subcell ordinal.\n";
788  subcellOrd = cellTopo->getExtrudedSubcellOrdinal(subcellDim1, subcellOrd1);
789  }
790  }
791  ordinal_type tensorFieldOrdinal = fieldOrdinal2 * basis1_->getCardinality() + fieldOrdinal1;
792  // cout << "(" << fieldOrdinal1 << "," << fieldOrdinal2 << ") --> " << i << endl;
793  ordinal_type dofOffsetOrdinal1 = ordinalToTag1(fieldOrdinal1,posDfOrd);
794  ordinal_type dofOffsetOrdinal2 = ordinalToTag2(fieldOrdinal2,posDfOrd);
795  ordinal_type dofsForSubcell1 = ordinalToTag1(fieldOrdinal1,posDfCnt);
796  ordinal_type dofOffsetOrdinal = dofOffsetOrdinal2 * dofsForSubcell1 + dofOffsetOrdinal1;
797  tagView(tagSize*tensorFieldOrdinal + posScDim) = subcellDim; // subcellDim
798  tagView(tagSize*tensorFieldOrdinal + posScOrd) = subcellOrd; // subcell ordinal
799  tagView(tagSize*tensorFieldOrdinal + posDfOrd) = dofOffsetOrdinal; // ordinal of the specified DoF relative to the subcell
800  tagView(tagSize*tensorFieldOrdinal + posDfCnt) = subcellDfCnt1 * subcellDfCnt2; // total number of DoFs associated with the subcell
801  }
802  }
803 
804  // // Basis-independent function sets tag and enum data in tagToOrdinal_ and ordinalToTag_ arrays:
805  // // tags are constructed on host
806  this->setOrdinalTagData(this->tagToOrdinal_,
807  this->ordinalToTag_,
808  tagView,
809  this->basisCardinality_,
810  tagSize,
811  posScDim,
812  posScOrd,
813  posDfOrd);
814  }
815  }
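 
 // Note on field numbering (illustrative): the tensor basis enumerates fields with basis1 fastest, i.e.
 // tensorFieldOrdinal = fieldOrdinal2 * basis1->getCardinality() + fieldOrdinal1. For two line bases of
 // cardinality 3, field ordinals 0..2 pair basis2's field 0 with basis1's fields 0..2, ordinals 3..5
 // pair basis2's field 1 with basis1's fields 0..2, and so on.
 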
816 
817  void setShardsTopologyAndTags()
818  {
819  shards::CellTopology cellTopo1 = basis1_->getBaseCellTopology();
820  shards::CellTopology cellTopo2 = basis2_->getBaseCellTopology();
821 
822  auto cellKey1 = basis1_->getBaseCellTopology().getKey();
823  auto cellKey2 = basis2_->getBaseCellTopology().getKey();
824 
825  const int numTensorialExtrusions = basis1_->getNumTensorialExtrusions() + basis2_->getNumTensorialExtrusions();
826  if ((cellKey1 == shards::Line<2>::key) && (cellKey2 == shards::Line<2>::key) && (numTensorialExtrusions == 0))
827  {
828  this->basisCellTopologyKey_ = shards::Quadrilateral<4>::key;
829  }
830  else if ( ((cellKey1 == shards::Quadrilateral<4>::key) && (cellKey2 == shards::Line<2>::key))
831  || ((cellKey2 == shards::Quadrilateral<4>::key) && (cellKey1 == shards::Line<2>::key))
832  || ((cellKey1 == shards::Line<2>::key) && (cellKey2 == shards::Line<2>::key) && (numTensorialExtrusions == 1))
833  )
834  {
835  this->basisCellTopologyKey_ = shards::Hexahedron<8>::key;
836  }
837  else if ((cellKey1 == shards::Triangle<3>::key) && (cellKey2 == shards::Line<2>::key))
838  {
839  this->basisCellTopologyKey_ = shards::Wedge<6>::key;
840  }
841  else
842  {
843  INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Cell topology combination not yet supported");
844  }
845 
846  // numTensorialExtrusions_ is relative to the baseCellTopology; what we've just done is found a cell topology of the same spatial dimension as the extruded topology, so now numTensorialExtrusions_ should be 0.
847  numTensorialExtrusions_ = 0;
848 
849  // initialize tags
850  {
851  const auto & cardinality = this->basisCardinality_;
852 
853  // Basis-dependent initializations
854  const ordinal_type tagSize = 4; // size of DoF tag, i.e., number of fields in the tag
855  const ordinal_type posScDim = 0; // position in the tag, counting from 0, of the subcell dim
856  const ordinal_type posScOrd = 1; // position in the tag, counting from 0, of the subcell ordinal
857  const ordinal_type posDfOrd = 2; // position in the tag, counting from 0, of DoF ordinal relative to the subcell
858 
859  OrdinalTypeArray1DHost tagView("tag view", cardinality*tagSize);
860 
861  shards::CellTopology cellTopo = this->getBaseCellTopology();
862 
863  ordinal_type tensorSpaceDim = cellTopo.getDimension();
864  ordinal_type spaceDim1 = cellTopo1.getDimension();
865  ordinal_type spaceDim2 = cellTopo2.getDimension();
866 
867  TensorTopologyMap topoMap(cellTopo1, cellTopo2);
868 
869  for (ordinal_type d=0; d<=tensorSpaceDim; d++) // d: tensorial dimension
870  {
871  ordinal_type d2_max = std::min(spaceDim2, d);
872  for (ordinal_type d2=0; d2<=d2_max; d2++)
873  {
874  ordinal_type d1 = d-d2;
875  if (d1 > spaceDim1) continue;
876 
877  ordinal_type subcellCount2 = cellTopo2.getSubcellCount(d2);
878  ordinal_type subcellCount1 = cellTopo1.getSubcellCount(d1);
879  for (ordinal_type subcellOrdinal2=0; subcellOrdinal2<subcellCount2; subcellOrdinal2++)
880  {
881  ordinal_type subcellDofCount2 = basis2_->getDofCount(d2, subcellOrdinal2);
882  for (ordinal_type subcellOrdinal1=0; subcellOrdinal1<subcellCount1; subcellOrdinal1++)
883  {
884  ordinal_type subcellDofCount1 = basis1_->getDofCount(d1, subcellOrdinal1);
885  ordinal_type tensorLocalDofCount = subcellDofCount1 * subcellDofCount2;
886  for (ordinal_type localDofID2 = 0; localDofID2<subcellDofCount2; localDofID2++)
887  {
888  ordinal_type fieldOrdinal2 = basis2_->getDofOrdinal(d2, subcellOrdinal2, localDofID2);
889  OrdinalTypeArray1DHost degreesField2;
890  if (this->basisType_ == BASIS_FEM_HIERARCHICAL) degreesField2 = basis2_->getPolynomialDegreeOfField(fieldOrdinal2);
891  for (ordinal_type localDofID1 = 0; localDofID1<subcellDofCount1; localDofID1++)
892  {
893  ordinal_type fieldOrdinal1 = basis1_->getDofOrdinal(d1, subcellOrdinal1, localDofID1);
894  ordinal_type tensorLocalDofID = localDofID2 * subcellDofCount1 + localDofID1;
895  ordinal_type tensorFieldOrdinal = fieldOrdinal2 * basis1_->getCardinality() + fieldOrdinal1;
896  tagView(tensorFieldOrdinal*tagSize+0) = d; // subcell dimension
897  tagView(tensorFieldOrdinal*tagSize+1) = topoMap.getCompositeSubcellOrdinal(d1, subcellOrdinal1, d2, subcellOrdinal2);
898  tagView(tensorFieldOrdinal*tagSize+2) = tensorLocalDofID;
899  tagView(tensorFieldOrdinal*tagSize+3) = tensorLocalDofCount;
900  } // localDofID1
901  } // localDofID2
902  } // subcellOrdinal1
903  } // subcellOrdinal2
904  }
905  }
906 
907  // // Basis-independent function sets tag and enum data in tagToOrdinal_ and ordinalToTag_ arrays:
908  // // tags are constructed on host
909  this->setOrdinalTagData(this->tagToOrdinal_,
910  this->ordinalToTag_,
911  tagView,
912  this->basisCardinality_,
913  tagSize,
914  posScDim,
915  posScOrd,
916  posDfOrd);
917  }
918  }
919 
920  virtual int getNumTensorialExtrusions() const override
921  {
922  return numTensorialExtrusions_;
923  }
924 
933  ordinal_type getTensorDkEnumeration(ordinal_type dkEnum1, ordinal_type operatorOrder1,
934  ordinal_type dkEnum2, ordinal_type operatorOrder2) const
935  {
936  ordinal_type spaceDim1 = basis1_->getDomainDimension();
937  ordinal_type spaceDim2 = basis2_->getDomainDimension();
938 
939  // We support total spaceDim <= 7.
940  switch (spaceDim1)
941  {
942  case 0:
943  {
944  INTREPID2_TEST_FOR_EXCEPTION(operatorOrder1 > 0, std::invalid_argument, "For spaceDim1 = 0, operatorOrder1 must be 0.");
945  return dkEnum2;
946  }
947  case 1:
948  switch (spaceDim2)
949  {
950  case 1: return getDkTensorIndex<1, 1>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
951  case 2: return getDkTensorIndex<1, 2>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
952  case 3: return getDkTensorIndex<1, 3>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
953  case 4: return getDkTensorIndex<1, 4>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
954  case 5: return getDkTensorIndex<1, 5>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
955  case 6: return getDkTensorIndex<1, 6>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
956  default:
957  INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported dimension combination");
958  }
959  case 2:
960  switch (spaceDim2)
961  {
962  case 1: return getDkTensorIndex<2, 1>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
963  case 2: return getDkTensorIndex<2, 2>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
964  case 3: return getDkTensorIndex<2, 3>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
965  case 4: return getDkTensorIndex<2, 4>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
966  case 5: return getDkTensorIndex<2, 5>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
967  default:
968  INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported dimension combination");
969  }
970  case 3:
971  switch (spaceDim2)
972  {
973  case 1: return getDkTensorIndex<3, 1>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
974  case 2: return getDkTensorIndex<3, 2>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
975  case 3: return getDkTensorIndex<3, 3>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
976  case 4: return getDkTensorIndex<3, 4>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
977  default:
978  INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported dimension combination");
979  }
980  case 4:
981  switch (spaceDim2)
982  {
983  case 1: return getDkTensorIndex<4, 1>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
984  case 2: return getDkTensorIndex<4, 2>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
985  case 3: return getDkTensorIndex<4, 3>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
986  default:
987  INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported dimension combination");
988  }
989  case 5:
990  switch (spaceDim2)
991  {
992  case 1: return getDkTensorIndex<5, 1>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
993  case 2: return getDkTensorIndex<5, 2>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
994  default:
995  INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported dimension combination");
996  }
997  case 6:
998  switch (spaceDim2)
999  {
1000  case 1: return getDkTensorIndex<6, 1>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
1001  default:
1002  INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported dimension combination");
1003  }
1004  default:
1005  INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported dimension combination");
1006  }
1007  }
1008 
1013  virtual OperatorTensorDecomposition getSimpleOperatorDecomposition(const EOperator &operatorType) const
1014  {
1015  const int spaceDim = this->getDomainDimension();
1016 
1017  const EOperator VALUE = Intrepid2::OPERATOR_VALUE;
1018 
1019  std::vector< std::vector<EOperator> > opsVALUE{{VALUE, VALUE}};
1020 
1021  std::vector< std::vector<EOperator> > ops(spaceDim);
1022 
1023  switch (operatorType)
1024  {
1025  case VALUE:
1026  ops = opsVALUE;
1027  break;
1028  case OPERATOR_DIV:
1029  case OPERATOR_CURL:
1030  // DIV and CURL are multi-family bases; subclasses are required to override
1031  INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported operator type - TensorBasis subclass should override");
1032  break;
1033  case OPERATOR_GRAD:
1034  case OPERATOR_D1:
1035  case OPERATOR_D2:
1036  case OPERATOR_D3:
1037  case OPERATOR_D4:
1038  case OPERATOR_D5:
1039  case OPERATOR_D6:
1040  case OPERATOR_D7:
1041  case OPERATOR_D8:
1042  case OPERATOR_D9:
1043  case OPERATOR_D10:
1044  case OPERATOR_Dn:
1045  {
1046  auto opOrder = getOperatorOrder(operatorType); // number of derivatives that we take in total
1047  const int dkCardinality = getDkCardinality(operatorType, 2); // 2 because we have two tensor component bases, basis1_ and basis2_
1048 
1049  ops = std::vector< std::vector<EOperator> >(dkCardinality);
1050 
1051  // the Dk enumeration happens in lexicographic order (reading from left to right: x, y, z, etc.)
1052  // this governs the nesting order of the dkEnum1, dkEnum2 for loops below: dkEnum2 should increment fastest.
1053  for (int derivativeCountComp2=0; derivativeCountComp2<=opOrder; derivativeCountComp2++)
1054  {
1055  int derivativeCountComp1=opOrder-derivativeCountComp2;
1056  EOperator op1 = (derivativeCountComp1 == 0) ? OPERATOR_VALUE : EOperator(OPERATOR_D1 + (derivativeCountComp1 - 1));
1057  EOperator op2 = (derivativeCountComp2 == 0) ? OPERATOR_VALUE : EOperator(OPERATOR_D1 + (derivativeCountComp2 - 1));
1058 
1059  int dkCardinality1 = getDkCardinality(op1, 1); // use dim = 1 because this is a "simple" decomposition -- full decomposition will expand within the dimensions of basis1_
1060  int dkCardinality2 = getDkCardinality(op2, 1); // use dim = 1 because this is a "simple" decomposition -- full decomposition will expand within the dimensions of basis2_
1061 
1062  for (int dkEnum1=0; dkEnum1<dkCardinality1; dkEnum1++)
1063  {
1064  for (int dkEnum2=0; dkEnum2<dkCardinality2; dkEnum2++)
1065  {
1066  ordinal_type dkTensorIndex = getDkTensorIndex<1, 1>(dkEnum1, derivativeCountComp1, dkEnum2, derivativeCountComp2);
1067  ops[dkTensorIndex] = std::vector<EOperator>{op1, op2};
1068  }
1069  }
1070  }
1071  }
1072  break;
1073  }
1074 
1075  std::vector<double> weights(ops.size(), 1.0);
1076  return OperatorTensorDecomposition(ops, weights);
1077  }
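 
 // Worked example (illustrative): for OPERATOR_D2, dkCardinality(OPERATOR_D2, 2) == 3 and the
 // decomposition returned is {(D2,VALUE), (D1,D1), (VALUE,D2)} with unit weights: the three second
 // derivatives of a two-component tensor product, enumerated with the basis2 derivative count increasing.
 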
1078 
1081  virtual OperatorTensorDecomposition getOperatorDecomposition(const EOperator operatorType) const
1082  {
1083  if (((operatorType >= OPERATOR_D1) && (operatorType <= OPERATOR_D10)) || (operatorType == OPERATOR_GRAD))
1084  {
1085  // ordering of the operators is reverse-lexicographic, reading left to right (highest-dimension is fastest-moving).
1086  // first entry will be (operatorType, VALUE, …, VALUE)
1087  // next will be (operatorType - 1, OP_D1, VALUE, …, VALUE)
1088  // then (operatorType - 1, VALUE, OP_D1, …, VALUE)
1089 
1090  ordinal_type numBasisComponents = tensorComponents_.size();
1091 
1092  auto opOrder = getOperatorOrder(operatorType); // number of derivatives that we take in total
1093  const int dkCardinality = getDkCardinality(operatorType, numBasisComponents);
1094 
1095  std::vector< std::vector<EOperator> > ops(dkCardinality);
1096 
1097  std::vector<EOperator> prevEntry(numBasisComponents, OPERATOR_VALUE);
1098  prevEntry[0] = operatorType;
1099 
1100  ops[0] = prevEntry;
1101 
1102  for (ordinal_type dkOrdinal=1; dkOrdinal<dkCardinality; dkOrdinal++)
1103  {
1104  std::vector<EOperator> entry = prevEntry;
1105 
1106  // decrement to follow reverse lexicographic ordering:
1107  /*
1108  How to tell when it is time to decrement the nth entry:
1109  1. Let a be the sum of the opOrders for entries 0 through n-1.
1110  2. Let b be the sum of the nth entry and the final entry.
1111  3. If opOrder == a + b, then the nth entry should be decremented.
1112  */
1113  ordinal_type cumulativeOpOrder = 0;
1114  ordinal_type finalOpOrder = getOperatorOrder(entry[numBasisComponents-1]);
1115  for (ordinal_type compOrdinal=0; compOrdinal<numBasisComponents; compOrdinal++)
1116  {
1117  const ordinal_type thisOpOrder = getOperatorOrder(entry[compOrdinal]);
1118  cumulativeOpOrder += thisOpOrder;
1119  if (cumulativeOpOrder + finalOpOrder == opOrder)
1120  {
1121  // decrement this
1122  EOperator decrementedOp;
1123  if (thisOpOrder == 1)
1124  {
1125  decrementedOp = OPERATOR_VALUE;
1126  }
1127  else
1128  {
1129  decrementedOp = static_cast<EOperator>(OPERATOR_D1 + ((thisOpOrder - 1) - 1));
1130  }
1131  entry[compOrdinal] = decrementedOp;
1132  const ordinal_type remainingOpOrder = opOrder - cumulativeOpOrder + 1;
1133  entry[compOrdinal+1] = static_cast<EOperator>(OPERATOR_D1 + (remainingOpOrder - 1));
1134  for (ordinal_type i=compOrdinal+2; i<numBasisComponents; i++)
1135  {
1136  entry[i] = OPERATOR_VALUE;
1137  }
1138  break;
1139  }
1140  }
1141  ops[dkOrdinal] = entry;
1142  prevEntry = entry;
1143  }
1144  std::vector<double> weights(dkCardinality, 1.0);
1145 
1146  return OperatorTensorDecomposition(ops, weights);
1147  }
1148  else
1149  {
1150  OperatorTensorDecomposition opSimpleDecomposition = this->getSimpleOperatorDecomposition(operatorType);
1151  std::vector<BasisPtr> componentBases {basis1_, basis2_};
1152  return opSimpleDecomposition.expandedDecomposition(componentBases);
1153  }
1154  }
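 
 // Worked example (illustrative): for a basis with three tensor components and OPERATOR_D2,
 // dkCardinality(OPERATOR_D2, 3) == 6 and the enumeration produced above is
 //   (D2,VALUE,VALUE), (D1,D1,VALUE), (D1,VALUE,D1), (VALUE,D2,VALUE), (VALUE,D1,D1), (VALUE,VALUE,D2).
 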
1155 
1160  virtual BasisValues<OutputValueType,DeviceType> allocateBasisValues( TensorPoints<PointValueType,DeviceType> points, const EOperator operatorType = OPERATOR_VALUE) const override
1161  {
1162  const bool operatorIsDk = (operatorType >= OPERATOR_D1) && (operatorType <= OPERATOR_D10);
1163  const bool operatorSupported = (operatorType == OPERATOR_VALUE) || (operatorType == OPERATOR_GRAD) || (operatorType == OPERATOR_CURL) || (operatorType == OPERATOR_DIV) || operatorIsDk;
1164  INTREPID2_TEST_FOR_EXCEPTION(!operatorSupported, std::invalid_argument, "operator is not supported by allocateBasisValues");
1165 
1166  // check that points's spatial dimension matches the basis
1167  const int spaceDim = this->getDomainDimension();
1168  INTREPID2_TEST_FOR_EXCEPTION(spaceDim != points.extent_int(1), std::invalid_argument, "points must be shape (P,D), with D equal to the dimension of the basis domain");
1169 
1170  // check that points has enough tensor components
1171  ordinal_type numBasisComponents = tensorComponents_.size();
1172  if (numBasisComponents > points.numTensorComponents())
1173  {
1174  // Then we require points to have a trivial tensor structure. (Subclasses could be more sophisticated.)
1175  // (More sophisticated approaches are possible here, too, but likely the most common use case in which there is not a one-to-one correspondence
1176  // between basis components and point components will involve trivial tensor structure in the points...)
1177  INTREPID2_TEST_FOR_EXCEPTION(points.numTensorComponents() != 1, std::invalid_argument, "If points does not have the same number of tensor components as the basis, then it should have trivial tensor structure.");
1178  const ordinal_type numPoints = points.extent_int(0);
1179  auto outputView = this->allocateOutputView(numPoints, operatorType);
1180 
1181  Data<OutputValueType,DeviceType> outputData(outputView);
1182  TensorData<OutputValueType,DeviceType> outputTensorData(outputData);
1183 
1184  return BasisValues<OutputValueType,DeviceType>(outputTensorData);
1185  }
1186  INTREPID2_TEST_FOR_EXCEPTION(numBasisComponents > points.numTensorComponents(), std::invalid_argument, "points must have at least as many tensorial components as basis.");
1187 
1188  OperatorTensorDecomposition opDecomposition = getOperatorDecomposition(operatorType);
1189 
1190  ordinal_type numVectorComponents = opDecomposition.numVectorComponents();
1191  const bool useVectorData = numVectorComponents > 1;
1192 
1193  std::vector<ordinal_type> componentPointCounts(numBasisComponents);
1194  ordinal_type pointComponentNumber = 0;
1195  for (ordinal_type r=0; r<numBasisComponents; r++)
1196  {
1197  const ordinal_type compSpaceDim = tensorComponents_[r]->getDomainDimension();
1198  ordinal_type dimsSoFar = 0;
1199  ordinal_type numPointsForBasisComponent = 1;
1200  while (dimsSoFar < compSpaceDim)
1201  {
1202  INTREPID2_TEST_FOR_EXCEPTION(pointComponentNumber >= points.numTensorComponents(), std::invalid_argument, "Error in processing points container; perhaps it is mis-sized?");
1203  const int numComponentPoints = points.componentPointCount(pointComponentNumber);
1204  const int numComponentDims = points.getTensorComponent(pointComponentNumber).extent_int(1);
1205  numPointsForBasisComponent *= numComponentPoints;
1206  dimsSoFar += numComponentDims;
1207  INTREPID2_TEST_FOR_EXCEPTION(dimsSoFar > points.numTensorComponents(), std::invalid_argument, "Error in processing points container; perhaps it is mis-sized?");
1208  pointComponentNumber++;
1209  }
1210  componentPointCounts[r] = numPointsForBasisComponent;
1211  }
1212 
1213  if (useVectorData)
1214  {
1215  const int numFamilies = 1;
1216  std::vector< std::vector<TensorData<OutputValueType,DeviceType> > > vectorComponents(numFamilies, std::vector<TensorData<OutputValueType,DeviceType> >(numVectorComponents));
1217 
1218  const int familyOrdinal = 0;
1219  for (ordinal_type vectorComponentOrdinal=0; vectorComponentOrdinal<numVectorComponents; vectorComponentOrdinal++)
1220  {
1221  if (!opDecomposition.identicallyZeroComponent(vectorComponentOrdinal))
1222  {
1223  std::vector< Data<OutputValueType,DeviceType> > componentData;
1224  for (ordinal_type r=0; r<numBasisComponents; r++)
1225  {
1226  const int numComponentPoints = componentPointCounts[r];
1227  const EOperator op = opDecomposition.op(vectorComponentOrdinal, r);
1228  auto componentView = tensorComponents_[r]->allocateOutputView(numComponentPoints, op);
1229  componentData.push_back(Data<OutputValueType,DeviceType>(componentView));
1230  }
1231  vectorComponents[familyOrdinal][vectorComponentOrdinal] = TensorData<OutputValueType,DeviceType>(componentData);
1232  }
1233  }
1234  VectorData<OutputValueType,DeviceType> vectorData(vectorComponents);
1235  return BasisValues<OutputValueType,DeviceType>(vectorData);
1236  }
1237  else
1238  {
1239  // TensorData: single tensor product
1240  std::vector< Data<OutputValueType,DeviceType> > componentData;
1241 
1242  const ordinal_type vectorComponentOrdinal = 0;
1243  for (ordinal_type r=0; r<numBasisComponents; r++)
1244  {
1245  const int numComponentPoints = componentPointCounts[r];
1246  const EOperator op = opDecomposition.op(vectorComponentOrdinal, r);
1247  auto componentView = tensorComponents_[r]->allocateOutputView(numComponentPoints, op);
1248 
1249  const int rank = 2; // (F,P) -- TensorData-only BasisValues are always scalar-valued. Use VectorData for vector-valued.
 1250 // (we need to be explicit about the rank argument because GRAD, even in 1D, elevates to rank 3, so e.g. DIV of HDIV uses a componentView that is rank 3;
1251  // we want Data to insulate us from that fact)
1252  const Kokkos::Array<int,7> extents {componentView.extent_int(0), componentView.extent_int(1), 1,1,1,1,1};
1253  Kokkos::Array<DataVariationType,7> variationType {GENERAL, GENERAL, CONSTANT, CONSTANT, CONSTANT, CONSTANT, CONSTANT };
1254  componentData.push_back(Data<OutputValueType,DeviceType>(componentView, rank, extents, variationType));
1255  }
1256 
1257  TensorData<OutputValueType,DeviceType> tensorData(componentData);
1258 
1259  std::vector< TensorData<OutputValueType,DeviceType> > tensorDataEntries {tensorData};
1260  return BasisValues<OutputValueType,DeviceType>(tensorDataEntries);
1261  }
1262  }
1263 
1264  // since the getValues() below only overrides the FEM variant, we specify that
1265  // we use the base class's getValues(), which implements the FVD variant by throwing an exception.
1266  // (It's an error to use the FVD variant on this basis.)
1267  using BasisBase::getValues;
1268 
1280  void getComponentPoints(const PointViewType inputPoints, const bool attemptTensorDecomposition,
1281  PointViewType & inputPoints1, PointViewType & inputPoints2, bool &tensorDecompositionSucceeded) const
1282  {
1283  INTREPID2_TEST_FOR_EXCEPTION(attemptTensorDecomposition, std::invalid_argument, "tensor decomposition not yet supported");
1284 
1285  // for inputPoints that are actually tensor-product of component quadrature points (say),
 1286 // having just the one input (which will have a lot of redundant point data) is suboptimal.
1287  // The general case can have unique x/y/z coordinates at every point, though, so we have to support that
1288  // when this interface is used. But we may try detecting that the data is tensor-product and compressing
1289  // from there... Ultimately, we should also add a getValues() variant that takes multiple input point containers,
1290  // one for each tensorial dimension.
1291 
1292  // this initial implementation is intended to simplify development of 2D and 3D bases, while also opening
1293  // the possibility of higher-dimensional bases. It is not necessarily optimized for speed/memory. There
1294  // are things we can do in this regard, which may become important for matrix-free computations wherein
1295  // basis values don't get stored but are computed dynamically.
1296 
1297  int spaceDim1 = basis1_->getDomainDimension();
1298  int spaceDim2 = basis2_->getDomainDimension();
1299 
1300  int totalSpaceDim = inputPoints.extent_int(1);
1301 
1302  TEUCHOS_ASSERT(spaceDim1 + spaceDim2 == totalSpaceDim);
1303 
1304  // first pass: just take subviews to get input points -- this will result in redundant computations when points are themselves tensor product (i.e., inputPoints itself contains redundant data)
1305 
1306  inputPoints1 = Kokkos::subview(inputPoints,Kokkos::ALL(),std::make_pair(0,spaceDim1));
1307  inputPoints2 = Kokkos::subview(inputPoints,Kokkos::ALL(),std::make_pair(spaceDim1,totalSpaceDim));
1308 
1309  // std::cout << "inputPoints : " << inputPoints.extent(0) << " x " << inputPoints.extent(1) << std::endl;
1310  // std::cout << "inputPoints1 : " << inputPoints1.extent(0) << " x " << inputPoints1.extent(1) << std::endl;
1311  // std::cout << "inputPoints2 : " << inputPoints2.extent(0) << " x " << inputPoints2.extent(1) << std::endl;
1312 
1313  tensorDecompositionSucceeded = false;
1314  }
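 // An illustrative split (hypothetical sizes): for a composite domain built as quadrilateral x line,
 // spaceDim1 == 2 and spaceDim2 == 1, so a (P,3) inputPoints container is split above as
 //
 //   inputPoints1 = subview(inputPoints, ALL(), [0,2))  // the (x,y) coordinates, shape (P,2)
 //   inputPoints2 = subview(inputPoints, ALL(), [2,3))  // the z coordinate, shape (P,1)
 //
 // Because no compression is attempted yet, both subviews retain all P points, and
 // tensorDecompositionSucceeded is reported as false.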
1315 
1324  virtual void getDofCoords( typename BasisBase::ScalarViewType dofCoords ) const override
1325  {
1326  int spaceDim1 = basis1_->getBaseCellTopology().getDimension();
1327  int spaceDim2 = basis2_->getBaseCellTopology().getDimension();
1328 
1329  using ValueType = typename BasisBase::ScalarViewType::value_type;
1330  using ResultLayout = typename DeduceLayout< typename BasisBase::ScalarViewType >::result_layout;
1331  using ViewType = Kokkos::DynRankView<ValueType, ResultLayout, DeviceType >;
1332 
1333  const ordinal_type basisCardinality1 = basis1_->getCardinality();
1334  const ordinal_type basisCardinality2 = basis2_->getCardinality();
1335 
1336  ViewType dofCoords1("dofCoords1",basisCardinality1,spaceDim1);
1337  ViewType dofCoords2("dofCoords2",basisCardinality2,spaceDim2);
1338 
1339  basis1_->getDofCoords(dofCoords1);
1340  basis2_->getDofCoords(dofCoords2);
1341 
1342  Kokkos::RangePolicy<ExecutionSpace> policy(0, basisCardinality2);
1343  Kokkos::parallel_for(policy, KOKKOS_LAMBDA (const int fieldOrdinal2)
1344  {
1345  for (int fieldOrdinal1=0; fieldOrdinal1<basisCardinality1; fieldOrdinal1++)
1346  {
1347  const ordinal_type fieldOrdinal = fieldOrdinal1 + fieldOrdinal2 * basisCardinality1;
1348  for (int d1=0; d1<spaceDim1; d1++)
1349  {
1350  dofCoords(fieldOrdinal,d1) = dofCoords1(fieldOrdinal1,d1);
1351  }
1352  for (int d2=0; d2<spaceDim2; d2++)
1353  {
1354  dofCoords(fieldOrdinal,spaceDim1+d2) = dofCoords2(fieldOrdinal2,d2);
1355  }
1356  }
1357  });
1358  }
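 // A worked example of the tensor numbering used above (illustrative sizes): with basisCardinality1 == 3
 // and basisCardinality2 == 2, fieldOrdinal = fieldOrdinal1 + fieldOrdinal2 * 3, so
 //
 //   (fieldOrdinal1,fieldOrdinal2) = (0,0),(1,0),(2,0),(0,1),(1,1),(2,1)  -->  fieldOrdinal = 0,1,2,3,4,5
 //
 // i.e., the first component's ordinal varies fastest, and each composite dof coordinate is the
 // concatenation (dofCoords1(fieldOrdinal1,:), dofCoords2(fieldOrdinal2,:)).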
1359 
1360 
1370  virtual void getDofCoeffs( typename BasisBase::ScalarViewType dofCoeffs ) const override
1371  {
1372  using ValueType = typename BasisBase::ScalarViewType::value_type;
1373  using ResultLayout = typename DeduceLayout< typename BasisBase::ScalarViewType >::result_layout;
1374  using ViewType = Kokkos::DynRankView<ValueType, ResultLayout, DeviceType >;
1375 
1376  const ordinal_type basisCardinality1 = basis1_->getCardinality();
1377  const ordinal_type basisCardinality2 = basis2_->getCardinality();
1378 
1379  bool isVectorBasis1 = getFieldRank(basis1_->getFunctionSpace()) == 1;
1380  bool isVectorBasis2 = getFieldRank(basis2_->getFunctionSpace()) == 1;
1381 
1382  INTREPID2_TEST_FOR_EXCEPTION(isVectorBasis1 && isVectorBasis2, std::invalid_argument, "the case in which basis1 and basis2 are vector bases is not supported");
1383 
1384  int basisDim1 = isVectorBasis1 ? basis1_->getBaseCellTopology().getDimension() : 1;
1385  int basisDim2 = isVectorBasis2 ? basis2_->getBaseCellTopology().getDimension() : 1;
1386 
1387  auto dofCoeffs1 = isVectorBasis1 ? ViewType("dofCoeffs1",basis1_->getCardinality(), basisDim1) : ViewType("dofCoeffs1",basis1_->getCardinality());
1388  auto dofCoeffs2 = isVectorBasis2 ? ViewType("dofCoeffs2",basis2_->getCardinality(), basisDim2) : ViewType("dofCoeffs2",basis2_->getCardinality());
1389 
1390  basis1_->getDofCoeffs(dofCoeffs1);
1391  basis2_->getDofCoeffs(dofCoeffs2);
1392 
1393  Kokkos::RangePolicy<ExecutionSpace> policy(0, basisCardinality2);
1394  Kokkos::parallel_for(policy, KOKKOS_LAMBDA (const int fieldOrdinal2)
1395  {
1396  for (int fieldOrdinal1=0; fieldOrdinal1<basisCardinality1; fieldOrdinal1++)
1397  {
1398  const ordinal_type fieldOrdinal = fieldOrdinal1 + fieldOrdinal2 * basisCardinality1;
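 // Since at most one of the two bases is vector-valued (enforced above), at most one of basisDim1,
 // basisDim2 exceeds 1, so d1+d2 below addresses the single vector dimension (and is 0 in the
 // scalar-scalar case, where dofCoeffs is rank 1 and the trailing index passed to access() is 0).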
1399  for (int d1 = 0; d1 <basisDim1; d1++) {
1400  for (int d2 = 0; d2 <basisDim2; d2++) {
1401  dofCoeffs.access(fieldOrdinal,d1+d2) = dofCoeffs1.access(fieldOrdinal1,d1);
1402  dofCoeffs.access(fieldOrdinal,d1+d2) *= dofCoeffs2.access(fieldOrdinal2,d2);
1403  }
1404  }
1405  }
1406  });
1407  }
1408 
1413  virtual
1414  const char*
1415  getName() const override {
1416  return name_.c_str();
1417  }
1418 
1419  std::vector<BasisPtr> getTensorBasisComponents() const
1420  {
1421  return tensorComponents_;
1422  }
1423 
1435  virtual
1436  void
 1437  getValues( BasisValues<OutputValueType,DeviceType> outputValues,
 1438  const TensorPoints<PointValueType,DeviceType> inputPoints,
1439  const EOperator operatorType = OPERATOR_VALUE ) const override
1440  {
1441  const ordinal_type numTensorComponents = tensorComponents_.size();
1442  if (inputPoints.numTensorComponents() < numTensorComponents)
1443  {
 1444  // then we require that both inputPoints and outputValues have trivial tensor product structure
1445  INTREPID2_TEST_FOR_EXCEPTION( inputPoints.numTensorComponents() != 1, std::invalid_argument, "If inputPoints differs from the tensor basis in component count, then inputPoints must have trivial tensor product structure" );
1446  INTREPID2_TEST_FOR_EXCEPTION( outputValues.numFamilies() != 1, std::invalid_argument, "If inputPoints differs from the tensor basis in component count, outputValues must have a single family with trivial tensor product structure" );
1447  INTREPID2_TEST_FOR_EXCEPTION( outputValues.tensorData().numTensorComponents() != 1, std::invalid_argument, "If inputPoints differs from the tensor basis in component count, outputValues must have a single family with trivial tensor product structure" );
1448 
1449  OutputViewType outputView = outputValues.tensorData().getTensorComponent(0).getUnderlyingView();
1450  PointViewType pointView = inputPoints.getTensorComponent(0);
1451  this->getValues(outputView, pointView, operatorType);
1452  return;
1453  }
1454 
1455  OperatorTensorDecomposition operatorDecomposition = getOperatorDecomposition(operatorType);
1456 
1457  const ordinal_type numVectorComponents = operatorDecomposition.numVectorComponents();
1458  const bool useVectorData = numVectorComponents > 1;
1459  const ordinal_type numBasisComponents = operatorDecomposition.numBasisComponents();
1460 
1461  for (ordinal_type vectorComponentOrdinal=0; vectorComponentOrdinal<numVectorComponents; vectorComponentOrdinal++)
1462  {
1463  const double weight = operatorDecomposition.weight(vectorComponentOrdinal);
1464  ordinal_type pointComponentOrdinal = 0;
1465  for (ordinal_type basisOrdinal=0; basisOrdinal<numBasisComponents; basisOrdinal++, pointComponentOrdinal++)
1466  {
1467  const EOperator op = operatorDecomposition.op(vectorComponentOrdinal, basisOrdinal);
1468  // by convention, op == OPERATOR_MAX signals a zero component; skip
1469  if (op != OPERATOR_MAX)
1470  {
1471  const int vectorFamily = 0; // TensorBasis always has just a single family; multiple families arise in DirectSumBasis
1472  auto tensorData = useVectorData ? outputValues.vectorData().getComponent(vectorFamily,vectorComponentOrdinal) : outputValues.tensorData();
1473  INTREPID2_TEST_FOR_EXCEPTION( ! tensorData.getTensorComponent(basisOrdinal).isValid(), std::invalid_argument, "Invalid output component encountered");
1474 
1475  const Data<OutputValueType,DeviceType> & outputData = tensorData.getTensorComponent(basisOrdinal);
1476 
1477  auto basisValueView = outputData.getUnderlyingView();
1478  PointViewType pointView = inputPoints.getTensorComponent(pointComponentOrdinal);
1479  const ordinal_type basisDomainDimension = tensorComponents_[basisOrdinal]->getDomainDimension();
1480  if (pointView.extent_int(1) == basisDomainDimension)
1481  {
1482  tensorComponents_[basisOrdinal]->getValues(basisValueView, pointView, op);
1483  }
1484  else
1485  {
1486  // we need to wrap the basisValueView in a BasisValues container, and to wrap the point components in a TensorPoints container.
1487 
1488  // combine point components to build up to basisDomainDimension
1489  ordinal_type dimsSoFar = 0;
1490  std::vector< ScalarView<PointValueType,DeviceType> > basisPointComponents;
1491  while (dimsSoFar < basisDomainDimension)
1492  {
1493  INTREPID2_TEST_FOR_EXCEPTION(pointComponentOrdinal >= inputPoints.numTensorComponents(), std::invalid_argument, "Error in processing points container; perhaps it is mis-sized?");
1494  const auto & pointComponent = inputPoints.getTensorComponent(pointComponentOrdinal);
1495  const ordinal_type numComponentDims = pointComponent.extent_int(1);
1496  dimsSoFar += numComponentDims;
 1497  INTREPID2_TEST_FOR_EXCEPTION(dimsSoFar > basisDomainDimension, std::invalid_argument, "Error in processing points container; perhaps it is mis-sized?");
1498  basisPointComponents.push_back(pointComponent);
1499  if (dimsSoFar < basisDomainDimension)
1500  {
1501  // we will pass through this loop again, so we should increment the point component ordinal
1502  pointComponentOrdinal++;
1503  }
1504  }
1505 
1506  TensorPoints<PointValueType, DeviceType> basisPoints(basisPointComponents);
1507 
1508  bool useVectorData2 = (basisValueView.rank() == 3);
 1509 
 1510  BasisValues<OutputValueType,DeviceType> basisValues;
1511  if (useVectorData2)
1512  {
1513  VectorData<OutputValueType,DeviceType> vectorData(outputData);
1514  basisValues = BasisValues<OutputValueType,DeviceType>(vectorData);
1515  }
1516  else
1517  {
1518  TensorData<OutputValueType,DeviceType> tensorData2(outputData);
1519  basisValues = BasisValues<OutputValueType,DeviceType>(tensorData2);
1520  }
1521 
1522  tensorComponents_[basisOrdinal]->getValues(basisValues, basisPoints, op);
1523  }
1524 
 1525  // operatorDecomposition.rotateXYNinetyDegrees() is set to true for one of the H(curl) wedge families
1526  // (due to the fact that Intrepid2::EOperator does not allow us to extract individual vector components
1527  // via, e.g., OPERATOR_X, OPERATOR_Y, etc., we don't have a way of expressing the decomposition
1528  // just in terms of EOperator and component-wise scalar weights; we could also do this via component-wise
1529  // matrix weights, but this would involve a more intrusive change to the implementation).
1530  const bool spansXY = (vectorComponentOrdinal == 0) && (basisValueView.extent_int(2) == 2);
1531  if (spansXY && operatorDecomposition.rotateXYNinetyDegrees())
1532  {
1533  // map from (f_x,f_y) --> (-f_y,f_x)
1534  auto policy = Kokkos::MDRangePolicy<ExecutionSpace,Kokkos::Rank<2>>({0,0},{basisValueView.extent_int(0),basisValueView.extent_int(1)});
1535  Kokkos::parallel_for("rotateXYNinetyDegrees", policy,
1536  KOKKOS_LAMBDA (const int &fieldOrdinal, const int &pointOrdinal) {
1537  const auto f_x = basisValueView(fieldOrdinal,pointOrdinal,0); // copy
1538  const auto &f_y = basisValueView(fieldOrdinal,pointOrdinal,1); // reference
1539  basisValueView(fieldOrdinal,pointOrdinal,0) = -f_y;
1540  basisValueView(fieldOrdinal,pointOrdinal,1) = f_x;
1541  });
1542  }
1543 
1544  // if weight is non-trivial (not 1.0), then we need to multiply one of the component views by weight.
1545  // we do that for the first basisOrdinal's values
1546  if ((weight != 1.0) && (basisOrdinal == 0))
1547  {
1548  if (basisValueView.rank() == 2)
1549  {
1550  auto policy = Kokkos::MDRangePolicy<ExecutionSpace,Kokkos::Rank<2>>({0,0},{basisValueView.extent_int(0),basisValueView.extent_int(1)});
1551  Kokkos::parallel_for("multiply basisValueView by weight", policy,
1552  KOKKOS_LAMBDA (const int &fieldOrdinal, const int &pointOrdinal) {
1553  basisValueView(fieldOrdinal,pointOrdinal) *= weight;
1554  });
1555  }
1556  else if (basisValueView.rank() == 3)
1557  {
1558  auto policy = Kokkos::MDRangePolicy<ExecutionSpace,Kokkos::Rank<3>>({0,0,0},{basisValueView.extent_int(0),basisValueView.extent_int(1),basisValueView.extent_int(2)});
1559  Kokkos::parallel_for("multiply basisValueView by weight", policy,
1560  KOKKOS_LAMBDA (const int &fieldOrdinal, const int &pointOrdinal, const int &d) {
1561  basisValueView(fieldOrdinal,pointOrdinal,d) *= weight;
1562  });
1563  }
1564  else
1565  {
1566  INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported rank for basisValueView");
1567  }
1568  }
1569  }
1570  }
1571  }
1572  }
1573 
1592  void getValues( OutputViewType outputValues, const PointViewType inputPoints,
1593  const EOperator operatorType = OPERATOR_VALUE ) const override
1594  {
1595  bool tensorPoints; // true would mean that we take the tensor product of inputPoints1 and inputPoints2 (and that this would be equivalent to inputPoints as given -- i.e., inputPoints1 and inputPoints2 would be a tensor decomposition of inputPoints)
1596  bool attemptTensorDecomposition = false; // support for this not yet implemented
1597  PointViewType inputPoints1, inputPoints2;
1598  getComponentPoints(inputPoints, attemptTensorDecomposition, inputPoints1, inputPoints2, tensorPoints);
1599 
1600  const auto functionSpace = this->getFunctionSpace();
1601 
1602  if ((functionSpace == FUNCTION_SPACE_HVOL) || (functionSpace == FUNCTION_SPACE_HGRAD))
1603  {
1604  // then we can handle VALUE, GRAD, and Op_Dn without reference to subclass
1605  switch (operatorType)
1606  {
1607  case OPERATOR_VALUE:
1608  case OPERATOR_GRAD:
1609  case OPERATOR_D1:
1610  case OPERATOR_D2:
1611  case OPERATOR_D3:
1612  case OPERATOR_D4:
1613  case OPERATOR_D5:
1614  case OPERATOR_D6:
1615  case OPERATOR_D7:
1616  case OPERATOR_D8:
1617  case OPERATOR_D9:
1618  case OPERATOR_D10:
1619  {
1620  auto opOrder = getOperatorOrder(operatorType); // number of derivatives that we take in total
1621  // the Dk enumeration happens in lexicographic order (reading from left to right: x, y, z, etc.)
1622  // this governs the nesting order of the dkEnum1, dkEnum2 for loops below: dkEnum2 should increment fastest.
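 // Worked example: OPERATOR_D2 for a 2D tensor basis with 1D components (spaceDim1 == spaceDim2 == 1)
 // splits as follows:
 //   derivativeCountComp2 = 0: (op1,op2) = (OPERATOR_D2, OPERATOR_VALUE) --> d^2/dx^2
 //   derivativeCountComp2 = 1: (op1,op2) = (OPERATOR_D1, OPERATOR_D1)    --> d^2/dx dy
 //   derivativeCountComp2 = 2: (op1,op2) = (OPERATOR_VALUE, OPERATOR_D2) --> d^2/dy^2
 // with getTensorDkEnumeration() mapping each (dkEnum1,dkEnum2) pair into the composite D2 enumeration.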
1623  for (int derivativeCountComp2=0; derivativeCountComp2<=opOrder; derivativeCountComp2++)
1624  {
1625  int derivativeCountComp1=opOrder-derivativeCountComp2;
1626  EOperator op1 = (derivativeCountComp1 == 0) ? OPERATOR_VALUE : EOperator(OPERATOR_D1 + (derivativeCountComp1 - 1));
1627  EOperator op2 = (derivativeCountComp2 == 0) ? OPERATOR_VALUE : EOperator(OPERATOR_D1 + (derivativeCountComp2 - 1));
1628 
1629  int spaceDim1 = inputPoints1.extent_int(1);
1630  int spaceDim2 = inputPoints2.extent_int(1);
1631 
1632  int dkCardinality1 = (op1 != OPERATOR_VALUE) ? getDkCardinality(op1, spaceDim1) : 1;
1633  int dkCardinality2 = (op2 != OPERATOR_VALUE) ? getDkCardinality(op2, spaceDim2) : 1;
1634 
1635  int basisCardinality1 = basis1_->getCardinality();
1636  int basisCardinality2 = basis2_->getCardinality();
1637 
1638  int totalPointCount = tensorPoints ? inputPoints1.extent_int(0) * inputPoints2.extent_int(0) : inputPoints1.extent_int(0);
1639 
1640  int pointCount1, pointCount2;
1641  if (tensorPoints)
1642  {
1643  pointCount1 = inputPoints1.extent_int(0);
1644  pointCount2 = inputPoints2.extent_int(0);
1645  }
1646  else
1647  {
1648  pointCount1 = totalPointCount;
1649  pointCount2 = totalPointCount;
1650  }
1651 
1652  OutputViewType outputValues1, outputValues2;
1653  if (op1 == OPERATOR_VALUE)
1654  outputValues1 = getMatchingViewWithLabel(outputValues, "output values - basis 1",basisCardinality1,pointCount1);
1655  else
1656  outputValues1 = getMatchingViewWithLabel(outputValues, "output values - basis 1",basisCardinality1,pointCount1,dkCardinality1);
1657 
1658  if (op2 == OPERATOR_VALUE)
1659  outputValues2 = getMatchingViewWithLabel(outputValues, "output values - basis 2",basisCardinality2,pointCount2);
1660  else
1661  outputValues2 = getMatchingViewWithLabel(outputValues, "output values - basis 2",basisCardinality2,pointCount2,dkCardinality2);
1662 
1663  basis1_->getValues(outputValues1,inputPoints1,op1);
1664  basis2_->getValues(outputValues2,inputPoints2,op2);
1665 
1666  const int outputVectorSize = getVectorSizeForHierarchicalParallelism<OutputValueType>();
1667  const int pointVectorSize = getVectorSizeForHierarchicalParallelism<PointValueType>();
1668  const int vectorSize = std::max(outputVectorSize,pointVectorSize);
1669 
1670  auto policy = Kokkos::TeamPolicy<ExecutionSpace>(basisCardinality1,Kokkos::AUTO(),vectorSize);
1671 
1672  double weight = 1.0;
 1673  using FunctorType = TensorViewFunctor<ExecutionSpace, OutputValueType, OutputViewType>;
 1674 
1675  for (int dkEnum1=0; dkEnum1<dkCardinality1; dkEnum1++)
1676  {
1677  auto outputValues1_dkEnum1 = (op1 != OPERATOR_VALUE) ? Kokkos::subview(outputValues1,Kokkos::ALL(),Kokkos::ALL(),dkEnum1)
1678  : Kokkos::subview(outputValues1,Kokkos::ALL(),Kokkos::ALL());
1679  for (int dkEnum2=0; dkEnum2<dkCardinality2; dkEnum2++)
1680  {
1681  auto outputValues2_dkEnum2 = (op2 != OPERATOR_VALUE) ? Kokkos::subview(outputValues2,Kokkos::ALL(),Kokkos::ALL(),dkEnum2)
1682  : Kokkos::subview(outputValues2,Kokkos::ALL(),Kokkos::ALL());
1683 
1684  ordinal_type dkTensorIndex = getTensorDkEnumeration(dkEnum1, derivativeCountComp1, dkEnum2, derivativeCountComp2);
1685  auto outputValues_dkTensor = Kokkos::subview(outputValues,Kokkos::ALL(),Kokkos::ALL(),dkTensorIndex);
1686  // Note that there may be performance optimizations available here:
1687  // - could eliminate interior for loop in favor of having a vector-valued outputValues1_dk
1688  // - could add support to TensorViewFunctor (and probably TensorViewIterator) for this kind of tensor Dk type of traversal
1689  // (this would allow us to eliminate both for loops here)
1690  // At the moment, we defer such optimizations on the idea that this may not ever become a performance bottleneck.
1691  FunctorType functor(outputValues_dkTensor, outputValues1_dkEnum1, outputValues2_dkEnum2, tensorPoints, weight);
1692  Kokkos::parallel_for("TensorViewFunctor", policy , functor);
1693  }
1694  }
1695  }
1696  }
1697  break;
1698  default: // non-OPERATOR_Dn case must be handled by subclass.
1699  this->getValues(outputValues, operatorType, inputPoints1, inputPoints2, tensorPoints);
1700  }
1701  }
1702  else
1703  {
1704  // not HVOL or HGRAD; subclass must handle
1705  this->getValues(outputValues, operatorType, inputPoints1, inputPoints2, tensorPoints);
1706  }
1707  }
1708 
1734  virtual void getValues(OutputViewType outputValues, const EOperator operatorType,
1735  const PointViewType inputPoints1, const PointViewType inputPoints2,
1736  bool tensorPoints) const
1737  {
1738  INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "one-operator, two-inputPoints getValues should be overridden by TensorBasis subclasses");
1739  }
1740 
1764  void getValues( OutputViewType outputValues,
1765  const PointViewType inputPoints1, const EOperator operatorType1,
1766  const PointViewType inputPoints2, const EOperator operatorType2,
1767  bool tensorPoints, double weight=1.0) const
1768  {
1769  int basisCardinality1 = basis1_->getCardinality();
1770  int basisCardinality2 = basis2_->getCardinality();
1771 
1772  int totalPointCount = tensorPoints ? inputPoints1.extent_int(0) * inputPoints2.extent_int(0) : inputPoints1.extent_int(0);
1773 
1774  int pointCount1, pointCount2;
1775  if (tensorPoints)
1776  {
1777  pointCount1 = inputPoints1.extent_int(0);
1778  pointCount2 = inputPoints2.extent_int(0);
1779  }
1780  else
1781  {
1782  pointCount1 = totalPointCount;
1783  pointCount2 = totalPointCount;
1784  }
1785 
1786  const ordinal_type spaceDim1 = inputPoints1.extent_int(1);
1787  const ordinal_type spaceDim2 = inputPoints2.extent_int(1);
1788 
1789  INTREPID2_TEST_FOR_EXCEPTION(!tensorPoints && (totalPointCount != inputPoints2.extent_int(0)),
1790  std::invalid_argument, "If tensorPoints is false, the point counts must match!");
1791 
1792  const ordinal_type opRank1 = getOperatorRank(basis1_->getFunctionSpace(), operatorType1, spaceDim1);
1793  const ordinal_type opRank2 = getOperatorRank(basis2_->getFunctionSpace(), operatorType2, spaceDim2);
1794 
1795  const ordinal_type outputRank1 = opRank1 + getFieldRank(basis1_->getFunctionSpace());
1796  const ordinal_type outputRank2 = opRank2 + getFieldRank(basis2_->getFunctionSpace());
1797 
1798  OutputViewType outputValues1, outputValues2;
1799  if (outputRank1 == 0)
1800  {
1801  outputValues1 = getMatchingViewWithLabel(outputValues,"output values - basis 1",basisCardinality1,pointCount1);
1802  }
1803  else if (outputRank1 == 1)
1804  {
1805  outputValues1 = getMatchingViewWithLabel(outputValues,"output values - basis 1",basisCardinality1,pointCount1,spaceDim1);
1806  }
1807  else
1808  {
1809  INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported opRank1");
1810  }
1811 
1812  if (outputRank2 == 0)
1813  {
1814  outputValues2 = getMatchingViewWithLabel(outputValues,"output values - basis 2",basisCardinality2,pointCount2);
1815  }
1816  else if (outputRank2 == 1)
1817  {
1818  outputValues2 = getMatchingViewWithLabel(outputValues,"output values - basis 2",basisCardinality2,pointCount2,spaceDim2);
1819  }
1820  else
1821  {
1822  INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported opRank2");
1823  }
1824 
1825  basis1_->getValues(outputValues1,inputPoints1,operatorType1);
1826  basis2_->getValues(outputValues2,inputPoints2,operatorType2);
1827 
1828  const int outputVectorSize = getVectorSizeForHierarchicalParallelism<OutputValueType>();
1829  const int pointVectorSize = getVectorSizeForHierarchicalParallelism<PointValueType>();
1830  const int vectorSize = std::max(outputVectorSize,pointVectorSize);
1831 
1832  auto policy = Kokkos::TeamPolicy<ExecutionSpace>(basisCardinality1,Kokkos::AUTO(),vectorSize);
1833 
 1834  using FunctorType = TensorViewFunctor<ExecutionSpace, OutputValueType, OutputViewType>;
 1835 
1836  FunctorType functor(outputValues, outputValues1, outputValues2, tensorPoints, weight);
1837  Kokkos::parallel_for("TensorViewFunctor", policy , functor);
1838  }
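 // A typical call from a subclass's operator-specific override (illustrative; the particular operator
 // pair and the -1.0 weight are hypothetical, not prescribed by this class):
 //
 //   // composite values = -1.0 * (d/dx of basis1) * (VALUE of basis2):
 //   this->getValues(outputValues, inputPoints1, OPERATOR_D1, inputPoints2, OPERATOR_VALUE, tensorPoints, -1.0);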
1839 
1844  virtual HostBasisPtr<OutputValueType, PointValueType>
1845  getHostBasis() const override {
1846  TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, "TensorBasis subclasses must override getHostBasis");
1847  }
1848  }; // Basis_TensorBasis
1849 
1857  template<class ExecutionSpace, class OutputScalar, class OutputFieldType>
 1858  struct TensorBasis3_Functor
 1859  {
1860  using ScratchSpace = typename ExecutionSpace::scratch_memory_space;
1861  using OutputScratchView = Kokkos::View<OutputScalar*,ScratchSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
1862 
1863  using TeamPolicy = Kokkos::TeamPolicy<ExecutionSpace>;
1864  using TeamMember = typename TeamPolicy::member_type;
1865 
1866  OutputFieldType output_; // F,P
1867  OutputFieldType input1_; // F1,P[,D] or F1,P1[,D]
1868  OutputFieldType input2_; // F2,P[,D] or F2,P2[,D]
 1869  OutputFieldType input3_; // F3,P[,D] or F3,P3[,D]
1870 
1871  int numFields_, numPoints_;
1872  int numFields1_, numPoints1_;
1873  int numFields2_, numPoints2_;
1874  int numFields3_, numPoints3_;
1875 
1876  bool tensorPoints_; // if true, input1, input2, input3 refer to values at decomposed points, and P = P1 * P2 * P3. If false, then the three inputs refer to points in the full-dimensional space, and their point lengths are the same as that of the final output.
1877 
1878  double weight_;
1879 
1880  TensorBasis3_Functor(OutputFieldType output, OutputFieldType inputValues1, OutputFieldType inputValues2, OutputFieldType inputValues3,
1881  bool tensorPoints, double weight)
1882  : output_(output), input1_(inputValues1), input2_(inputValues2), input3_(inputValues3), tensorPoints_(tensorPoints), weight_(weight)
1883  {
1884  numFields_ = output.extent_int(0);
1885  numPoints_ = output.extent_int(1);
1886 
1887  numFields1_ = inputValues1.extent_int(0);
1888  numPoints1_ = inputValues1.extent_int(1);
1889 
1890  numFields2_ = inputValues2.extent_int(0);
1891  numPoints2_ = inputValues2.extent_int(1);
1892 
1893  numFields3_ = inputValues3.extent_int(0);
1894  numPoints3_ = inputValues3.extent_int(1);
1895  /*
1896  We don't yet support tensor-valued bases here (only vector and scalar). The main design question is how the layouts
 1897  of the input containers relate to the layout of the output container. The work we've done in TensorViewIterator basically
1898  shows the choices that can be made. It does appear that in most cases (at least (most of?) those supported by TensorViewIterator),
1899  we can infer from the dimensions of input/output containers what choice should be made in each dimension.
1900  */
1901  INTREPID2_TEST_FOR_EXCEPTION(inputValues1.rank() > 3, std::invalid_argument, "ranks greater than 3 not yet supported");
1902  INTREPID2_TEST_FOR_EXCEPTION(inputValues2.rank() > 3, std::invalid_argument, "ranks greater than 3 not yet supported");
1903  INTREPID2_TEST_FOR_EXCEPTION(inputValues3.rank() > 3, std::invalid_argument, "ranks greater than 3 not yet supported");
1904  INTREPID2_TEST_FOR_EXCEPTION((inputValues1.rank() == 3) && (inputValues2.rank() == 3), std::invalid_argument, "two vector-valued input ranks not yet supported");
1905  INTREPID2_TEST_FOR_EXCEPTION((inputValues1.rank() == 3) && (inputValues3.rank() == 3), std::invalid_argument, "two vector-valued input ranks not yet supported");
1906  INTREPID2_TEST_FOR_EXCEPTION((inputValues2.rank() == 3) && (inputValues3.rank() == 3), std::invalid_argument, "two vector-valued input ranks not yet supported");
1907 
1908  if (!tensorPoints_)
1909  {
1910  // then the point counts should all match
1911  INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != numPoints1_, std::invalid_argument, "incompatible point counts");
1912  INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != numPoints2_, std::invalid_argument, "incompatible point counts");
1913  INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != numPoints3_, std::invalid_argument, "incompatible point counts");
1914  }
1915  else
1916  {
1917  INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != numPoints1_ * numPoints2_ * numPoints3_, std::invalid_argument, "incompatible point counts");
1918  }
1919 
1920  INTREPID2_TEST_FOR_EXCEPTION(numFields_ != numFields1_ * numFields2_ * numFields3_, std::invalid_argument, "incompatible field sizes");
1921  }
1922 
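 // The composite enumeration used in operator() below places the first component fastest:
 //   fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1
 //   pointOrdinal = (pointOrdinal3 * numPoints2_ + pointOrdinal2) * numPoints1_ + pointOrdinal1  (tensor points only)
 // For example (illustrative sizes), with numFields1_ = 2 and numFields2_ = 3, the triple
 // (fieldOrdinal1,fieldOrdinal2,fieldOrdinal3) = (1,2,1) maps to (1*3 + 2)*2 + 1 = 11.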
1923  KOKKOS_INLINE_FUNCTION
1924  void operator()( const TeamMember & teamMember ) const
1925  {
1926  auto fieldOrdinal1 = teamMember.league_rank();
1927 
1928  if (!tensorPoints_)
1929  {
1930  if ((input1_.rank() == 2) && (input2_.rank() == 2) && (input3_.rank() == 2))
1931  {
1932  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
1933  for (int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
1934  {
1935  int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
1936  for (int pointOrdinal=0; pointOrdinal<numPoints_; pointOrdinal++)
1937  {
1938  output_(fieldOrdinal,pointOrdinal) = weight_ * input1_(fieldOrdinal1,pointOrdinal) * input2_(fieldOrdinal2,pointOrdinal) * input3_(fieldOrdinal3,pointOrdinal);
1939  }
1940  }
1941  });
1942  }
1943  else if (input1_.rank() == 3)
1944  {
1945  int spaceDim = input1_.extent_int(2);
1946  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
1947  for (int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
1948  {
1949  int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
1950  for (int pointOrdinal=0; pointOrdinal<numPoints_; pointOrdinal++)
1951  {
1952  for (int d=0; d<spaceDim; d++)
1953  {
1954  output_(fieldOrdinal,pointOrdinal,d) = weight_ * input1_(fieldOrdinal1,pointOrdinal,d) * input2_(fieldOrdinal2,pointOrdinal) * input3_(fieldOrdinal3,pointOrdinal);
1955  }
1956  }
1957  }
1958  });
1959  }
1960  else if (input2_.rank() == 3)
1961  {
1962  int spaceDim = input2_.extent_int(2);
1963  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
1964  for (int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
1965  {
1966  int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
1967  for (int pointOrdinal=0; pointOrdinal<numPoints_; pointOrdinal++)
1968  {
1969  for (int d=0; d<spaceDim; d++)
1970  {
1971  output_(fieldOrdinal,pointOrdinal,d) = weight_ * input1_(fieldOrdinal1,pointOrdinal) * input2_(fieldOrdinal2,pointOrdinal,d) * input3_(fieldOrdinal3,pointOrdinal);
1972  }
1973  }
1974  }
1975  });
1976  }
1977  else if (input3_.rank() == 3)
1978  {
1979  int spaceDim = input3_.extent_int(2);
1980  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
1981  for (int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
1982  {
1983  int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
1984  for (int pointOrdinal=0; pointOrdinal<numPoints_; pointOrdinal++)
1985  {
1986  for (int d=0; d<spaceDim; d++)
1987  {
1988  output_(fieldOrdinal,pointOrdinal,d) = weight_ * input1_(fieldOrdinal1,pointOrdinal) * input2_(fieldOrdinal2,pointOrdinal) * input3_(fieldOrdinal3,pointOrdinal,d);
1989  }
1990  }
1991  }
1992  });
1993  }
1994  else
1995  {
1996  // unsupported rank combination -- enforced in constructor
1997  }
1998  }
1999  else
2000  {
2001  if ((input1_.rank() == 2) && (input2_.rank() == 2) && (input3_.rank() == 2) )
2002  {
2003  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
2004  for (int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
2005  {
2006  int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
2007  for (int pointOrdinal3=0; pointOrdinal3<numPoints3_; pointOrdinal3++)
2008  {
2009  for (int pointOrdinal2=0; pointOrdinal2<numPoints2_; pointOrdinal2++)
2010  {
2011  for (int pointOrdinal1=0; pointOrdinal1<numPoints1_; pointOrdinal1++)
2012  {
2013  int pointOrdinal = (pointOrdinal3 * numPoints2_ + pointOrdinal2) * numPoints1_ + pointOrdinal1;
2014  output_(fieldOrdinal,pointOrdinal) = weight_ * input1_(fieldOrdinal1,pointOrdinal1) * input2_(fieldOrdinal2,pointOrdinal2) * input3_(fieldOrdinal3,pointOrdinal3);
2015  }
2016  }
2017  }
2018  }
2019  });
2020  }
2021  else if (input1_.rank() == 3) // based on constructor requirements, this means the others are rank 2
2022  {
2023  int spaceDim = input1_.extent_int(2);
2024  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
2025  for (int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
2026  {
2027  int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
2028  for (int pointOrdinal3=0; pointOrdinal3<numPoints3_; pointOrdinal3++)
2029  {
2030  for (int pointOrdinal2=0; pointOrdinal2<numPoints2_; pointOrdinal2++)
2031  {
2032  for (int pointOrdinal1=0; pointOrdinal1<numPoints1_; pointOrdinal1++)
2033  {
2034  int pointOrdinal = (pointOrdinal3 * numPoints2_ + pointOrdinal2) * numPoints1_ + pointOrdinal1;
2035  for (int d=0; d<spaceDim; d++)
2036  {
2037  output_(fieldOrdinal,pointOrdinal,d) = weight_ * input1_(fieldOrdinal1,pointOrdinal1,d) * input2_(fieldOrdinal2,pointOrdinal2) * input3_(fieldOrdinal3,pointOrdinal3);
2038  }
2039  }
2040  }
2041  }
2042  }
2043  });
2044  }
2045  else if (input2_.rank() == 3) // based on constructor requirements, this means the others are rank 2
2046  {
2047  int spaceDim = input2_.extent_int(2);
2048  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
2049  for (int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
2050  {
2051  int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
2052  for (int pointOrdinal3=0; pointOrdinal3<numPoints3_; pointOrdinal3++)
2053  {
2054  for (int pointOrdinal2=0; pointOrdinal2<numPoints2_; pointOrdinal2++)
2055  {
2056  for (int pointOrdinal1=0; pointOrdinal1<numPoints1_; pointOrdinal1++)
2057  {
2058  int pointOrdinal = (pointOrdinal3 * numPoints2_ + pointOrdinal2) * numPoints1_ + pointOrdinal1;
2059  for (int d=0; d<spaceDim; d++)
2060  {
2061  output_(fieldOrdinal,pointOrdinal,d) = weight_ * input1_(fieldOrdinal1,pointOrdinal1) * input2_(fieldOrdinal2,pointOrdinal2,d) * input3_(fieldOrdinal3,pointOrdinal3);
2062  }
2063  }
2064  }
2065  }
2066  }
2067  });
2068  }
2069  else if (input3_.rank() == 3) // based on constructor requirements, this means the others are rank 2
2070  {
2071  int spaceDim = input3_.extent_int(2);
2072  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
2073  for (int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
2074  {
2075  int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
2076  for (int pointOrdinal3=0; pointOrdinal3<numPoints3_; pointOrdinal3++)
2077  {
2078  for (int pointOrdinal2=0; pointOrdinal2<numPoints2_; pointOrdinal2++)
2079  {
2080  for (int pointOrdinal1=0; pointOrdinal1<numPoints1_; pointOrdinal1++)
2081  {
2082  int pointOrdinal = (pointOrdinal3 * numPoints2_ + pointOrdinal2) * numPoints1_ + pointOrdinal1;
2083  for (int d=0; d<spaceDim; d++)
2084  {
2085  output_(fieldOrdinal,pointOrdinal,d) = weight_ * input1_(fieldOrdinal1,pointOrdinal1) * input2_(fieldOrdinal2,pointOrdinal2) * input3_(fieldOrdinal3,pointOrdinal3,d);
2086  }
2087  }
2088  }
2089  }
2090  }
2091  });
2092  }
2093  else
2094  {
2095  // unsupported rank combination -- enforced in constructor
2096  }
2097  }
2098  }
2099  }; // TensorBasis3_Functor
2100 
2101 
2102  template<typename BasisBaseClass = void>
 2103  class Basis_TensorBasis3
 2104  : public Basis_TensorBasis<BasisBaseClass>
2105  {
2106  using BasisBase = BasisBaseClass;
 2107  using TensorBasis = Basis_TensorBasis<BasisBase>;
 2108  public:
2109  using typename BasisBase::OutputViewType;
2110  using typename BasisBase::PointViewType;
2111  using typename BasisBase::ScalarViewType;
2112 
2113  using typename BasisBase::OutputValueType;
2114  using typename BasisBase::PointValueType;
2115 
2116  using typename BasisBase::ExecutionSpace;
2117 
2118  using BasisPtr = Teuchos::RCP<BasisBase>;
2119  protected:
2120  BasisPtr basis1_;
2121  BasisPtr basis2_;
2122  BasisPtr basis3_;
2123  public:
2124  Basis_TensorBasis3(BasisPtr basis1, BasisPtr basis2, BasisPtr basis3, const bool useShardsCellTopologyAndTags = false)
2125  :
2126  TensorBasis(Teuchos::rcp( new TensorBasis(basis1,basis2,FUNCTION_SPACE_MAX,useShardsCellTopologyAndTags)),
2127  basis3,
2128  FUNCTION_SPACE_MAX,useShardsCellTopologyAndTags),
2129  basis1_(basis1),
2130  basis2_(basis2),
2131  basis3_(basis3)
2132  {}
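 // Note on construction: basis1 and basis2 are first combined into a two-component TensorBasis, which is
 // then combined with basis3, so the composite domain dimension is the sum of the three component domain
 // dimensions. Because the three-point-container getValues() below is pure virtual, this class is abstract;
 // a subclass constructor might look like the following sketch ("MyHexBasis" and "SomeLineBasis" are
 // hypothetical names):
 //
 //   MyHexBasis(int polyOrder)
 //   :
 //   Basis_TensorBasis3<BasisBase>(Teuchos::rcp(new SomeLineBasis(polyOrder)),
 //                                 Teuchos::rcp(new SomeLineBasis(polyOrder)),
 //                                 Teuchos::rcp(new SomeLineBasis(polyOrder)))
 //   {}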
2133 
2140  virtual OperatorTensorDecomposition getOperatorDecomposition(const EOperator operatorType) const override
2141  {
2142  OperatorTensorDecomposition opSimpleDecomposition = this->getSimpleOperatorDecomposition(operatorType);
2143  std::vector<BasisPtr> componentBases {basis1_, basis2_, basis3_};
2144  return opSimpleDecomposition.expandedDecomposition(componentBases);
2145  }
2146 
2147  using TensorBasis::getValues;
2148 
2173  virtual void getValues(OutputViewType outputValues, const EOperator operatorType,
2174  const PointViewType inputPoints12, const PointViewType inputPoints3,
2175  bool tensorPoints) const override
2176  {
2177  // TODO: rework this to use superclass's getComponentPoints.
2178 
2179  int spaceDim1 = basis1_->getDomainDimension();
2180  int spaceDim2 = basis2_->getDomainDimension();
2181 
2182  int totalSpaceDim12 = inputPoints12.extent_int(1);
2183 
2184  TEUCHOS_ASSERT(spaceDim1 + spaceDim2 == totalSpaceDim12);
2185 
2186  if (!tensorPoints)
2187  {
2188  auto inputPoints1 = Kokkos::subview(inputPoints12,Kokkos::ALL(),std::make_pair(0,spaceDim1));
2189  auto inputPoints2 = Kokkos::subview(inputPoints12,Kokkos::ALL(),std::make_pair(spaceDim1,totalSpaceDim12));
2190 
2191  this->getValues(outputValues, operatorType, inputPoints1, inputPoints2, inputPoints3, tensorPoints);
2192  }
2193  else
2194  {
2195  // superclass doesn't (yet) have a clever way to detect tensor points in a single container
2196  // we'd need something along those lines here to detect them in inputPoints12.
2197  // if we do add such a mechanism to superclass, it should be simple enough to call that from here
2198  INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "This method does not yet handle tensorPoints=true");
2199  }
2200  }
2201 
2229  virtual void getValues(OutputViewType outputValues, const EOperator operatorType,
2230  const PointViewType inputPoints1, const PointViewType inputPoints2, const PointViewType inputPoints3,
2231  bool tensorPoints) const = 0;
2232 
2260  void getValues( OutputViewType outputValues,
2261  const PointViewType inputPoints1, const EOperator operatorType1,
2262  const PointViewType inputPoints2, const EOperator operatorType2,
2263  const PointViewType inputPoints3, const EOperator operatorType3,
2264  bool tensorPoints, double weight=1.0) const
2265  {
2266  int basisCardinality1 = basis1_->getCardinality();
2267  int basisCardinality2 = basis2_->getCardinality();
2268  int basisCardinality3 = basis3_->getCardinality();
2269 
2270  int spaceDim1 = inputPoints1.extent_int(1);
2271  int spaceDim2 = inputPoints2.extent_int(1);
2272  int spaceDim3 = inputPoints3.extent_int(1);
2273 
2274  int totalPointCount;
2275  int pointCount1, pointCount2, pointCount3;
2276  if (tensorPoints)
2277  {
2278  pointCount1 = inputPoints1.extent_int(0);
2279  pointCount2 = inputPoints2.extent_int(0);
2280  pointCount3 = inputPoints3.extent_int(0);
2281  totalPointCount = pointCount1 * pointCount2 * pointCount3;
2282  }
2283  else
2284  {
2285  totalPointCount = inputPoints1.extent_int(0);
2286  pointCount1 = totalPointCount;
2287  pointCount2 = totalPointCount;
2288  pointCount3 = totalPointCount;
2289 
2290  INTREPID2_TEST_FOR_EXCEPTION((totalPointCount != inputPoints2.extent_int(0)) || (totalPointCount != inputPoints3.extent_int(0)),
2291  std::invalid_argument, "If tensorPoints is false, the point counts must match!");
2292  }
2293 
2294  // structure of this implementation:
2295  /*
2296  - allocate output1, output2, output3 containers
2297  - either:
2298  1. split off the tensor functor call into its own method in TensorBasis, and
2299  - call it once with output1, output2, placing these in another newly allocated output12, then
2300  - call it again with output12, output3
2301  OR
2302  2. create a 3-argument tensor functor and call it with output1,output2,output3
2303 
2304  At the moment, the 3-argument functor seems like a better approach. It's likely more code, but somewhat
2305  more efficient and easier to understand/debug. And the code is fairly straightforward to produce.
2306  */
2307 
2308  // copied from the 2-argument TensorBasis implementation:
2309 
2310  OutputViewType outputValues1, outputValues2, outputValues3;
2311 
 2312  // Note: the gradient of an HGRAD basis on a line is stored in a rank-3 output container (F,P,D), with the last dimension of extent 1.
 2313  // In particular, this holds even when that 1D gradient arises as a component of the divergence of an HDIV basis, whose final output is scalar (rank 2).
2314  if ((spaceDim1 == 1) && (operatorType1 == OPERATOR_VALUE))
2315  {
2316  // use a rank 2 container for basis1
2317  outputValues1 = getMatchingViewWithLabel(outputValues,"output values - basis 1",basisCardinality1,pointCount1);
2318  }
2319  else
2320  {
2321  outputValues1 = getMatchingViewWithLabel(outputValues,"output values - basis 1",basisCardinality1,pointCount1,spaceDim1);
2322  }
2323  if ((spaceDim2 == 1) && (operatorType2 == OPERATOR_VALUE))
2324  {
2325  // use a rank 2 container for basis2
2326  outputValues2 = getMatchingViewWithLabel(outputValues,"output values - basis 2",basisCardinality2,pointCount2);
2327  }
2328  else
2329  {
2330  outputValues2 = getMatchingViewWithLabel(outputValues,"output values - basis 2",basisCardinality2,pointCount2,spaceDim2);
2331  }
2332  if ((spaceDim3 == 1) && (operatorType3 == OPERATOR_VALUE))
2333  {
 2334  // use a rank 2 container for basis3
2335  outputValues3 = getMatchingViewWithLabel(outputValues,"output values - basis 3",basisCardinality3,pointCount3);
2336  }
2337  else
2338  {
2339  outputValues3 = getMatchingViewWithLabel(outputValues,"output values - basis 3",basisCardinality3,pointCount3,spaceDim3);
2340  }
2341 
2342  basis1_->getValues(outputValues1,inputPoints1,operatorType1);
2343  basis2_->getValues(outputValues2,inputPoints2,operatorType2);
2344  basis3_->getValues(outputValues3,inputPoints3,operatorType3);
2345 
2346  const int outputVectorSize = getVectorSizeForHierarchicalParallelism<OutputValueType>();
2347  const int pointVectorSize = getVectorSizeForHierarchicalParallelism<PointValueType>();
2348  const int vectorSize = std::max(outputVectorSize,pointVectorSize);
2349 
2350  auto policy = Kokkos::TeamPolicy<ExecutionSpace>(basisCardinality1,Kokkos::AUTO(),vectorSize);
2351 
 2352  using FunctorType = TensorBasis3_Functor<ExecutionSpace, OutputValueType, OutputViewType>;
 2353  FunctorType functor(outputValues, outputValues1, outputValues2, outputValues3, tensorPoints, weight);
2354  Kokkos::parallel_for("TensorBasis3_Functor", policy , functor);
2355  }
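 // A typical call from a subclass's three-point-container getValues() override (illustrative; the
 // operator choices are hypothetical):
 //
 //   // composite values = (VALUE of basis1) * (d/dy of basis2) * (VALUE of basis3):
 //   this->getValues(outputValues, inputPoints1, OPERATOR_VALUE,
 //                   inputPoints2, OPERATOR_D1,
 //                   inputPoints3, OPERATOR_VALUE, tensorPoints);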
2356 
2364  virtual void getDofCoeffs( typename BasisBase::ScalarViewType dofCoeffs ) const override
2365  {
2366  using ValueType = typename BasisBase::ScalarViewType::value_type;
2367  using ResultLayout = typename DeduceLayout< typename BasisBase::ScalarViewType >::result_layout;
2368  using ViewType = Kokkos::DynRankView<ValueType, ResultLayout, typename TensorBasis::DeviceType >;
2369 
2370  const ordinal_type basisCardinality1 = basis1_->getCardinality();
2371  const ordinal_type basisCardinality2 = basis2_->getCardinality();
2372  const ordinal_type basisCardinality3 = basis3_->getCardinality();
2373 
2374  auto dofCoeffs1 = ViewType("dofCoeffs1",basisCardinality1);
2375  auto dofCoeffs2 = ViewType("dofCoeffs2",basisCardinality2);
2376  auto dofCoeffs3 = ViewType("dofCoeffs3",basisCardinality3);
2377 
2378  basis1_->getDofCoeffs(dofCoeffs1);
2379  basis2_->getDofCoeffs(dofCoeffs2);
2380  basis3_->getDofCoeffs(dofCoeffs3);
2381 
2382  Kokkos::RangePolicy<ExecutionSpace> policy(0, basisCardinality3);
2383  Kokkos::parallel_for(policy, KOKKOS_LAMBDA (const int fieldOrdinal3)
2384  {
2385  for (int fieldOrdinal2=0; fieldOrdinal2<basisCardinality2; fieldOrdinal2++)
2386  for (int fieldOrdinal1=0; fieldOrdinal1<basisCardinality1; fieldOrdinal1++)
2387  {
2388  const ordinal_type fieldOrdinal = fieldOrdinal1 + fieldOrdinal2 * basisCardinality1 + fieldOrdinal3 * (basisCardinality1*basisCardinality2);
2389  dofCoeffs(fieldOrdinal) = dofCoeffs1(fieldOrdinal1);
2390  dofCoeffs(fieldOrdinal) *= dofCoeffs2(fieldOrdinal2) * dofCoeffs3(fieldOrdinal3);
2391  }
2392  });
2393  }
2394 
2399  virtual HostBasisPtr<OutputValueType, PointValueType>
2400  getHostBasis() const override {
2401  TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, "TensorBasis3 subclasses must override getHostBasis");
2402  }
2403  };
2404 } // end namespace Intrepid2
2405 
2406 #endif /* Intrepid2_TensorBasis_h */