Intrepid2_TensorBasis.hpp
// @HEADER
// ************************************************************************
//
// Intrepid2 Package
// Copyright (2007) Sandia Corporation
//
// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
// license for use of this work by or on behalf of the U.S. Government.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Kyungjoo Kim (kyukim@sandia.gov),
//                    Mauro Perego (mperego@sandia.gov), or
//                    Nate Roberts (nvrober@sandia.gov)
//
// ************************************************************************
// @HEADER

#ifndef Intrepid2_TensorBasis_h
#define Intrepid2_TensorBasis_h

#include <Kokkos_DynRankView.hpp>

#include <Teuchos_RCP.hpp>

#include <Intrepid2_config.h>

#include <map>
#include <set>
#include <vector>

#include "Intrepid2_Basis.hpp"
#include "Intrepid2_Utils.hpp" // defines FAD_VECTOR_SIZE, VECTOR_SIZE

namespace Intrepid2
{
  template<ordinal_type spaceDim>
  KOKKOS_INLINE_FUNCTION
  ordinal_type getDkEnumeration(const Kokkos::Array<int,spaceDim> &entries);

  template<ordinal_type spaceDim>
  KOKKOS_INLINE_FUNCTION
  void getDkEnumerationInverse(Kokkos::Array<int,spaceDim> &entries, const ordinal_type dkEnum, const ordinal_type operatorOrder);

  template<>
  KOKKOS_INLINE_FUNCTION
  void getDkEnumerationInverse<1>(Kokkos::Array<int,1> &entries, const ordinal_type dkEnum, const ordinal_type operatorOrder)
  {
    entries[0] = operatorOrder;
  }

  template<>
  KOKKOS_INLINE_FUNCTION
  void getDkEnumerationInverse<2>(Kokkos::Array<int,2> &entries, const ordinal_type dkEnum, const ordinal_type operatorOrder)
  {
    entries[0] = operatorOrder - dkEnum;
    entries[1] = dkEnum;
  }

  template<>
  KOKKOS_INLINE_FUNCTION
  void getDkEnumerationInverse<3>(Kokkos::Array<int,3> &entries, const ordinal_type dkEnum, const ordinal_type operatorOrder)
  {
    // formula is zMult + (yMult+zMult)*(yMult+zMult+1)/2; where xMult+yMult+zMult = operatorOrder
    // it seems complicated to work out a formula that will invert this.  For the present we just take a brute force approach,
    // using getDkEnumeration() to check each possibility
    for (ordinal_type yMult=0; yMult<=operatorOrder; yMult++)
    {
      for (ordinal_type zMult=0; zMult<=operatorOrder-yMult; zMult++)
      {
        const ordinal_type xMult = operatorOrder-(zMult+yMult);
        if (dkEnum == getDkEnumeration<3>(xMult,yMult,zMult))
        {
          entries[0] = xMult;
          entries[1] = yMult;
          entries[2] = zMult;
          return;
        }
      }
    }
  }
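
  // Worked example: with the enumeration formula above, the second-order multi-indices
  // (xMult,yMult,zMult) with xMult+yMult+zMult = 2 enumerate as
  //   (2,0,0) -> 0,  (1,1,0) -> 1,  (1,0,1) -> 2,  (0,2,0) -> 3,  (0,1,1) -> 4,  (0,0,2) -> 5,
  // i.e., lexicographic order reading x, y, z from left to right.  getDkEnumerationInverse<3>()
  // simply searches this list for the entry whose enumeration matches dkEnum.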

  template<ordinal_type spaceDim>
  KOKKOS_INLINE_FUNCTION
  void getDkEnumerationInverse(Kokkos::Array<int,spaceDim> &entries, const ordinal_type dkEnum, const ordinal_type operatorOrder)
  {
    // for operator order k, the recursive formula defining getDkEnumeration is:
    // getDkEnumeration(k0,k1,…,k_{n-1}) = getDkCardinality(k - k0) + getDkEnumeration(k1,…,k_{n-1})
    // The entries are in reverse lexicographic order.  We search for k0 by incrementing k0 until getDkEnumeration(k0,0,…,0) <= dkEnum.
    // Then we recursively call getDkEnumerationInverse<spaceDim-1>({k1,…,k_{n-1}}, dkEnum - getDkEnumeration(k0,0,…,0) - 1)

    for (int k0=0; k0<=operatorOrder; k0++)
    {
      entries[0] = k0;
      for (int d=1; d<spaceDim-1; d++)
      {
        entries[d] = 0;
      }
      // sum of entries must be equal to operatorOrder
      if (spaceDim > 1) entries[spaceDim-1] = operatorOrder - k0;
      else if (k0 != operatorOrder) continue; // if spaceDim == 1, then the only way the sum of the entries is operatorOrder is if k0 == operatorOrder
      const ordinal_type dkEnumFor_k0 = getDkEnumeration<spaceDim>(entries);

      if      (dkEnumFor_k0 >  dkEnum) continue; // next k0
      else if (dkEnumFor_k0 == dkEnum) return;   // entries has (k0,0,…,0), and this has dkEnum as its enumeration value
      else
      {
        // (k0,0,…,0) is prior to the dkEnum entry, which means that the dkEnum entry starts with k0-1.
        entries[0] = k0 - 1;

        // We determine the rest of the entries through a recursive call to getDkEnumerationInverse<spaceDim - 1>().

        // ensure that we don't try to allocate an empty array…
        constexpr ordinal_type sizeForSubArray = (spaceDim > 2) ? spaceDim - 1 : 1;
        Kokkos::Array<int,sizeForSubArray> subEntries = {};

        // the -1 in sub-entry enumeration value accounts for the fact that the entry is the one *after* (k0,0,…,0)
        getDkEnumerationInverse<spaceDim-1>(subEntries, dkEnum - dkEnumFor_k0 - 1, operatorOrder - entries[0]);

        for (int i=1; i<spaceDim; i++)
        {
          entries[i] = subEntries[i-1];
        }
        return;
      }
    }
    INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(true, std::invalid_argument, "entries corresponding to dkEnum not found");
  }

  template<>
  KOKKOS_INLINE_FUNCTION
  ordinal_type getDkEnumeration<1>(const Kokkos::Array<int,1> &entries)
  {
    return getDkEnumeration<1>(entries[0]);
  }

  template<ordinal_type spaceDim>
  KOKKOS_INLINE_FUNCTION
  ordinal_type getDkEnumeration(const Kokkos::Array<int,spaceDim> &entries)
  {
    ordinal_type k_minus_k0 = 0; // sum of all the entries but the first

    // recursive formula in general is: getDkEnumeration(k0,k1,…,k_{n-1}) = getDkCardinality(k - k0) + getDkEnumeration(k1,…,k_{n-1})
    // ensure that we don't try to allocate an empty array…
    constexpr ordinal_type sizeForSubArray = (spaceDim > 2) ? spaceDim - 1 : 1;
    Kokkos::Array<int,sizeForSubArray> remainingEntries;
    for (int i=1; i<spaceDim; i++)
    {
      k_minus_k0 += entries[i];
      remainingEntries[i-1] = entries[i];
    }

    if (k_minus_k0 == 0)
    {
      return 0;
    }
    else
    {
      EOperator opFor_k_minus_k0_minus_1 = (k_minus_k0 > 1) ? EOperator(OPERATOR_D1 + k_minus_k0 - 2) : EOperator(OPERATOR_VALUE);
      const ordinal_type dkCardinality = getDkCardinality(opFor_k_minus_k0_minus_1, spaceDim);
      const ordinal_type dkEnum = dkCardinality + getDkEnumeration<sizeForSubArray>(remainingEntries);
      return dkEnum;
    }
  }

  template<ordinal_type spaceDim1, ordinal_type spaceDim2>
  KOKKOS_INLINE_FUNCTION
  ordinal_type getDkTensorIndex(const ordinal_type dkEnum1, const ordinal_type operatorOrder1,
                                const ordinal_type dkEnum2, const ordinal_type operatorOrder2)
  {
    Kokkos::Array<int,spaceDim1> entries1 = {};
    getDkEnumerationInverse<spaceDim1>(entries1, dkEnum1, operatorOrder1);

    Kokkos::Array<int,spaceDim2> entries2 = {};
    getDkEnumerationInverse<spaceDim2>(entries2, dkEnum2, operatorOrder2);

    const int spaceDim = spaceDim1 + spaceDim2;
    Kokkos::Array<int,spaceDim> entries;

    for (ordinal_type d=0; d<spaceDim1; d++)
    {
      entries[d] = entries1[d];
    }

    for (ordinal_type d=0; d<spaceDim2; d++)
    {
      entries[d+spaceDim1] = entries2[d];
    }

    return getDkEnumeration<spaceDim>(entries);
  }
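
  // Worked example (follows from the enumeration formulas above): for spaceDim1 = 2, spaceDim2 = 1,
  // combining the second-order multi-index (1,1) in the first component (dkEnum1 = 1, operatorOrder1 = 2)
  // with the first-order multi-index (1) in the second component (dkEnum2 = 0, operatorOrder2 = 1)
  // concatenates to (1,1,1) in 3D; its enumeration is zMult + (yMult+zMult)*(yMult+zMult+1)/2 = 1 + 3 = 4,
  // i.e. getDkTensorIndex<2,1>(1,2,0,1) returns the D3 enumeration of d^3/(dx dy dz).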

  template<typename BasisBase>
  class Basis_TensorBasis;

  /** \struct Intrepid2::OperatorTensorDecomposition
      \brief  Describes the decomposition of an operator applied to a tensor-product basis: for each vector component of the output, the operator to apply to each tensorial basis component, together with a scalar weight.
  */
  struct OperatorTensorDecomposition
  {
    // if we want to make this usable on device, we could switch to Kokkos::Array instead of std::vector.  But this is not our immediate use case.
    std::vector< std::vector<EOperator> > ops; // outer index: vector entry ordinal; inner index: basis component ordinal.  (scalar-valued operators have a single entry in outer vector)
    std::vector<double> weights; // weights for each vector entry
    ordinal_type numBasisComponents_;
    bool rotateXYNinetyDegrees_ = false; // if true, indicates that something that otherwise would have values (f_x, f_y, …) should be mapped to (-f_y, f_x, …).  This is used for H(curl) wedges (specifically, for OPERATOR_CURL).

    OperatorTensorDecomposition(const std::vector<EOperator> &opsBasis1, const std::vector<EOperator> &opsBasis2, const std::vector<double> vectorComponentWeights)
    :
    weights(vectorComponentWeights),
    numBasisComponents_(2)
    {
      const ordinal_type size          = opsBasis1.size();
      const ordinal_type opsBasis2Size = opsBasis2.size();
      const ordinal_type weightsSize   = weights.size();
      INTREPID2_TEST_FOR_EXCEPTION(size != opsBasis2Size, std::invalid_argument, "opsBasis1.size() != opsBasis2.size()");
      INTREPID2_TEST_FOR_EXCEPTION(size != weightsSize,   std::invalid_argument, "opsBasis1.size() != weights.size()");

      for (ordinal_type i=0; i<size; i++)
      {
        ops.push_back(std::vector<EOperator>{opsBasis1[i],opsBasis2[i]});
      }
    }

    OperatorTensorDecomposition(const std::vector< std::vector<EOperator> > &vectorEntryOps, const std::vector<double> &vectorComponentWeights)
    :
    ops(vectorEntryOps),
    weights(vectorComponentWeights)
    {
      const ordinal_type numVectorComponents = ops.size();
      const ordinal_type weightsSize         = weights.size();
      INTREPID2_TEST_FOR_EXCEPTION(numVectorComponents != weightsSize, std::invalid_argument, "vectorEntryOps.size() != weights.size()");

      INTREPID2_TEST_FOR_EXCEPTION(numVectorComponents == 0, std::invalid_argument, "must have at least one entry!");

      ordinal_type numBases = 0;
      for (ordinal_type i=0; i<numVectorComponents; i++)
      {
        if (numBases == 0)
        {
          numBases = ops[i].size();
        }
        else if (ops[i].size() != 0)
        {
          const ordinal_type opsiSize = ops[i].size();
          INTREPID2_TEST_FOR_EXCEPTION(numBases != opsiSize, std::invalid_argument, "must have one operator for each basis in each nontrivial entry in vectorEntryOps");
        }
      }
      INTREPID2_TEST_FOR_EXCEPTION(numBases == 0, std::invalid_argument, "at least one vectorEntryOps entry must be non-trivial");
      numBasisComponents_ = numBases;
    }

    OperatorTensorDecomposition(const std::vector<EOperator> &basisOps, const double weight = 1.0)
    :
    ops({basisOps}),
    weights({weight}),
    numBasisComponents_(basisOps.size())
    {}

    OperatorTensorDecomposition(const EOperator &opBasis1, const EOperator &opBasis2, double weight = 1.0)
    :
    ops({ std::vector<EOperator>{opBasis1, opBasis2} }),
    weights({weight}),
    numBasisComponents_(2)
    {}

    OperatorTensorDecomposition(const EOperator &opBasis1, const EOperator &opBasis2, const EOperator &opBasis3, double weight = 1.0)
    :
    ops({ std::vector<EOperator>{opBasis1, opBasis2, opBasis3} }),
    weights({weight}),
    numBasisComponents_(3)
    {}
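
    // Usage sketch (illustrative): for a tensor-product H^1 basis on the quadrilateral built as line x line,
    // the gradient decomposes into two vector components, each a product of component operators with unit weights.
    // One way to express that with the constructors above:
    //
    //   std::vector< std::vector<EOperator> > gradOps { {OPERATOR_D1,    OPERATOR_VALUE},   // d/dx: differentiate in x, evaluate in y
    //                                                   {OPERATOR_VALUE, OPERATOR_D1   } }; // d/dy: evaluate in x, differentiate in y
    //   OperatorTensorDecomposition gradDecomposition(gradOps, std::vector<double>{1.0, 1.0});
    //   // gradDecomposition.numVectorComponents() == 2; gradDecomposition.numBasisComponents() == 2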

    ordinal_type numVectorComponents() const
    {
      return ops.size(); // will match weights.size()
    }

    ordinal_type numBasisComponents() const
    {
      return numBasisComponents_;
    }

    double weight(const ordinal_type &vectorComponentOrdinal) const
    {
      return weights[vectorComponentOrdinal];
    }

    bool identicallyZeroComponent(const ordinal_type &vectorComponentOrdinal) const
    {
      INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(vectorComponentOrdinal <  0,                     std::invalid_argument, "vectorComponentOrdinal is out of bounds");
      INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(vectorComponentOrdinal >= numVectorComponents(), std::invalid_argument, "vectorComponentOrdinal is out of bounds");
      return ops[vectorComponentOrdinal].size() == 0;
    }

    EOperator op(const ordinal_type &vectorComponentOrdinal, const ordinal_type &basisOrdinal) const
    {
      INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(vectorComponentOrdinal <  0,                     std::invalid_argument, "vectorComponentOrdinal is out of bounds");
      INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(vectorComponentOrdinal >= numVectorComponents(), std::invalid_argument, "vectorComponentOrdinal is out of bounds");
      if (identicallyZeroComponent(vectorComponentOrdinal))
      {
        return OPERATOR_MAX; // by convention: zero in this component
      }
      else
      {
        INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(basisOrdinal <  0,                   std::invalid_argument, "basisOrdinal is out of bounds");
        INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(basisOrdinal >= numBasisComponents_, std::invalid_argument, "basisOrdinal is out of bounds");
        return ops[vectorComponentOrdinal][basisOrdinal];
      }
    }

    // Given the bases that correspond to the components of this decomposition, produces a decomposition expressed in terms of their (leaf) tensor components.
    template<typename DeviceType, typename OutputValueType, class PointValueType>
    OperatorTensorDecomposition expandedDecomposition(std::vector< Teuchos::RCP< Basis<DeviceType,OutputValueType,PointValueType> > > &bases)
    {
      const ordinal_type basesSize = bases.size();
      INTREPID2_TEST_FOR_EXCEPTION(basesSize != numBasisComponents_, std::invalid_argument, "The number of bases provided must match the number of basis components in this decomposition");

      ordinal_type numExpandedBasisComponents = 0;
      using BasisBase   = Basis<DeviceType,OutputValueType,PointValueType>;
      using TensorBasis = Basis_TensorBasis<BasisBase>;
      std::vector<TensorBasis*> basesAsTensorBasis(numBasisComponents_);
      for (ordinal_type basisComponentOrdinal=0; basisComponentOrdinal<numBasisComponents_; basisComponentOrdinal++)
      {
        TensorBasis* basisAsTensorBasis = dynamic_cast<TensorBasis*>(bases[basisComponentOrdinal].get());
        basesAsTensorBasis[basisComponentOrdinal] = basisAsTensorBasis;
        if (basisAsTensorBasis)
        {
          numExpandedBasisComponents += basisAsTensorBasis->getTensorBasisComponents().size();
        }
        else
        {
          numExpandedBasisComponents += 1;
        }
      }

      std::vector< std::vector<EOperator> > expandedOps; // outer index: vector entry ordinal; inner index: basis component ordinal.
      std::vector<double> expandedWeights;
      const ordinal_type opsSize = ops.size();
      for (ordinal_type simpleVectorEntryOrdinal=0; simpleVectorEntryOrdinal<opsSize; simpleVectorEntryOrdinal++)
      {
        if (identicallyZeroComponent(simpleVectorEntryOrdinal))
        {
          expandedOps.push_back(std::vector<EOperator>{});
          expandedWeights.push_back(0.0);
          continue;
        }

        std::vector< std::vector<EOperator> > expandedBasisOpsForSimpleVectorEntry(1); // start out with one outer entry; expands if a component is vector-valued

        // this lambda appends an op to each of the vector components
        auto addExpandedOp = [&expandedBasisOpsForSimpleVectorEntry](const EOperator &op)
        {
          const ordinal_type size = expandedBasisOpsForSimpleVectorEntry.size();
          for (ordinal_type i=0; i<size; i++)
          {
            expandedBasisOpsForSimpleVectorEntry[i].push_back(op);
          }
        };

        // this lambda takes a scalar-valued (single outer entry) expandedBasisOps and expands it
        // according to the number of vector entries coming from the vector-valued component basis
        auto vectorizeExpandedOps = [&expandedBasisOpsForSimpleVectorEntry](const int &numSubVectors)
        {
          // we require that this only gets called once per simpleVectorEntryOrdinal -- i.e., only one basis component gets to be vector-valued.
          INTREPID2_TEST_FOR_EXCEPTION(expandedBasisOpsForSimpleVectorEntry.size() != 1, std::invalid_argument, "multiple basis components may not be vector-valued!");
          for (ordinal_type i=1; i<numSubVectors; i++)
          {
            expandedBasisOpsForSimpleVectorEntry.push_back(expandedBasisOpsForSimpleVectorEntry[0]);
          }
        };

        std::vector<EOperator> subVectorOps; // only used if one of the components is vector-valued
        std::vector<double> subVectorWeights {weights[simpleVectorEntryOrdinal]};
        for (ordinal_type basisComponentOrdinal=0; basisComponentOrdinal<numBasisComponents_; basisComponentOrdinal++)
        {
          const auto &op = ops[simpleVectorEntryOrdinal][basisComponentOrdinal];

          if (! basesAsTensorBasis[basisComponentOrdinal])
          {
            addExpandedOp(op);
          }
          else
          {
            OperatorTensorDecomposition basisOpDecomposition = basesAsTensorBasis[basisComponentOrdinal]->getOperatorDecomposition(op);
            if (basisOpDecomposition.numVectorComponents() > 1)
            {
              // We don't currently support a use case where we have multiple component bases that are vector-valued:
              INTREPID2_TEST_FOR_EXCEPTION(subVectorWeights.size() > 1, std::invalid_argument, "Unhandled case: multiple component bases are vector-valued");
              // We do support a single vector-valued case, though; this splits the current simpleVectorEntryOrdinal into an appropriate number of components that come in order in the expanded vector
              ordinal_type numSubVectors = basisOpDecomposition.numVectorComponents();
              vectorizeExpandedOps(numSubVectors);

              double weightSoFar = subVectorWeights[0];
              for (ordinal_type subVectorEntryOrdinal=1; subVectorEntryOrdinal<numSubVectors; subVectorEntryOrdinal++)
              {
                subVectorWeights.push_back(weightSoFar * basisOpDecomposition.weight(subVectorEntryOrdinal));
              }
              subVectorWeights[0] *= basisOpDecomposition.weight(0);
              for (ordinal_type subVectorEntryOrdinal=0; subVectorEntryOrdinal<numSubVectors; subVectorEntryOrdinal++)
              {
                for (ordinal_type subComponentBasis=0; subComponentBasis<basisOpDecomposition.numBasisComponents(); subComponentBasis++)
                {
                  const auto &basisOp = basisOpDecomposition.op(subVectorEntryOrdinal, subComponentBasis);
                  expandedBasisOpsForSimpleVectorEntry[subVectorEntryOrdinal].push_back(basisOp);
                }
              }
            }
            else
            {
              double componentWeight = basisOpDecomposition.weight(0);
              const ordinal_type size = subVectorWeights.size();
              for (ordinal_type i=0; i<size; i++)
              {
                subVectorWeights[i] *= componentWeight;
              }
              ordinal_type subVectorEntryOrdinal = 0;
              const ordinal_type numBasisComponents = basisOpDecomposition.numBasisComponents();
              for (ordinal_type subComponentBasis=0; subComponentBasis<numBasisComponents; subComponentBasis++)
              {
                const auto &basisOp = basisOpDecomposition.op(subVectorEntryOrdinal, subComponentBasis);
                addExpandedOp( basisOp );
              }
            }
          }
        }

        // sanity check on the new expandedOps entries:
        for (ordinal_type i=0; i<static_cast<ordinal_type>(expandedBasisOpsForSimpleVectorEntry.size()); i++)
        {
          const ordinal_type size = expandedBasisOpsForSimpleVectorEntry[i].size();
          INTREPID2_TEST_FOR_EXCEPTION(size != numExpandedBasisComponents, std::logic_error, "each vector in expandedBasisOpsForSimpleVectorEntry should have as many entries as there are expanded basis components");
        }

        expandedOps.insert(expandedOps.end(), expandedBasisOpsForSimpleVectorEntry.begin(), expandedBasisOpsForSimpleVectorEntry.end());
        expandedWeights.insert(expandedWeights.end(), subVectorWeights.begin(), subVectorWeights.end());
      }
      // check that vector lengths agree:
      INTREPID2_TEST_FOR_EXCEPTION(expandedOps.size() != expandedWeights.size(), std::logic_error, "expandedWeights and expandedOps do not agree on the number of vector components");

      OperatorTensorDecomposition result(expandedOps, expandedWeights);
      result.setRotateXYNinetyDegrees(rotateXYNinetyDegrees_);
      return result;
    }

    // Returns the flag indicating whether the (x,y) components of the output should be rotated by 90 degrees (see rotateXYNinetyDegrees_ above).
    bool rotateXYNinetyDegrees() const
    {
      return rotateXYNinetyDegrees_;
    }

    void setRotateXYNinetyDegrees(const bool &value)
    {
      rotateXYNinetyDegrees_ = value;
    }
  };

  /** \class Intrepid2::TensorViewFunctor
      \brief Functor that combines two component views into an output view, applying tensor-product, contraction, or entry-wise-match rules along each rank; run under a Kokkos::TeamPolicy.
  */
  template<class ExecutionSpace, class OutputScalar, class OutputFieldType>
  class TensorViewFunctor
  {
    using ScratchSpace      = typename ExecutionSpace::scratch_memory_space;
    using OutputScratchView = Kokkos::View<OutputScalar*,ScratchSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>;

    using TeamPolicy = Kokkos::TeamPolicy<ExecutionSpace>;
    using TeamMember = typename TeamPolicy::member_type;

    using RankCombinationType = typename TensorViewIteratorType::RankCombinationType;

    OutputFieldType output_; // F,P[,D…]
    OutputFieldType input1_; // F1,P[,D…] or F1,P1[,D…]
    OutputFieldType input2_; // F2,P[,D…] or F2,P2[,D…]

    int numFields_, numPoints_;
    int numFields1_, numPoints1_;
    int numFields2_, numPoints2_;

    bool tensorPoints_; // if true, input1 and input2 refer to values at decomposed points, and P = P1 * P2.  If false, then the two inputs refer to points in the full-dimensional space, and their point lengths are the same as that of the final output.

    using RankCombinationViewType = typename TensorViewIteratorType::RankCombinationViewType;
    RankCombinationViewType rank_combinations_; // indicates the policy by which the input views will be combined in output view

    double weight_;

  public:

    TensorViewFunctor(OutputFieldType output, OutputFieldType inputValues1, OutputFieldType inputValues2,
                      bool tensorPoints, double weight)
    : output_(output), input1_(inputValues1), input2_(inputValues2), tensorPoints_(tensorPoints), weight_(weight)
    {
      numFields_ = output.extent_int(0);
      numPoints_ = output.extent_int(1);

      numFields1_ = inputValues1.extent_int(0);
      numPoints1_ = inputValues1.extent_int(1);

      numFields2_ = inputValues2.extent_int(0);
      numPoints2_ = inputValues2.extent_int(1);

      if (!tensorPoints_)
      {
        // then the point counts should all match
        INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != numPoints1_, std::invalid_argument, "incompatible point counts");
        INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != numPoints2_, std::invalid_argument, "incompatible point counts");
      }
      else
      {
        INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != numPoints1_ * numPoints2_, std::invalid_argument, "incompatible point counts");
      }

      INTREPID2_TEST_FOR_EXCEPTION(numFields_ != numFields1_ * numFields2_, std::invalid_argument, "incompatible field sizes");

      const ordinal_type max_rank = std::max(inputValues1.rank(),inputValues2.rank());
      // at present, no supported case will result in an output rank greater than both input ranks

      const ordinal_type outputRank = output.rank();
      INTREPID2_TEST_FOR_EXCEPTION(outputRank > max_rank, std::invalid_argument, "Unsupported view combination.");
      rank_combinations_ = RankCombinationViewType("Rank_combinations_", max_rank);
      auto rank_combinations_host = Kokkos::create_mirror_view(rank_combinations_);

      rank_combinations_host[0] = TensorViewIteratorType::TENSOR_PRODUCT; // field combination is always tensor product
      rank_combinations_host[1] = tensorPoints ? TensorViewIteratorType::TENSOR_PRODUCT : TensorViewIteratorType::DIMENSION_MATCH; // tensorPoints controls interpretation of the point dimension
      for (ordinal_type d=2; d<max_rank; d++)
      {
        // d >= 2 have the interpretation of spatial dimensions (gradients, etc.)
        // we let the extents of the containers determine what we're doing here
        if ((inputValues1.extent_int(d) == inputValues2.extent_int(d)) && (output.extent_int(d) == 1))
        {
          rank_combinations_host[d] = TensorViewIteratorType::TENSOR_CONTRACTION;
        }
        else if (((inputValues1.extent_int(d) == output.extent_int(d)) && (inputValues2.extent_int(d) == 1))
              || ((inputValues2.extent_int(d) == output.extent_int(d)) && (inputValues1.extent_int(d) == 1))
                )
        {
          // this looks like multiplication of a vector by a scalar, resulting in a vector
          // this can be understood as a tensor product
          rank_combinations_host[d] = TensorViewIteratorType::TENSOR_PRODUCT;
        }
        else if ((inputValues1.extent_int(d) == inputValues2.extent_int(d)) && (output.extent_int(d) == inputValues1.extent_int(d) * inputValues2.extent_int(d)))
        {
          // this is actually a generalization of the above case: a tensor product, something like a vector outer product
          rank_combinations_host[d] = TensorViewIteratorType::TENSOR_PRODUCT;
        }
        else if ((inputValues1.extent_int(d) == inputValues2.extent_int(d)) && (output.extent_int(d) == inputValues1.extent_int(d)))
        {
          // it's a bit weird (I'm not aware of the use case, in the present context), but we can handle this case by adopting DIMENSION_MATCH here
          // this is something like MATLAB's .* and .+ operators, which operate entry-wise
          rank_combinations_host[d] = TensorViewIteratorType::DIMENSION_MATCH;
        }
        else
        {
          std::cout << "inputValues1.extent_int(" << d << ") = " << inputValues1.extent_int(d) << std::endl;
          std::cout << "inputValues2.extent_int(" << d << ") = " << inputValues2.extent_int(d) << std::endl;
          std::cout << "output.extent_int(" << d << ") = " << output.extent_int(d) << std::endl;
          INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "unable to find an interpretation for this combination of views");
        }
      }
      Kokkos::deep_copy(rank_combinations_,rank_combinations_host);
    }
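
    // Illustrative example of the rank-combination logic above: combining vector-valued input1 of shape (F1,P,D)
    // with scalar-valued input2 of shape (F2,P) (whose extent in rank 2 is 1) into an output of shape (F,P,D):
    // rank 0 (fields) gets TENSOR_PRODUCT, rank 1 (points) gets DIMENSION_MATCH or TENSOR_PRODUCT depending on
    // tensorPoints, and rank 2 falls into the "vector times scalar" branch, so it also gets TENSOR_PRODUCT.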

    KOKKOS_INLINE_FUNCTION
    void operator()( const TeamMember & teamMember ) const
    {
      auto fieldOrdinal1 = teamMember.league_rank();

      Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
        TensorViewIteratorType it(output_,input1_,input2_,rank_combinations_);
        const int FIELD_ORDINAL_DIMENSION = 0;
        it.setLocation({fieldOrdinal1,0,0,0,0,0,0},{fieldOrdinal2,0,0,0,0,0,0});
        int next_increment_rank = FIELD_ORDINAL_DIMENSION; // used to initialize prev_increment_rank at the start of the do/while loop.  Notionally, we last incremented in the field ordinal rank to get to the {fieldOrdinal1,0,0,0,0,0,0},{fieldOrdinal2,0,0,0,0,0,0} location.
        OutputScalar accumulator = 0;

        do
        {
          accumulator += weight_ * it.getView1Entry() * it.getView2Entry();
          next_increment_rank = it.nextIncrementRank();

          if ((next_increment_rank < 0) || (rank_combinations_[next_increment_rank] != TensorViewIteratorType::TENSOR_CONTRACTION))
          {
            // then we've finished the accumulation and should set the value
            it.set(accumulator);
            // reset the accumulator:
            accumulator = 0;
          }
        } while (it.increment() > FIELD_ORDINAL_DIMENSION);
      });
    }
  };

  template<typename BasisBaseClass = void>
  class Basis_TensorBasis
  :
  public BasisBaseClass
  {
  public:
    using BasisBase = BasisBaseClass;
    using BasisPtr  = Teuchos::RCP<BasisBase>;

  protected:
    BasisPtr basis1_;
    BasisPtr basis2_;

    std::vector<BasisPtr> tensorComponents_;

    std::string name_; // name of the basis

    int numTensorialExtrusions_; // relative to cell topo returned by getBaseCellTopology().
  public:
    using DeviceType      = typename BasisBase::DeviceType;
    using ExecutionSpace  = typename BasisBase::ExecutionSpace;
    using OutputValueType = typename BasisBase::OutputValueType;
    using PointValueType  = typename BasisBase::PointValueType;

    using OrdinalTypeArray1DHost = typename BasisBase::OrdinalTypeArray1DHost;
    using OrdinalTypeArray2DHost = typename BasisBase::OrdinalTypeArray2DHost;
    using OutputViewType         = typename BasisBase::OutputViewType;
    using PointViewType          = typename BasisBase::PointViewType;
    using TensorBasis            = Basis_TensorBasis<BasisBaseClass>;
  public:
    Basis_TensorBasis(BasisPtr basis1, BasisPtr basis2, EFunctionSpace functionSpace = FUNCTION_SPACE_MAX,
                      const bool useShardsCellTopologyAndTags = false)
    :
    basis1_(basis1),basis2_(basis2)
    {
      this->functionSpace_ = functionSpace;

      Basis_TensorBasis* basis1AsTensor = dynamic_cast<Basis_TensorBasis*>(basis1_.get());
      if (basis1AsTensor)
      {
        auto basis1Components = basis1AsTensor->getTensorBasisComponents();
        tensorComponents_.insert(tensorComponents_.end(), basis1Components.begin(), basis1Components.end());
      }
      else
      {
        tensorComponents_.push_back(basis1_);
      }

      Basis_TensorBasis* basis2AsTensor = dynamic_cast<Basis_TensorBasis*>(basis2_.get());
      if (basis2AsTensor)
      {
        auto basis2Components = basis2AsTensor->getTensorBasisComponents();
        tensorComponents_.insert(tensorComponents_.end(), basis2Components.begin(), basis2Components.end());
      }
      else
      {
        tensorComponents_.push_back(basis2_);
      }

      this->basisCardinality_ = basis1->getCardinality() * basis2->getCardinality();
      this->basisDegree_      = std::max(basis1->getDegree(), basis2->getDegree());

      {
        std::ostringstream basisName;
        basisName << basis1->getName() << " x " << basis2->getName();
        name_ = basisName.str();
      }

      // set cell topology
      this->basisCellTopology_ = tensorComponents_[0]->getBaseCellTopology();
      this->numTensorialExtrusions_ = tensorComponents_.size() - 1;

      this->basisType_        = basis1_->getBasisType();
      this->basisCoordinates_ = COORDINATES_CARTESIAN;

      ordinal_type spaceDim1 = basis1_->getDomainDimension();
      ordinal_type spaceDim2 = basis2_->getDomainDimension();

      INTREPID2_TEST_FOR_EXCEPTION(spaceDim2 != 1, std::invalid_argument, "TensorBasis only supports 1D bases in basis2_ position");

      if (this->getBasisType() == BASIS_FEM_HIERARCHICAL)
      {
        // fill in degree lookup:
        int degreeSize = basis1_->getPolynomialDegreeLength() + basis2_->getPolynomialDegreeLength();
        this->fieldOrdinalPolynomialDegree_   = OrdinalTypeArray2DHost("TensorBasis - field ordinal polynomial degree", this->basisCardinality_, degreeSize);
        this->fieldOrdinalH1PolynomialDegree_ = OrdinalTypeArray2DHost("TensorBasis - field ordinal polynomial H^1 degree", this->basisCardinality_, degreeSize);

        const ordinal_type basis1Cardinality = basis1_->getCardinality();
        const ordinal_type basis2Cardinality = basis2_->getCardinality();

        int degreeLengthField1 = basis1_->getPolynomialDegreeLength();
        int degreeLengthField2 = basis2_->getPolynomialDegreeLength();

        for (ordinal_type fieldOrdinal1 = 0; fieldOrdinal1 < basis1Cardinality; fieldOrdinal1++)
        {
          OrdinalTypeArray1DHost degreesField1   = basis1_->getPolynomialDegreeOfField(fieldOrdinal1);
          OrdinalTypeArray1DHost h1DegreesField1 = basis1_->getH1PolynomialDegreeOfField(fieldOrdinal1);
          for (ordinal_type fieldOrdinal2 = 0; fieldOrdinal2 < basis2Cardinality; fieldOrdinal2++)
          {
            OrdinalTypeArray1DHost degreesField2   = basis2_->getPolynomialDegreeOfField(fieldOrdinal2);
            OrdinalTypeArray1DHost h1DegreesField2 = basis2_->getH1PolynomialDegreeOfField(fieldOrdinal2);
            const ordinal_type tensorFieldOrdinal = fieldOrdinal2 * basis1Cardinality + fieldOrdinal1;

            for (int d3=0; d3<degreeLengthField1; d3++)
            {
              this->fieldOrdinalPolynomialDegree_  (tensorFieldOrdinal,d3) = degreesField1(d3);
              this->fieldOrdinalH1PolynomialDegree_(tensorFieldOrdinal,d3) = h1DegreesField1(d3);
            }
            for (int d3=0; d3<degreeLengthField2; d3++)
            {
              this->fieldOrdinalPolynomialDegree_  (tensorFieldOrdinal,d3+degreeLengthField1) = degreesField2(d3);
              this->fieldOrdinalH1PolynomialDegree_(tensorFieldOrdinal,d3+degreeLengthField1) = h1DegreesField2(d3);
            }
          }
        }
      }
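
      // Note on tensor field numbering: tensorFieldOrdinal = fieldOrdinal2 * basis1Cardinality + fieldOrdinal1
      // (as in the loops above and below), so the basis1 ordinal is fastest-moving.  For example, with basis1
      // cardinality 3 and basis2 cardinality 2, the (fieldOrdinal1, fieldOrdinal2) pairs enumerate as
      // (0,0)->0, (1,0)->1, (2,0)->2, (0,1)->3, (1,1)->4, (2,1)->5.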

      if (useShardsCellTopologyAndTags)
      {
        setShardsTopologyAndTags();
      }
      else
      {
        // we build tags recursively, making reference to basis1_ and basis2_'s tags to produce the tensor product tags.
        // // initialize tags
        const auto & cardinality = this->basisCardinality_;

        // Basis-dependent initializations
        const ordinal_type tagSize  = 4; // size of DoF tag, i.e., number of fields in the tag
        const ordinal_type posScDim = 0; // position in the tag, counting from 0, of the subcell dim
        const ordinal_type posScOrd = 1; // position in the tag, counting from 0, of the subcell ordinal
        const ordinal_type posDfOrd = 2; // position in the tag, counting from 0, of DoF ordinal relative to the subcell
        const ordinal_type posDfCnt = 3; // position in the tag, counting from 0, of DoF count for the subcell

        OrdinalTypeArray1DHost tagView("tag view", cardinality*tagSize);

        // we assume that basis2_ is defined on a line, and that basis1_ is defined on a domain that is once-extruded by that line.
        auto cellTopo = CellTopology::cellTopology(this->basisCellTopology_, numTensorialExtrusions_);
        auto basis1Topo = cellTopo->getTensorialComponent();

        const ordinal_type spaceDim = spaceDim1 + spaceDim2;
        const ordinal_type sideDim  = spaceDim - 1;

        const OrdinalTypeArray2DHost ordinalToTag1 = basis1_->getAllDofTags();
        const OrdinalTypeArray2DHost ordinalToTag2 = basis2_->getAllDofTags();

        for (int fieldOrdinal1=0; fieldOrdinal1<basis1_->getCardinality(); fieldOrdinal1++)
        {
          ordinal_type subcellDim1   = ordinalToTag1(fieldOrdinal1,posScDim);
          ordinal_type subcellOrd1   = ordinalToTag1(fieldOrdinal1,posScOrd);
          ordinal_type subcellDfCnt1 = ordinalToTag1(fieldOrdinal1,posDfCnt);
          for (int fieldOrdinal2=0; fieldOrdinal2<basis2_->getCardinality(); fieldOrdinal2++)
          {
            ordinal_type subcellDim2   = ordinalToTag2(fieldOrdinal2,posScDim);
            ordinal_type subcellOrd2   = ordinalToTag2(fieldOrdinal2,posScOrd);
            ordinal_type subcellDfCnt2 = ordinalToTag2(fieldOrdinal2,posDfCnt);

            ordinal_type subcellDim = subcellDim1 + subcellDim2;
            ordinal_type subcellOrd;
            if (subcellDim2 == 0)
            {
              // vertex node in extrusion; the subcell is not extruded but belongs to one of the two "copies"
              // of the basis1 topology
              ordinal_type sideOrdinal = cellTopo->getTensorialComponentSideOrdinal(subcellOrd2); // subcellOrd2 is a "side" of the line topology
              subcellOrd = CellTopology::getSubcellOrdinalMap(cellTopo, sideDim, sideOrdinal,
                                                              subcellDim1, subcellOrd1);
            }
            else
            {
              // line subcell in time; the subcell *is* extruded in final dimension
              subcellOrd = cellTopo->getExtrudedSubcellOrdinal(subcellDim1, subcellOrd1);
              if (subcellOrd == -1)
              {
                std::cout << "ERROR: -1 subcell ordinal.\n";
                subcellOrd = cellTopo->getExtrudedSubcellOrdinal(subcellDim1, subcellOrd1);
              }
            }
            ordinal_type tensorFieldOrdinal = fieldOrdinal2 * basis1_->getCardinality() + fieldOrdinal1;
            // cout << "(" << fieldOrdinal1 << "," << fieldOrdinal2 << ") --> " << i << endl;
            ordinal_type dofOffsetOrdinal1 = ordinalToTag1(fieldOrdinal1,posDfOrd);
            ordinal_type dofOffsetOrdinal2 = ordinalToTag2(fieldOrdinal2,posDfOrd);
            ordinal_type dofsForSubcell1   = ordinalToTag1(fieldOrdinal1,posDfCnt);
            ordinal_type dofOffsetOrdinal  = dofOffsetOrdinal2 * dofsForSubcell1 + dofOffsetOrdinal1;
            tagView(tagSize*tensorFieldOrdinal + posScDim) = subcellDim; // subcellDim
            tagView(tagSize*tensorFieldOrdinal + posScOrd) = subcellOrd; // subcell ordinal
            tagView(tagSize*tensorFieldOrdinal + posDfOrd) = dofOffsetOrdinal;  // ordinal of the specified DoF relative to the subcell
            tagView(tagSize*tensorFieldOrdinal + posDfCnt) = subcellDfCnt1 * subcellDfCnt2; // total number of DoFs associated with the subcell
          }
        }

        // // Basis-independent function sets tag and enum data in tagToOrdinal_ and ordinalToTag_ arrays:
        // // tags are constructed on host
        this->setOrdinalTagData(this->tagToOrdinal_,
                                this->ordinalToTag_,
                                tagView,
                                this->basisCardinality_,
                                tagSize,
                                posScDim,
                                posScOrd,
                                posDfOrd);
      }
    }

    void setShardsTopologyAndTags()
    {
      shards::CellTopology cellTopo1 = basis1_->getBaseCellTopology();
      shards::CellTopology cellTopo2 = basis2_->getBaseCellTopology();

      auto cellKey1 = basis1_->getBaseCellTopology().getKey();
      auto cellKey2 = basis2_->getBaseCellTopology().getKey();

      const int numTensorialExtrusions = basis1_->getNumTensorialExtrusions() + basis2_->getNumTensorialExtrusions();
      if ((cellKey1 == shards::Line<2>::key) && (cellKey2 == shards::Line<2>::key) && (numTensorialExtrusions == 0))
      {
        this->basisCellTopology_ = shards::CellTopology(shards::getCellTopologyData<shards::Quadrilateral<4> >() );
      }
      else if ( ((cellKey1 == shards::Quadrilateral<4>::key) && (cellKey2 == shards::Line<2>::key))
             || ((cellKey2 == shards::Quadrilateral<4>::key) && (cellKey1 == shards::Line<2>::key))
             || ((cellKey1 == shards::Line<2>::key) && (cellKey2 == shards::Line<2>::key) && (numTensorialExtrusions == 1))
              )
      {
        this->basisCellTopology_ = shards::CellTopology(shards::getCellTopologyData<shards::Hexahedron<8> >() );
      }
      else if ((cellKey1 == shards::Triangle<3>::key) && (cellKey2 == shards::Line<2>::key))
      {
        this->basisCellTopology_ = shards::CellTopology(shards::getCellTopologyData<shards::Wedge<6> >() );
      }
      else
      {
        INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Cell topology combination not yet supported");
      }

      // numTensorialExtrusions_ is relative to the basisCellTopology_; what we've just done is found a cell topology of the same spatial dimension as the extruded topology, so now numTensorialExtrusions_ should be 0.
      numTensorialExtrusions_ = 0;

      // initialize tags
      {
        const auto & cardinality = this->basisCardinality_;

        // Basis-dependent initializations
        const ordinal_type tagSize  = 4; // size of DoF tag, i.e., number of fields in the tag
        const ordinal_type posScDim = 0; // position in the tag, counting from 0, of the subcell dim
        const ordinal_type posScOrd = 1; // position in the tag, counting from 0, of the subcell ordinal
        const ordinal_type posDfOrd = 2; // position in the tag, counting from 0, of DoF ordinal relative to the subcell

        OrdinalTypeArray1DHost tagView("tag view", cardinality*tagSize);

        shards::CellTopology cellTopo = this->basisCellTopology_;

        ordinal_type tensorSpaceDim = cellTopo.getDimension();
        ordinal_type spaceDim1      = cellTopo1.getDimension();
        ordinal_type spaceDim2      = cellTopo2.getDimension();

        TensorTopologyMap topoMap(cellTopo1, cellTopo2);

        for (ordinal_type d=0; d<=tensorSpaceDim; d++) // d: tensorial dimension
        {
          ordinal_type d2_max = std::min(spaceDim2, d);
          for (ordinal_type d2=0; d2<=d2_max; d2++)
          {
            ordinal_type d1 = d-d2;
            if (d1 > spaceDim1) continue;

            ordinal_type subcellCount2 = cellTopo2.getSubcellCount(d2);
            ordinal_type subcellCount1 = cellTopo1.getSubcellCount(d1);
            for (ordinal_type subcellOrdinal2=0; subcellOrdinal2<subcellCount2; subcellOrdinal2++)
            {
              ordinal_type subcellDofCount2 = basis2_->getDofCount(d2, subcellOrdinal2);
              for (ordinal_type subcellOrdinal1=0; subcellOrdinal1<subcellCount1; subcellOrdinal1++)
              {
                ordinal_type subcellDofCount1    = basis1_->getDofCount(d1, subcellOrdinal1);
                ordinal_type tensorLocalDofCount = subcellDofCount1 * subcellDofCount2;
                for (ordinal_type localDofID2 = 0; localDofID2<subcellDofCount2; localDofID2++)
                {
                  ordinal_type fieldOrdinal2 = basis2_->getDofOrdinal(d2, subcellOrdinal2, localDofID2);
                  OrdinalTypeArray1DHost degreesField2;
                  if (this->basisType_ == BASIS_FEM_HIERARCHICAL) degreesField2 = basis2_->getPolynomialDegreeOfField(fieldOrdinal2);
                  for (ordinal_type localDofID1 = 0; localDofID1<subcellDofCount1; localDofID1++)
                  {
                    ordinal_type fieldOrdinal1      = basis1_->getDofOrdinal(d1, subcellOrdinal1, localDofID1);
                    ordinal_type tensorLocalDofID   = localDofID2 * subcellDofCount1 + localDofID1;
                    ordinal_type tensorFieldOrdinal = fieldOrdinal2 * basis1_->getCardinality() + fieldOrdinal1;
                    tagView(tensorFieldOrdinal*tagSize+0) = d; // subcell dimension
                    tagView(tensorFieldOrdinal*tagSize+1) = topoMap.getCompositeSubcellOrdinal(d1, subcellOrdinal1, d2, subcellOrdinal2);
                    tagView(tensorFieldOrdinal*tagSize+2) = tensorLocalDofID;
                    tagView(tensorFieldOrdinal*tagSize+3) = tensorLocalDofCount;
                  } // localDofID1
                }   // localDofID2
              }     // subcellOrdinal1
            }       // subcellOrdinal2
          }
        }

        // // Basis-independent function sets tag and enum data in tagToOrdinal_ and ordinalToTag_ arrays:
        // // tags are constructed on host
        this->setOrdinalTagData(this->tagToOrdinal_,
                                this->ordinalToTag_,
                                tagView,
                                this->basisCardinality_,
                                tagSize,
                                posScDim,
                                posScOrd,
                                posDfOrd);
      }
    }

    virtual int getNumTensorialExtrusions() const override
    {
      return numTensorialExtrusions_;
    }

    ordinal_type getTensorDkEnumeration(ordinal_type dkEnum1, ordinal_type operatorOrder1,
                                        ordinal_type dkEnum2, ordinal_type operatorOrder2) const
    {
      ordinal_type spaceDim1 = basis1_->getDomainDimension();
      ordinal_type spaceDim2 = basis2_->getDomainDimension();

      // We support total spaceDim <= 7.
      switch (spaceDim1)
      {
        case 0:
        {
          INTREPID2_TEST_FOR_EXCEPTION(operatorOrder1 > 0, std::invalid_argument, "For spaceDim1 = 0, operatorOrder1 must be 0.");
          return dkEnum2;
        }
        case 1:
          switch (spaceDim2)
          {
            case 1: return getDkTensorIndex<1, 1>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
            case 2: return getDkTensorIndex<1, 2>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
            case 3: return getDkTensorIndex<1, 3>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
            case 4: return getDkTensorIndex<1, 4>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
            case 5: return getDkTensorIndex<1, 5>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
            case 6: return getDkTensorIndex<1, 6>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
            default:
              INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported dimension combination");
          }
        case 2:
          switch (spaceDim2)
          {
            case 1: return getDkTensorIndex<2, 1>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
            case 2: return getDkTensorIndex<2, 2>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
            case 3: return getDkTensorIndex<2, 3>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
            case 4: return getDkTensorIndex<2, 4>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
            case 5: return getDkTensorIndex<2, 5>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
            default:
              INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported dimension combination");
          }
        case 3:
          switch (spaceDim2)
          {
            case 1: return getDkTensorIndex<3, 1>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
            case 2: return getDkTensorIndex<3, 2>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
            case 3: return getDkTensorIndex<3, 3>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
            case 4: return getDkTensorIndex<3, 4>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
            default:
              INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported dimension combination");
          }
        case 4:
          switch (spaceDim2)
          {
            case 1: return getDkTensorIndex<4, 1>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
            case 2: return getDkTensorIndex<4, 2>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
            case 3: return getDkTensorIndex<4, 3>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
            default:
              INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported dimension combination");
          }
        case 5:
          switch (spaceDim2)
          {
            case 1: return getDkTensorIndex<5, 1>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
            case 2: return getDkTensorIndex<5, 2>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
            default:
              INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported dimension combination");
          }
        case 6:
          switch (spaceDim2)
          {
            case 1: return getDkTensorIndex<6, 1>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
            default:
              INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported dimension combination");
          }
        default:
          INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported dimension combination");
      }
    }

    virtual OperatorTensorDecomposition getSimpleOperatorDecomposition(const EOperator &operatorType) const
    {
      const int spaceDim = this->getDomainDimension();

      const EOperator VALUE = Intrepid2::OPERATOR_VALUE;

      std::vector< std::vector<EOperator> > opsVALUE{{VALUE, VALUE}};

      std::vector< std::vector<EOperator> > ops(spaceDim);

      switch (operatorType)
      {
        case VALUE:
          ops = opsVALUE;
          break;
        case OPERATOR_DIV:
        case OPERATOR_CURL:
          // DIV and CURL are multi-family bases; subclasses are required to override
          INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported operator type - TensorBasis subclass should override");
          break;
        case OPERATOR_GRAD:
        case OPERATOR_D1:
        case OPERATOR_D2:
        case OPERATOR_D3:
        case OPERATOR_D4:
        case OPERATOR_D5:
        case OPERATOR_D6:
        case OPERATOR_D7:
        case OPERATOR_D8:
        case OPERATOR_D9:
        case OPERATOR_D10:
        case OPERATOR_Dn:
        {
          auto opOrder = getOperatorOrder(operatorType); // number of derivatives that we take in total
          const int dkCardinality = getDkCardinality(operatorType, 2); // 2 because we have two tensor component bases, basis1_ and basis2_

          ops = std::vector< std::vector<EOperator> >(dkCardinality);

          // the Dk enumeration happens in lexicographic order (reading from left to right: x, y, z, etc.)
          // this governs the nesting order of the dkEnum1, dkEnum2 for loops below: dkEnum2 should increment fastest.
          for (int derivativeCountComp2=0; derivativeCountComp2<=opOrder; derivativeCountComp2++)
          {
            int derivativeCountComp1=opOrder-derivativeCountComp2;
            EOperator op1 = (derivativeCountComp1 == 0) ? OPERATOR_VALUE : EOperator(OPERATOR_D1 + (derivativeCountComp1 - 1));
            EOperator op2 = (derivativeCountComp2 == 0) ? OPERATOR_VALUE : EOperator(OPERATOR_D1 + (derivativeCountComp2 - 1));

            int dkCardinality1 = getDkCardinality(op1, 1); // use dim = 1 because this is a "simple" decomposition -- full decomposition will expand within the dimensions of basis1_
            int dkCardinality2 = getDkCardinality(op2, 1); // use dim = 1 because this is a "simple" decomposition -- full decomposition will expand within the dimensions of basis2_

            for (int dkEnum1=0; dkEnum1<dkCardinality1; dkEnum1++)
            {
              for (int dkEnum2=0; dkEnum2<dkCardinality2; dkEnum2++)
              {
                ordinal_type dkTensorIndex = getDkTensorIndex<1, 1>(dkEnum1, derivativeCountComp1, dkEnum2, derivativeCountComp2);
                ops[dkTensorIndex] = std::vector<EOperator>{op1, op2};
              }
            }
          }
        }
          break;
      }

      std::vector<double> weights(ops.size(), 1.0);
      return OperatorTensorDecomposition(ops, weights);
    }
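
    // Example: for OPERATOR_D2, the simple decomposition constructed above has getDkCardinality(OPERATOR_D2, 2) = 3 entries:
    //   ops[0] = {OPERATOR_D2,    OPERATOR_VALUE}   // second derivative in the basis1_ direction
    //   ops[1] = {OPERATOR_D1,    OPERATOR_D1   }   // mixed derivative
    //   ops[2] = {OPERATOR_VALUE, OPERATOR_D2   }   // second derivative in the basis2_ direction
    // with unit weights, matching the lexicographic D2 enumeration (2,0), (1,1), (0,2).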

    virtual OperatorTensorDecomposition getOperatorDecomposition(const EOperator operatorType) const
    {
      if (((operatorType >= OPERATOR_D1) && (operatorType <= OPERATOR_D10)) || (operatorType == OPERATOR_GRAD))
      {
        // ordering of the operators is reverse-lexicographic, reading left to right (highest-dimension is fastest-moving).
        // first entry will be (operatorType, VALUE, …, VALUE)
        // next will be (operatorType - 1, OP_D1, VALUE, …, VALUE)
        // then (operatorType - 1, VALUE, OP_D1, …, VALUE)

        ordinal_type numBasisComponents = tensorComponents_.size();

        auto opOrder = getOperatorOrder(operatorType); // number of derivatives that we take in total
        const int dkCardinality = getDkCardinality(operatorType, numBasisComponents);

        std::vector< std::vector<EOperator> > ops(dkCardinality);

        std::vector<EOperator> prevEntry(numBasisComponents, OPERATOR_VALUE);
        prevEntry[0] = operatorType;

        ops[0] = prevEntry;

        for (ordinal_type dkOrdinal=1; dkOrdinal<dkCardinality; dkOrdinal++)
        {
          std::vector<EOperator> entry = prevEntry;

          // decrement to follow reverse lexicographic ordering:
          /*
           How to tell when it is time to decrement the nth entry:
           1. Let a be the sum of the opOrders for entries 0 through n-1.
           2. Let b be the sum of the nth entry and the final entry.
           3. If opOrder == a + b, then the nth entry should be decremented.
           */
          ordinal_type cumulativeOpOrder = 0;
          ordinal_type finalOpOrder = getOperatorOrder(entry[numBasisComponents-1]);
          for (ordinal_type compOrdinal=0; compOrdinal<numBasisComponents; compOrdinal++)
          {
            const ordinal_type thisOpOrder = getOperatorOrder(entry[compOrdinal]);
            cumulativeOpOrder += thisOpOrder;
            if (cumulativeOpOrder + finalOpOrder == opOrder)
            {
              // decrement this
              EOperator decrementedOp;
              if (thisOpOrder == 1)
              {
                decrementedOp = OPERATOR_VALUE;
              }
              else
              {
                decrementedOp = static_cast<EOperator>(OPERATOR_D1 + ((thisOpOrder - 1) - 1));
              }
              entry[compOrdinal] = decrementedOp;
              const ordinal_type remainingOpOrder = opOrder - cumulativeOpOrder + 1;
              entry[compOrdinal+1] = static_cast<EOperator>(OPERATOR_D1 + (remainingOpOrder - 1));
              for (ordinal_type i=compOrdinal+2; i<numBasisComponents; i++)
              {
                entry[i] = OPERATOR_VALUE;
              }
              break;
            }
          }
          ops[dkOrdinal] = entry;
          prevEntry = entry;
        }
        std::vector<double> weights(dkCardinality, 1.0);

        return OperatorTensorDecomposition(ops, weights);
      }
      else
      {
        OperatorTensorDecomposition opSimpleDecomposition = this->getSimpleOperatorDecomposition(operatorType);
        std::vector<BasisPtr> componentBases {basis1_, basis2_};
        return opSimpleDecomposition.expandedDecomposition(componentBases);
      }
    }
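
    // Example: with three tensor components and operatorType = OPERATOR_D2, the reverse-lexicographic
    // ordering constructed above is
    //   (D2, VALUE, VALUE), (D1, D1, VALUE), (D1, VALUE, D1), (VALUE, D2, VALUE), (VALUE, D1, D1), (VALUE, VALUE, D2),
    // which has getDkCardinality(OPERATOR_D2, 3) = 6 entries.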

    virtual BasisValues<OutputValueType,DeviceType> allocateBasisValues( TensorPoints<PointValueType,DeviceType> points, const EOperator operatorType = OPERATOR_VALUE) const override
    {
      const bool operatorIsDk = (operatorType >= OPERATOR_D1) && (operatorType <= OPERATOR_D10);
      const bool operatorSupported = (operatorType == OPERATOR_VALUE) || (operatorType == OPERATOR_GRAD) || (operatorType == OPERATOR_CURL) || (operatorType == OPERATOR_DIV) || operatorIsDk;
      INTREPID2_TEST_FOR_EXCEPTION(!operatorSupported, std::invalid_argument, "operator is not supported by allocateBasisValues");

      // check that points's spatial dimension matches the basis
      const int spaceDim = this->getDomainDimension();
      INTREPID2_TEST_FOR_EXCEPTION(spaceDim != points.extent_int(1), std::invalid_argument, "points must be shape (P,D), with D equal to the dimension of the basis domain");

      // check that points has enough tensor components
      ordinal_type numBasisComponents = tensorComponents_.size();
      if (numBasisComponents > points.numTensorComponents())
      {
        // Then we require points to have a trivial tensor structure.  (Subclasses could be more sophisticated.)
        // (More sophisticated approaches are possible here, too, but likely the most common use case in which there is not a one-to-one correspondence
        //  between basis components and point components will involve trivial tensor structure in the points...)
        INTREPID2_TEST_FOR_EXCEPTION(points.numTensorComponents() != 1, std::invalid_argument, "If points does not have the same number of tensor components as the basis, then it should have trivial tensor structure.");
        const ordinal_type numPoints = points.extent_int(0);
        auto outputView = this->allocateOutputView(numPoints, operatorType);

        Data<OutputValueType,DeviceType> outputData(outputView);
        TensorData<OutputValueType,DeviceType> outputTensorData(outputData);

        return BasisValues<OutputValueType,DeviceType>(outputTensorData);
      }
      INTREPID2_TEST_FOR_EXCEPTION(numBasisComponents > points.numTensorComponents(), std::invalid_argument, "points must have at least as many tensorial components as basis.");

      OperatorTensorDecomposition opDecomposition = getOperatorDecomposition(operatorType);

      ordinal_type numVectorComponents = opDecomposition.numVectorComponents();
      const bool useVectorData = numVectorComponents > 1;

      std::vector<ordinal_type> componentPointCounts(numBasisComponents);
      ordinal_type pointComponentNumber = 0;
      for (ordinal_type r=0; r<numBasisComponents; r++)
      {
        const ordinal_type compSpaceDim = tensorComponents_[r]->getDomainDimension();
        ordinal_type dimsSoFar = 0;
        ordinal_type numPointsForBasisComponent = 1;
        while (dimsSoFar < compSpaceDim)
        {
          INTREPID2_TEST_FOR_EXCEPTION(pointComponentNumber >= points.numTensorComponents(), std::invalid_argument, "Error in processing points container; perhaps it is mis-sized?");
          const int numComponentPoints = points.componentPointCount(pointComponentNumber);
          const int numComponentDims   = points.getTensorComponent(pointComponentNumber).extent_int(1);
          numPointsForBasisComponent *= numComponentPoints;
          dimsSoFar += numComponentDims;
          INTREPID2_TEST_FOR_EXCEPTION(dimsSoFar > points.numTensorComponents(), std::invalid_argument, "Error in processing points container; perhaps it is mis-sized?");
          pointComponentNumber++;
        }
        componentPointCounts[r] = numPointsForBasisComponent;
      }

      if (useVectorData)
      {
        const int numFamilies = 1;
        std::vector< std::vector<TensorData<OutputValueType,DeviceType> > > vectorComponents(numFamilies, std::vector<TensorData<OutputValueType,DeviceType> >(numVectorComponents));

        const int familyOrdinal = 0;
        for (ordinal_type vectorComponentOrdinal=0; vectorComponentOrdinal<numVectorComponents; vectorComponentOrdinal++)
        {
          if (!opDecomposition.identicallyZeroComponent(vectorComponentOrdinal))
          {
            std::vector< Data<OutputValueType,DeviceType> > componentData;
            for (ordinal_type r=0; r<numBasisComponents; r++)
            {
              const int numComponentPoints = componentPointCounts[r];
              const EOperator op = opDecomposition.op(vectorComponentOrdinal, r);
              auto componentView = tensorComponents_[r]->allocateOutputView(numComponentPoints, op);
              componentData.push_back(Data<OutputValueType,DeviceType>(componentView));
            }
            vectorComponents[familyOrdinal][vectorComponentOrdinal] = TensorData<OutputValueType,DeviceType>(componentData);
          }
        }
        VectorData<OutputValueType,DeviceType> vectorData(vectorComponents);
        return BasisValues<OutputValueType,DeviceType>(vectorData);
      }
      else
      {
        // TensorData: single tensor product
        std::vector< Data<OutputValueType,DeviceType> > componentData;

        const ordinal_type vectorComponentOrdinal = 0;
        for (ordinal_type r=0; r<numBasisComponents; r++)
        {
          const int numComponentPoints = componentPointCounts[r];
          const EOperator op = opDecomposition.op(vectorComponentOrdinal, r);
          auto componentView = tensorComponents_[r]->allocateOutputView(numComponentPoints, op);

          const int rank = 2; // (F,P) -- TensorData-only BasisValues are always scalar-valued.  Use VectorData for vector-valued.
          // (we need to be explicit about the rank argument because GRAD, even in 1D, elevates to rank 3, so e.g. DIV of HDIV uses a componentView that is rank 3;
          //  we want Data to insulate us from that fact)
1286  const Kokkos::Array<int,7> extents {componentView.extent_int(0), componentView.extent_int(1), 1,1,1,1,1};
1287  Kokkos::Array<DataVariationType,7> variationType {GENERAL, GENERAL, CONSTANT, CONSTANT, CONSTANT, CONSTANT, CONSTANT };
1288  componentData.push_back(Data<OutputValueType,DeviceType>(componentView, rank, extents, variationType));
1289  }
1290 
1291  TensorData<OutputValueType,DeviceType> tensorData(componentData);
1292 
1293  std::vector< TensorData<OutputValueType,DeviceType> > tensorDataEntries {tensorData};
1294  return BasisValues<OutputValueType,DeviceType>(tensorDataEntries);
1295  }
1296  }
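 // A minimal usage sketch for the allocation path above (hypothetical names: "basis" is a
 // tensor basis and "tensorPoints" a TensorPoints container built from per-dimension points):
 //
 //   auto basisValues = basis->allocateBasisValues(tensorPoints, OPERATOR_VALUE);
 //   basis->getValues(basisValues, tensorPoints, OPERATOR_VALUE);
 //
 // The returned BasisValues wraps TensorData when the operator decomposition is scalar-valued,
 // and VectorData when it has multiple vector components (e.g., GRAD in more than one dimension).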
1297 
1298  // since the getValues() below only overrides the FEM variant, we specify that
1299  // we use the base class's getValues(), which implements the FVD variant by throwing an exception.
1300  // (It's an error to use the FVD variant on this basis.)
1301  using BasisBase::getValues;
1302 
1314  void getComponentPoints(const PointViewType inputPoints, const bool attemptTensorDecomposition,
1315  PointViewType & inputPoints1, PointViewType & inputPoints2, bool &tensorDecompositionSucceeded) const
1316  {
1317  INTREPID2_TEST_FOR_EXCEPTION(attemptTensorDecomposition, std::invalid_argument, "tensor decomposition not yet supported");
1318 
1319  // For inputPoints that are actually a tensor product of component quadrature points (say),
1320  // having just the one input (which will have a lot of redundant point data) is suboptimal.
1321  // The general case can have unique x/y/z coordinates at every point, though, so we have to support that
1322  // when this interface is used. But we may try detecting that the data is tensor-product and compressing
1323  // from there... Ultimately, we should also add a getValues() variant that takes multiple input point containers,
1324  // one for each tensorial dimension.
1325 
1326  // this initial implementation is intended to simplify development of 2D and 3D bases, while also opening
1327  // the possibility of higher-dimensional bases. It is not necessarily optimized for speed/memory. There
1328  // are things we can do in this regard, which may become important for matrix-free computations wherein
1329  // basis values don't get stored but are computed dynamically.
1330 
1331  int spaceDim1 = basis1_->getDomainDimension();
1332  int spaceDim2 = basis2_->getDomainDimension();
1333 
1334  int totalSpaceDim = inputPoints.extent_int(1);
1335 
1336  TEUCHOS_ASSERT(spaceDim1 + spaceDim2 == totalSpaceDim);
1337 
1338  // first pass: just take subviews to get input points -- this will result in redundant computations when points are themselves tensor product (i.e., inputPoints itself contains redundant data)
1339 
1340  inputPoints1 = Kokkos::subview(inputPoints,Kokkos::ALL(),std::make_pair(0,spaceDim1));
1341  inputPoints2 = Kokkos::subview(inputPoints,Kokkos::ALL(),std::make_pair(spaceDim1,totalSpaceDim));
1342 
1346 
1347  tensorDecompositionSucceeded = false;
1348  }
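 // Illustrative example (not part of the implementation): for a hexahedron formed as
 // quadrilateral x line, spaceDim1 == 2 and spaceDim2 == 1, so a (P,3) inputPoints container
 // is split by the subviews above into inputPoints1 holding the (x,y) columns and inputPoints2
 // holding the z column. Because tensor decomposition is not attempted here,
 // tensorDecompositionSucceeded is always returned as false.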
1349 
1358  virtual void getDofCoords( typename BasisBase::ScalarViewType dofCoords ) const override
1359  {
1360  int spaceDim1 = basis1_->getBaseCellTopology().getDimension();
1361  int spaceDim2 = basis2_->getBaseCellTopology().getDimension();
1362 
1363  using ValueType = typename BasisBase::ScalarViewType::value_type;
1364  using ResultLayout = typename DeduceLayout< typename BasisBase::ScalarViewType >::result_layout;
1365  using ViewType = Kokkos::DynRankView<ValueType, ResultLayout, DeviceType >;
1366 
1367  const ordinal_type basisCardinality1 = basis1_->getCardinality();
1368  const ordinal_type basisCardinality2 = basis2_->getCardinality();
1369 
1370  ViewType dofCoords1("dofCoords1",basisCardinality1,spaceDim1);
1371  ViewType dofCoords2("dofCoords2",basisCardinality2,spaceDim2);
1372 
1373  basis1_->getDofCoords(dofCoords1);
1374  basis2_->getDofCoords(dofCoords2);
1375 
1376  Kokkos::RangePolicy<ExecutionSpace> policy(0, basisCardinality2);
1377  Kokkos::parallel_for(policy, KOKKOS_LAMBDA (const int fieldOrdinal2)
1378  {
1379  for (int fieldOrdinal1=0; fieldOrdinal1<basisCardinality1; fieldOrdinal1++)
1380  {
1381  const ordinal_type fieldOrdinal = fieldOrdinal1 + fieldOrdinal2 * basisCardinality1;
1382  for (int d1=0; d1<spaceDim1; d1++)
1383  {
1384  dofCoords(fieldOrdinal,d1) = dofCoords1(fieldOrdinal1,d1);
1385  }
1386  for (int d2=0; d2<spaceDim2; d2++)
1387  {
1388  dofCoords(fieldOrdinal,spaceDim1+d2) = dofCoords2(fieldOrdinal2,d2);
1389  }
1390  }
1391  });
1392  }
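 // Illustrative example of the DoF ordering used above: with basisCardinality1 == 3 and
 // basisCardinality2 == 2, the composite ordinal for (fieldOrdinal1, fieldOrdinal2) == (2, 1)
 // is 2 + 1 * 3 == 5, and its dofCoords row concatenates the spaceDim1 coordinates of basis1's
 // DoF 2 with the spaceDim2 coordinates of basis2's DoF 1.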
1393 
1394 
1404  virtual void getDofCoeffs( typename BasisBase::ScalarViewType dofCoeffs ) const override
1405  {
1406  using ValueType = typename BasisBase::ScalarViewType::value_type;
1407  using ResultLayout = typename DeduceLayout< typename BasisBase::ScalarViewType >::result_layout;
1408  using ViewType = Kokkos::DynRankView<ValueType, ResultLayout, DeviceType >;
1409 
1410  const ordinal_type basisCardinality1 = basis1_->getCardinality();
1411  const ordinal_type basisCardinality2 = basis2_->getCardinality();
1412 
1413  bool isVectorBasis1 = getFieldRank(basis1_->getFunctionSpace()) == 1;
1414  bool isVectorBasis2 = getFieldRank(basis2_->getFunctionSpace()) == 1;
1415 
1416  INTREPID2_TEST_FOR_EXCEPTION(isVectorBasis1 && isVectorBasis2, std::invalid_argument, "the case in which basis1 and basis2 are vector bases is not supported");
1417 
1418  int basisDim1 = isVectorBasis1 ? basis1_->getBaseCellTopology().getDimension() : 1;
1419  int basisDim2 = isVectorBasis2 ? basis2_->getBaseCellTopology().getDimension() : 1;
1420 
1421  auto dofCoeffs1 = isVectorBasis1 ? ViewType("dofCoeffs1",basis1_->getCardinality(), basisDim1) : ViewType("dofCoeffs1",basis1_->getCardinality());
1422  auto dofCoeffs2 = isVectorBasis2 ? ViewType("dofCoeffs2",basis2_->getCardinality(), basisDim2) : ViewType("dofCoeffs2",basis2_->getCardinality());
1423 
1424  basis1_->getDofCoeffs(dofCoeffs1);
1425  basis2_->getDofCoeffs(dofCoeffs2);
1426 
1427  Kokkos::RangePolicy<ExecutionSpace> policy(0, basisCardinality2);
1428  Kokkos::parallel_for(policy, KOKKOS_LAMBDA (const int fieldOrdinal2)
1429  {
1430  for (int fieldOrdinal1=0; fieldOrdinal1<basisCardinality1; fieldOrdinal1++)
1431  {
1432  const ordinal_type fieldOrdinal = fieldOrdinal1 + fieldOrdinal2 * basisCardinality1;
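 // Since at most one of basis1, basis2 is vector-valued (enforced above), at least one of
 // basisDim1, basisDim2 is 1; the index d1+d2 therefore reduces to whichever of d1, d2 actually
 // varies, and the composite coefficient is the product of the two component coefficients.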
1433  for (int d1 = 0; d1 <basisDim1; d1++) {
1434  for (int d2 = 0; d2 <basisDim2; d2++) {
1435  dofCoeffs.access(fieldOrdinal,d1+d2) = dofCoeffs1.access(fieldOrdinal1,d1);
1436  dofCoeffs.access(fieldOrdinal,d1+d2) *= dofCoeffs2.access(fieldOrdinal2,d2);
1437  }
1438  }
1439  }
1440  });
1441  }
1442 
1447  virtual
1448  const char*
1449  getName() const override {
1450  return name_.c_str();
1451  }
1452 
1453  std::vector<BasisPtr> getTensorBasisComponents() const
1454  {
1455  return tensorComponents_;
1456  }
1457 
1469  virtual
1470  void
1471  getValues( BasisValues<OutputValueType,DeviceType> outputValues,
1472  const TensorPoints<PointValueType,DeviceType> inputPoints,
1473  const EOperator operatorType = OPERATOR_VALUE ) const override
1474  {
1475  const ordinal_type numTensorComponents = tensorComponents_.size();
1476  if (inputPoints.numTensorComponents() < numTensorComponents)
1477  {
1478  // then we require that both inputPoints and outputValues have trivial tensor structure
1479  INTREPID2_TEST_FOR_EXCEPTION( inputPoints.numTensorComponents() != 1, std::invalid_argument, "If inputPoints differs from the tensor basis in component count, then inputPoints must have trivial tensor product structure" );
1480  INTREPID2_TEST_FOR_EXCEPTION( outputValues.numFamilies() != 1, std::invalid_argument, "If inputPoints differs from the tensor basis in component count, outputValues must have a single family with trivial tensor product structure" );
1481  INTREPID2_TEST_FOR_EXCEPTION( outputValues.tensorData().numTensorComponents() != 1, std::invalid_argument, "If inputPoints differs from the tensor basis in component count, outputValues must have a single family with trivial tensor product structure" );
1482 
1483  OutputViewType outputView = outputValues.tensorData().getTensorComponent(0).getUnderlyingView();
1484  PointViewType pointView = inputPoints.getTensorComponent(0);
1485  this->getValues(outputView, pointView, operatorType);
1486  return;
1487  }
1488 
1489  OperatorTensorDecomposition operatorDecomposition = getOperatorDecomposition(operatorType);
1490 
1491  const ordinal_type numVectorComponents = operatorDecomposition.numVectorComponents();
1492  const bool useVectorData = numVectorComponents > 1;
1493  const ordinal_type numBasisComponents = operatorDecomposition.numBasisComponents();
1494 
1495  for (ordinal_type vectorComponentOrdinal=0; vectorComponentOrdinal<numVectorComponents; vectorComponentOrdinal++)
1496  {
1497  const double weight = operatorDecomposition.weight(vectorComponentOrdinal);
1498  ordinal_type pointComponentOrdinal = 0;
1499  for (ordinal_type basisOrdinal=0; basisOrdinal<numBasisComponents; basisOrdinal++, pointComponentOrdinal++)
1500  {
1501  const EOperator op = operatorDecomposition.op(vectorComponentOrdinal, basisOrdinal);
1502  // by convention, op == OPERATOR_MAX signals a zero component; skip
1503  if (op != OPERATOR_MAX)
1504  {
1505  const int vectorFamily = 0; // TensorBasis always has just a single family; multiple families arise in DirectSumBasis
1506  auto tensorData = useVectorData ? outputValues.vectorData().getComponent(vectorFamily,vectorComponentOrdinal) : outputValues.tensorData();
1507  INTREPID2_TEST_FOR_EXCEPTION( ! tensorData.getTensorComponent(basisOrdinal).isValid(), std::invalid_argument, "Invalid output component encountered");
1508 
1509  const Data<OutputValueType,DeviceType> & outputData = tensorData.getTensorComponent(basisOrdinal);
1510 
1511  auto basisValueView = outputData.getUnderlyingView();
1512  PointViewType pointView = inputPoints.getTensorComponent(pointComponentOrdinal);
1513  const ordinal_type basisDomainDimension = tensorComponents_[basisOrdinal]->getDomainDimension();
1514  if (pointView.extent_int(1) == basisDomainDimension)
1515  {
1516  tensorComponents_[basisOrdinal]->getValues(basisValueView, pointView, op);
1517  }
1518  else
1519  {
1520  // we need to wrap the basisValueView in a BasisValues container, and to wrap the point components in a TensorPoints container.
1521 
1522  // combine point components to build up to basisDomainDimension
1523  ordinal_type dimsSoFar = 0;
1524  std::vector< ScalarView<PointValueType,DeviceType> > basisPointComponents;
1525  while (dimsSoFar < basisDomainDimension)
1526  {
1527  INTREPID2_TEST_FOR_EXCEPTION(pointComponentOrdinal >= inputPoints.numTensorComponents(), std::invalid_argument, "Error in processing points container; perhaps it is mis-sized?");
1528  const auto & pointComponent = inputPoints.getTensorComponent(pointComponentOrdinal);
1529  const ordinal_type numComponentDims = pointComponent.extent_int(1);
1530  dimsSoFar += numComponentDims;
1531  INTREPID2_TEST_FOR_EXCEPTION(dimsSoFar > basisDomainDimension, std::invalid_argument, "Error in processing points container; perhaps it is mis-sized?");
1532  basisPointComponents.push_back(pointComponent);
1533  if (dimsSoFar < basisDomainDimension)
1534  {
1535  // we will pass through this loop again, so we should increment the point component ordinal
1536  pointComponentOrdinal++;
1537  }
1538  }
1539 
1540  TensorPoints<PointValueType, DeviceType> basisPoints(basisPointComponents);
1541 
1542  bool useVectorData2 = (basisValueView.rank() == 3);
1543 
1544  BasisValues<OutputValueType,DeviceType> basisValues;
1545  if (useVectorData2)
1546  {
1547  VectorData<OutputValueType,DeviceType> vectorData(outputData);
1548  basisValues = BasisValues<OutputValueType,DeviceType>(vectorData);
1549  }
1550  else
1551  {
1552  TensorData<OutputValueType,DeviceType> tensorData2(outputData);
1553  basisValues = BasisValues<OutputValueType,DeviceType>(tensorData2);
1554  }
1555 
1556  tensorComponents_[basisOrdinal]->getValues(basisValues, basisPoints, op);
1557  }
1558 
1559  // operatorDecomposition.rotateXYNinetyDegrees() is set to true for one of the H(curl) wedge families.
1560  // (Because Intrepid2::EOperator does not allow us to extract individual vector components
1561  // via, e.g., OPERATOR_X, OPERATOR_Y, etc., we don't have a way of expressing the decomposition
1562  // just in terms of EOperator and component-wise scalar weights; we could do this via component-wise
1563  // matrix weights, but that would involve a more intrusive change to the implementation.)
1564  const bool spansXY = (vectorComponentOrdinal == 0) && (basisValueView.extent_int(2) == 2);
1565  if (spansXY && operatorDecomposition.rotateXYNinetyDegrees())
1566  {
1567  // map from (f_x,f_y) --> (-f_y,f_x)
1568  auto policy = Kokkos::MDRangePolicy<ExecutionSpace,Kokkos::Rank<2>>({0,0},{basisValueView.extent_int(0),basisValueView.extent_int(1)});
1569  Kokkos::parallel_for("rotateXYNinetyDegrees", policy,
1570  KOKKOS_LAMBDA (const int &fieldOrdinal, const int &pointOrdinal) {
1571  const auto f_x = basisValueView(fieldOrdinal,pointOrdinal,0); // copy
1572  const auto &f_y = basisValueView(fieldOrdinal,pointOrdinal,1); // reference
1573  basisValueView(fieldOrdinal,pointOrdinal,0) = -f_y;
1574  basisValueView(fieldOrdinal,pointOrdinal,1) = f_x;
1575  });
1576  }
1577 
1578  // if weight is non-trivial (not 1.0), then we need to multiply one of the component views by weight.
1579  // we do that for the first basisOrdinal's values
1580  if ((weight != 1.0) && (basisOrdinal == 0))
1581  {
1582  if (basisValueView.rank() == 2)
1583  {
1584  auto policy = Kokkos::MDRangePolicy<ExecutionSpace,Kokkos::Rank<2>>({0,0},{basisValueView.extent_int(0),basisValueView.extent_int(1)});
1585  Kokkos::parallel_for("multiply basisValueView by weight", policy,
1586  KOKKOS_LAMBDA (const int &fieldOrdinal, const int &pointOrdinal) {
1587  basisValueView(fieldOrdinal,pointOrdinal) *= weight;
1588  });
1589  }
1590  else if (basisValueView.rank() == 3)
1591  {
1592  auto policy = Kokkos::MDRangePolicy<ExecutionSpace,Kokkos::Rank<3>>({0,0,0},{basisValueView.extent_int(0),basisValueView.extent_int(1),basisValueView.extent_int(2)});
1593  Kokkos::parallel_for("multiply basisValueView by weight", policy,
1594  KOKKOS_LAMBDA (const int &fieldOrdinal, const int &pointOrdinal, const int &d) {
1595  basisValueView(fieldOrdinal,pointOrdinal,d) *= weight;
1596  });
1597  }
1598  else
1599  {
1600  INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported rank for basisValueView");
1601  }
1602  }
1603  }
1604  }
1605  }
1606  }
1607 
1626  void getValues( OutputViewType outputValues, const PointViewType inputPoints,
1627  const EOperator operatorType = OPERATOR_VALUE ) const override
1628  {
1629  bool tensorPoints; // true would mean that we take the tensor product of inputPoints1 and inputPoints2 (and that this would be equivalent to inputPoints as given -- i.e., inputPoints1 and inputPoints2 would be a tensor decomposition of inputPoints)
1630  bool attemptTensorDecomposition = false; // support for this not yet implemented
1631  PointViewType inputPoints1, inputPoints2;
1632  getComponentPoints(inputPoints, attemptTensorDecomposition, inputPoints1, inputPoints2, tensorPoints);
1633 
1634  const auto functionSpace = this->getFunctionSpace();
1635 
1636  if ((functionSpace == FUNCTION_SPACE_HVOL) || (functionSpace == FUNCTION_SPACE_HGRAD))
1637  {
1638  // then we can handle VALUE, GRAD, and Op_Dn without reference to subclass
1639  switch (operatorType)
1640  {
1641  case OPERATOR_VALUE:
1642  case OPERATOR_GRAD:
1643  case OPERATOR_D1:
1644  case OPERATOR_D2:
1645  case OPERATOR_D3:
1646  case OPERATOR_D4:
1647  case OPERATOR_D5:
1648  case OPERATOR_D6:
1649  case OPERATOR_D7:
1650  case OPERATOR_D8:
1651  case OPERATOR_D9:
1652  case OPERATOR_D10:
1653  {
1654  auto opOrder = getOperatorOrder(operatorType); // number of derivatives that we take in total
1655  // The Dk enumeration happens in lexicographic order (reading from left to right: x, y, z, etc.).
1656  // This governs the nesting order of the dkEnum1, dkEnum2 for loops below: dkEnum2 should increment fastest.
1657  for (int derivativeCountComp2=0; derivativeCountComp2<=opOrder; derivativeCountComp2++)
1658  {
1659  int derivativeCountComp1=opOrder-derivativeCountComp2;
1660  EOperator op1 = (derivativeCountComp1 == 0) ? OPERATOR_VALUE : EOperator(OPERATOR_D1 + (derivativeCountComp1 - 1));
1661  EOperator op2 = (derivativeCountComp2 == 0) ? OPERATOR_VALUE : EOperator(OPERATOR_D1 + (derivativeCountComp2 - 1));
1662 
1663  int spaceDim1 = inputPoints1.extent_int(1);
1664  int spaceDim2 = inputPoints2.extent_int(1);
1665 
1666  int dkCardinality1 = (op1 != OPERATOR_VALUE) ? getDkCardinality(op1, spaceDim1) : 1;
1667  int dkCardinality2 = (op2 != OPERATOR_VALUE) ? getDkCardinality(op2, spaceDim2) : 1;
1668 
1669  int basisCardinality1 = basis1_->getCardinality();
1670  int basisCardinality2 = basis2_->getCardinality();
1671 
1672  int totalPointCount = tensorPoints ? inputPoints1.extent_int(0) * inputPoints2.extent_int(0) : inputPoints1.extent_int(0);
1673 
1674  int pointCount1, pointCount2;
1675  if (tensorPoints)
1676  {
1677  pointCount1 = inputPoints1.extent_int(0);
1678  pointCount2 = inputPoints2.extent_int(0);
1679  }
1680  else
1681  {
1682  pointCount1 = totalPointCount;
1683  pointCount2 = totalPointCount;
1684  }
1685 
1686  OutputViewType outputValues1, outputValues2;
1687  if (op1 == OPERATOR_VALUE)
1688  outputValues1 = getMatchingViewWithLabel(outputValues, "output values - basis 1",basisCardinality1,pointCount1);
1689  else
1690  outputValues1 = getMatchingViewWithLabel(outputValues, "output values - basis 1",basisCardinality1,pointCount1,dkCardinality1);
1691 
1692  if (op2 == OPERATOR_VALUE)
1693  outputValues2 = getMatchingViewWithLabel(outputValues, "output values - basis 2",basisCardinality2,pointCount2);
1694  else
1695  outputValues2 = getMatchingViewWithLabel(outputValues, "output values - basis 2",basisCardinality2,pointCount2,dkCardinality2);
1696 
1697  basis1_->getValues(outputValues1,inputPoints1,op1);
1698  basis2_->getValues(outputValues2,inputPoints2,op2);
1699 
1700  const int outputVectorSize = getVectorSizeForHierarchicalParallelism<OutputValueType>();
1701  const int pointVectorSize = getVectorSizeForHierarchicalParallelism<PointValueType>();
1702  const int vectorSize = std::max(outputVectorSize,pointVectorSize);
1703 
1704  auto policy = Kokkos::TeamPolicy<ExecutionSpace>(basisCardinality1,Kokkos::AUTO(),vectorSize);
1705 
1706  double weight = 1.0;
1707  using FunctorType = TensorViewFunctor<ExecutionSpace, OutputValueType, OutputViewType>;
1708 
1709  for (int dkEnum1=0; dkEnum1<dkCardinality1; dkEnum1++)
1710  {
1711  auto outputValues1_dkEnum1 = (op1 != OPERATOR_VALUE) ? Kokkos::subview(outputValues1,Kokkos::ALL(),Kokkos::ALL(),dkEnum1)
1712  : Kokkos::subview(outputValues1,Kokkos::ALL(),Kokkos::ALL());
1713  for (int dkEnum2=0; dkEnum2<dkCardinality2; dkEnum2++)
1714  {
1715  auto outputValues2_dkEnum2 = (op2 != OPERATOR_VALUE) ? Kokkos::subview(outputValues2,Kokkos::ALL(),Kokkos::ALL(),dkEnum2)
1716  : Kokkos::subview(outputValues2,Kokkos::ALL(),Kokkos::ALL());
1717 
1718  ordinal_type dkTensorIndex = getTensorDkEnumeration(dkEnum1, derivativeCountComp1, dkEnum2, derivativeCountComp2);
1719  auto outputValues_dkTensor = Kokkos::subview(outputValues,Kokkos::ALL(),Kokkos::ALL(),dkTensorIndex);
1720  // Note that there may be performance optimizations available here:
1721  // - could eliminate interior for loop in favor of having a vector-valued outputValues1_dk
1722  // - could add support to TensorViewFunctor (and probably TensorViewIterator) for this kind of tensor Dk type of traversal
1723  // (this would allow us to eliminate both for loops here)
1724  // At the moment, we defer such optimizations on the idea that this may not ever become a performance bottleneck.
1725  FunctorType functor(outputValues_dkTensor, outputValues1_dkEnum1, outputValues2_dkEnum2, tensorPoints, weight);
1726  Kokkos::parallel_for("TensorViewFunctor", policy , functor);
1727  }
1728  }
1729  }
1730  }
1731  break;
1732  default: // non-OPERATOR_Dn case must be handled by subclass.
1733  this->getValues(outputValues, operatorType, inputPoints1, inputPoints2, tensorPoints);
1734  }
1735  }
1736  else
1737  {
1738  // not HVOL or HGRAD; subclass must handle
1739  this->getValues(outputValues, operatorType, inputPoints1, inputPoints2, tensorPoints);
1740  }
1741  }
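 // Illustrative example of the Dk decomposition performed above, for a quadrilateral formed as
 // line x line with operatorType == OPERATOR_D2: opOrder == 2, and the loop visits
 // (derivativeCountComp1, derivativeCountComp2) = (2,0), (1,1), (0,2), i.e.
 // (op1, op2) = (D2, VALUE), (D1, D1), (VALUE, D2). These yield the composite second
 // derivatives d^2/dx^2, d^2/dxdy, d^2/dy^2, and getTensorDkEnumeration() maps each
 // (dkEnum1, dkEnum2) pair to the corresponding entry in the composite Dk enumeration.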
1742 
1768  virtual void getValues(OutputViewType outputValues, const EOperator operatorType,
1769  const PointViewType inputPoints1, const PointViewType inputPoints2,
1770  bool tensorPoints) const
1771  {
1772  INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "one-operator, two-inputPoints getValues should be overridden by TensorBasis subclasses");
1773  }
1774 
1798  void getValues( OutputViewType outputValues,
1799  const PointViewType inputPoints1, const EOperator operatorType1,
1800  const PointViewType inputPoints2, const EOperator operatorType2,
1801  bool tensorPoints, double weight=1.0) const
1802  {
1803  int basisCardinality1 = basis1_->getCardinality();
1804  int basisCardinality2 = basis2_->getCardinality();
1805 
1806  int totalPointCount = tensorPoints ? inputPoints1.extent_int(0) * inputPoints2.extent_int(0) : inputPoints1.extent_int(0);
1807 
1808  int pointCount1, pointCount2;
1809  if (tensorPoints)
1810  {
1811  pointCount1 = inputPoints1.extent_int(0);
1812  pointCount2 = inputPoints2.extent_int(0);
1813  }
1814  else
1815  {
1816  pointCount1 = totalPointCount;
1817  pointCount2 = totalPointCount;
1818  }
1819 
1820  const ordinal_type spaceDim1 = inputPoints1.extent_int(1);
1821  const ordinal_type spaceDim2 = inputPoints2.extent_int(1);
1822 
1823  INTREPID2_TEST_FOR_EXCEPTION(!tensorPoints && (totalPointCount != inputPoints2.extent_int(0)),
1824  std::invalid_argument, "If tensorPoints is false, the point counts must match!");
1825 
1826  const ordinal_type opRank1 = getOperatorRank(basis1_->getFunctionSpace(), operatorType1, spaceDim1);
1827  const ordinal_type opRank2 = getOperatorRank(basis2_->getFunctionSpace(), operatorType2, spaceDim2);
1828 
1829  const ordinal_type outputRank1 = opRank1 + getFieldRank(basis1_->getFunctionSpace());
1830  const ordinal_type outputRank2 = opRank2 + getFieldRank(basis2_->getFunctionSpace());
1831 
1832  OutputViewType outputValues1, outputValues2;
1833  if (outputRank1 == 0)
1834  {
1835  outputValues1 = getMatchingViewWithLabel(outputValues,"output values - basis 1",basisCardinality1,pointCount1);
1836  }
1837  else if (outputRank1 == 1)
1838  {
1839  outputValues1 = getMatchingViewWithLabel(outputValues,"output values - basis 1",basisCardinality1,pointCount1,spaceDim1);
1840  }
1841  else
1842  {
1843  INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported opRank1");
1844  }
1845 
1846  if (outputRank2 == 0)
1847  {
1848  outputValues2 = getMatchingViewWithLabel(outputValues,"output values - basis 2",basisCardinality2,pointCount2);
1849  }
1850  else if (outputRank2 == 1)
1851  {
1852  outputValues2 = getMatchingViewWithLabel(outputValues,"output values - basis 2",basisCardinality2,pointCount2,spaceDim2);
1853  }
1854  else
1855  {
1856  INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported opRank2");
1857  }
1858 
1859  basis1_->getValues(outputValues1,inputPoints1,operatorType1);
1860  basis2_->getValues(outputValues2,inputPoints2,operatorType2);
1861 
1862  const int outputVectorSize = getVectorSizeForHierarchicalParallelism<OutputValueType>();
1863  const int pointVectorSize = getVectorSizeForHierarchicalParallelism<PointValueType>();
1864  const int vectorSize = std::max(outputVectorSize,pointVectorSize);
1865 
1866  auto policy = Kokkos::TeamPolicy<ExecutionSpace>(basisCardinality1,Kokkos::AUTO(),vectorSize);
1867 
1868  using FunctorType = TensorViewFunctor<ExecutionSpace, OutputValueType, OutputViewType>;
1869 
1870  FunctorType functor(outputValues, outputValues1, outputValues2, tensorPoints, weight);
1871  Kokkos::parallel_for("TensorViewFunctor", policy , functor);
1872  }
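 // A sketch of how a subclass might call the method above (hypothetical example): an HGRAD
 // tensor basis on a quadrilateral could assemble the y-component of its gradient, into an
 // appropriately shaped outputValues container, via
 //
 //   this->getValues(outputValues, inputPoints1, OPERATOR_VALUE,
 //                   inputPoints2, OPERATOR_D1, tensorPoints);
 //
 // i.e., VALUE of the first component times the first derivative of the second; the optional
 // weight argument accommodates sign or scaling factors arising in an operator decomposition.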
1873 
1878  virtual HostBasisPtr<OutputValueType, PointValueType>
1879  getHostBasis() const override {
1880  TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, "TensorBasis subclasses must override getHostBasis");
1881  }
1882  }; // Basis_TensorBasis
1883 
1891  template<class ExecutionSpace, class OutputScalar, class OutputFieldType>
1892  struct TensorBasis3_Functor
1893  {
1894  using ScratchSpace = typename ExecutionSpace::scratch_memory_space;
1895  using OutputScratchView = Kokkos::View<OutputScalar*,ScratchSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
1896 
1897  using TeamPolicy = Kokkos::TeamPolicy<ExecutionSpace>;
1898  using TeamMember = typename TeamPolicy::member_type;
1899 
1900  OutputFieldType output_; // F,P
1901  OutputFieldType input1_; // F1,P[,D] or F1,P1[,D]
1902  OutputFieldType input2_; // F2,P[,D] or F2,P2[,D]
1903  OutputFieldType input3_; // F3,P[,D] or F3,P3[,D]
1904 
1905  int numFields_, numPoints_;
1906  int numFields1_, numPoints1_;
1907  int numFields2_, numPoints2_;
1908  int numFields3_, numPoints3_;
1909 
1910  bool tensorPoints_; // if true, input1, input2, input3 refer to values at decomposed points, and P = P1 * P2 * P3. If false, then the three inputs refer to points in the full-dimensional space, and their point lengths are the same as that of the final output.
1911 
1912  double weight_;
1913 
1914  TensorBasis3_Functor(OutputFieldType output, OutputFieldType inputValues1, OutputFieldType inputValues2, OutputFieldType inputValues3,
1915  bool tensorPoints, double weight)
1916  : output_(output), input1_(inputValues1), input2_(inputValues2), input3_(inputValues3), tensorPoints_(tensorPoints), weight_(weight)
1917  {
1918  numFields_ = output.extent_int(0);
1919  numPoints_ = output.extent_int(1);
1920 
1921  numFields1_ = inputValues1.extent_int(0);
1922  numPoints1_ = inputValues1.extent_int(1);
1923 
1924  numFields2_ = inputValues2.extent_int(0);
1925  numPoints2_ = inputValues2.extent_int(1);
1926 
1927  numFields3_ = inputValues3.extent_int(0);
1928  numPoints3_ = inputValues3.extent_int(1);
1929  /*
1930  We don't yet support tensor-valued bases here (only vector and scalar). The main design question is how the layouts
1931  of the input containers relate to the layout of the output container. The work we've done in TensorViewIterator basically
1932  shows the choices that can be made. It does appear that in most cases (at least (most of?) those supported by TensorViewIterator),
1933  we can infer from the dimensions of input/output containers what choice should be made in each dimension.
1934  */
1935  INTREPID2_TEST_FOR_EXCEPTION(inputValues1.rank() > 3, std::invalid_argument, "ranks greater than 3 not yet supported");
1936  INTREPID2_TEST_FOR_EXCEPTION(inputValues2.rank() > 3, std::invalid_argument, "ranks greater than 3 not yet supported");
1937  INTREPID2_TEST_FOR_EXCEPTION(inputValues3.rank() > 3, std::invalid_argument, "ranks greater than 3 not yet supported");
1938  INTREPID2_TEST_FOR_EXCEPTION((inputValues1.rank() == 3) && (inputValues2.rank() == 3), std::invalid_argument, "two vector-valued input ranks not yet supported");
1939  INTREPID2_TEST_FOR_EXCEPTION((inputValues1.rank() == 3) && (inputValues3.rank() == 3), std::invalid_argument, "two vector-valued input ranks not yet supported");
1940  INTREPID2_TEST_FOR_EXCEPTION((inputValues2.rank() == 3) && (inputValues3.rank() == 3), std::invalid_argument, "two vector-valued input ranks not yet supported");
1941 
1942  if (!tensorPoints_)
1943  {
1944  // then the point counts should all match
1945  INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != numPoints1_, std::invalid_argument, "incompatible point counts");
1946  INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != numPoints2_, std::invalid_argument, "incompatible point counts");
1947  INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != numPoints3_, std::invalid_argument, "incompatible point counts");
1948  }
1949  else
1950  {
1951  INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != numPoints1_ * numPoints2_ * numPoints3_, std::invalid_argument, "incompatible point counts");
1952  }
1953 
1954  INTREPID2_TEST_FOR_EXCEPTION(numFields_ != numFields1_ * numFields2_ * numFields3_, std::invalid_argument, "incompatible field sizes");
1955  }
1956 
1957  KOKKOS_INLINE_FUNCTION
1958  void operator()( const TeamMember & teamMember ) const
1959  {
1960  auto fieldOrdinal1 = teamMember.league_rank();
1961 
1962  if (!tensorPoints_)
1963  {
1964  if ((input1_.rank() == 2) && (input2_.rank() == 2) && (input3_.rank() == 2))
1965  {
1966  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
1967  for (int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
1968  {
1969  int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
1970  for (int pointOrdinal=0; pointOrdinal<numPoints_; pointOrdinal++)
1971  {
1972  output_(fieldOrdinal,pointOrdinal) = weight_ * input1_(fieldOrdinal1,pointOrdinal) * input2_(fieldOrdinal2,pointOrdinal) * input3_(fieldOrdinal3,pointOrdinal);
1973  }
1974  }
1975  });
1976  }
1977  else if (input1_.rank() == 3)
1978  {
1979  int spaceDim = input1_.extent_int(2);
1980  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
1981  for (int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
1982  {
1983  int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
1984  for (int pointOrdinal=0; pointOrdinal<numPoints_; pointOrdinal++)
1985  {
1986  for (int d=0; d<spaceDim; d++)
1987  {
1988  output_(fieldOrdinal,pointOrdinal,d) = weight_ * input1_(fieldOrdinal1,pointOrdinal,d) * input2_(fieldOrdinal2,pointOrdinal) * input3_(fieldOrdinal3,pointOrdinal);
1989  }
1990  }
1991  }
1992  });
1993  }
1994  else if (input2_.rank() == 3)
1995  {
1996  int spaceDim = input2_.extent_int(2);
1997  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
1998  for (int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
1999  {
2000  int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
2001  for (int pointOrdinal=0; pointOrdinal<numPoints_; pointOrdinal++)
2002  {
2003  for (int d=0; d<spaceDim; d++)
2004  {
2005  output_(fieldOrdinal,pointOrdinal,d) = weight_ * input1_(fieldOrdinal1,pointOrdinal) * input2_(fieldOrdinal2,pointOrdinal,d) * input3_(fieldOrdinal3,pointOrdinal);
2006  }
2007  }
2008  }
2009  });
2010  }
2011  else if (input3_.rank() == 3)
2012  {
2013  int spaceDim = input3_.extent_int(2);
2014  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
2015  for (int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
2016  {
2017  int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
2018  for (int pointOrdinal=0; pointOrdinal<numPoints_; pointOrdinal++)
2019  {
2020  for (int d=0; d<spaceDim; d++)
2021  {
2022  output_(fieldOrdinal,pointOrdinal,d) = weight_ * input1_(fieldOrdinal1,pointOrdinal) * input2_(fieldOrdinal2,pointOrdinal) * input3_(fieldOrdinal3,pointOrdinal,d);
2023  }
2024  }
2025  }
2026  });
2027  }
2028  else
2029  {
2030  // unsupported rank combination -- enforced in constructor
2031  }
2032  }
2033  else
2034  {
2035  if ((input1_.rank() == 2) && (input2_.rank() == 2) && (input3_.rank() == 2) )
2036  {
2037  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
2038  for (int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
2039  {
2040  int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
2041  for (int pointOrdinal3=0; pointOrdinal3<numPoints3_; pointOrdinal3++)
2042  {
2043  for (int pointOrdinal2=0; pointOrdinal2<numPoints2_; pointOrdinal2++)
2044  {
2045  for (int pointOrdinal1=0; pointOrdinal1<numPoints1_; pointOrdinal1++)
2046  {
2047  int pointOrdinal = (pointOrdinal3 * numPoints2_ + pointOrdinal2) * numPoints1_ + pointOrdinal1;
2048  output_(fieldOrdinal,pointOrdinal) = weight_ * input1_(fieldOrdinal1,pointOrdinal1) * input2_(fieldOrdinal2,pointOrdinal2) * input3_(fieldOrdinal3,pointOrdinal3);
2049  }
2050  }
2051  }
2052  }
2053  });
2054  }
2055  else if (input1_.rank() == 3) // based on constructor requirements, this means the others are rank 2
2056  {
2057  int spaceDim = input1_.extent_int(2);
2058  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
2059  for (int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
2060  {
2061  int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
2062  for (int pointOrdinal3=0; pointOrdinal3<numPoints3_; pointOrdinal3++)
2063  {
2064  for (int pointOrdinal2=0; pointOrdinal2<numPoints2_; pointOrdinal2++)
2065  {
2066  for (int pointOrdinal1=0; pointOrdinal1<numPoints1_; pointOrdinal1++)
2067  {
2068  int pointOrdinal = (pointOrdinal3 * numPoints2_ + pointOrdinal2) * numPoints1_ + pointOrdinal1;
2069  for (int d=0; d<spaceDim; d++)
2070  {
2071  output_(fieldOrdinal,pointOrdinal,d) = weight_ * input1_(fieldOrdinal1,pointOrdinal1,d) * input2_(fieldOrdinal2,pointOrdinal2) * input3_(fieldOrdinal3,pointOrdinal3);
2072  }
2073  }
2074  }
2075  }
2076  }
2077  });
2078  }
2079  else if (input2_.rank() == 3) // based on constructor requirements, this means the others are rank 2
2080  {
2081  int spaceDim = input2_.extent_int(2);
2082  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
2083  for (int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
2084  {
2085  int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
2086  for (int pointOrdinal3=0; pointOrdinal3<numPoints3_; pointOrdinal3++)
2087  {
2088  for (int pointOrdinal2=0; pointOrdinal2<numPoints2_; pointOrdinal2++)
2089  {
2090  for (int pointOrdinal1=0; pointOrdinal1<numPoints1_; pointOrdinal1++)
2091  {
2092  int pointOrdinal = (pointOrdinal3 * numPoints2_ + pointOrdinal2) * numPoints1_ + pointOrdinal1;
2093  for (int d=0; d<spaceDim; d++)
2094  {
2095  output_(fieldOrdinal,pointOrdinal,d) = weight_ * input1_(fieldOrdinal1,pointOrdinal1) * input2_(fieldOrdinal2,pointOrdinal2,d) * input3_(fieldOrdinal3,pointOrdinal3);
2096  }
2097  }
2098  }
2099  }
2100  }
2101  });
2102  }
2103  else if (input3_.rank() == 3) // based on constructor requirements, this means the others are rank 2
2104  {
2105  int spaceDim = input3_.extent_int(2);
2106  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
2107  for (int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
2108  {
2109  int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
2110  for (int pointOrdinal3=0; pointOrdinal3<numPoints3_; pointOrdinal3++)
2111  {
2112  for (int pointOrdinal2=0; pointOrdinal2<numPoints2_; pointOrdinal2++)
2113  {
2114  for (int pointOrdinal1=0; pointOrdinal1<numPoints1_; pointOrdinal1++)
2115  {
2116  int pointOrdinal = (pointOrdinal3 * numPoints2_ + pointOrdinal2) * numPoints1_ + pointOrdinal1;
2117  for (int d=0; d<spaceDim; d++)
2118  {
2119  output_(fieldOrdinal,pointOrdinal,d) = weight_ * input1_(fieldOrdinal1,pointOrdinal1) * input2_(fieldOrdinal2,pointOrdinal2) * input3_(fieldOrdinal3,pointOrdinal3,d);
2120  }
2121  }
2122  }
2123  }
2124  }
2125  });
2126  }
2127  else
2128  {
2129  // unsupported rank combination -- enforced in constructor
2130  }
2131  }
2132  }
2133  }; // TensorBasis3_Functor
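 // Illustrative example of the ordinal composition used in the functor above: with
 // numFields1_ == 2, numFields2_ == 3, numFields3_ == 4, the composite field ordinal for
 // (fieldOrdinal1, fieldOrdinal2, fieldOrdinal3) == (1, 2, 3) is (3 * 3 + 2) * 2 + 1 == 23,
 // the last of the 2 * 3 * 4 == 24 fields. In the tensorPoints_ case, point ordinals compose
 // the same way, with pointOrdinal1 varying fastest.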
2134 
2135 
2136  template<typename BasisBaseClass = void>
2137  class Basis_TensorBasis3
2138  : public Basis_TensorBasis<BasisBaseClass>
2139  {
2140  using BasisBase = BasisBaseClass;
2141  using TensorBasis = Basis_TensorBasis<BasisBase>;
2142  public:
2143  using typename BasisBase::OutputViewType;
2144  using typename BasisBase::PointViewType;
2145  using typename BasisBase::ScalarViewType;
2146 
2147  using typename BasisBase::OutputValueType;
2148  using typename BasisBase::PointValueType;
2149 
2150  using typename BasisBase::ExecutionSpace;
2151 
2152  using BasisPtr = Teuchos::RCP<BasisBase>;
2153  protected:
2154  BasisPtr basis1_;
2155  BasisPtr basis2_;
2156  BasisPtr basis3_;
2157  public:
2158  Basis_TensorBasis3(BasisPtr basis1, BasisPtr basis2, BasisPtr basis3, const bool useShardsCellTopologyAndTags = false)
2159  :
2160  TensorBasis(Teuchos::rcp( new TensorBasis(basis1,basis2,FUNCTION_SPACE_MAX,useShardsCellTopologyAndTags)),
2161  basis3,
2162  FUNCTION_SPACE_MAX,useShardsCellTopologyAndTags),
2163  basis1_(basis1),
2164  basis2_(basis2),
2165  basis3_(basis3)
2166  {}
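 // Note: the constructor above realizes basis1 x basis2 x basis3 as the nested tensor product
 // (basis1 x basis2) x basis3: the inner pair is wrapped in a TensorBasis that becomes the first
 // component of the two-component base class, with basis3 as the second. The basis1_, basis2_,
 // basis3_ members retain the original components for the three-argument getValues() methods and
 // the getOperatorDecomposition() override below.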
2167 
2174  virtual OperatorTensorDecomposition getOperatorDecomposition(const EOperator operatorType) const override
2175  {
2176  OperatorTensorDecomposition opSimpleDecomposition = this->getSimpleOperatorDecomposition(operatorType);
2177  std::vector<BasisPtr> componentBases {basis1_, basis2_, basis3_};
2178  return opSimpleDecomposition.expandedDecomposition(componentBases);
2179  }
2180 
2181  using TensorBasis::getValues;
2182 
2207  virtual void getValues(OutputViewType outputValues, const EOperator operatorType,
2208  const PointViewType inputPoints12, const PointViewType inputPoints3,
2209  bool tensorPoints) const override
2210  {
2211  // TODO: rework this to use superclass's getComponentPoints.
2212 
2213  int spaceDim1 = basis1_->getDomainDimension();
2214  int spaceDim2 = basis2_->getDomainDimension();
2215 
2216  int totalSpaceDim12 = inputPoints12.extent_int(1);
2217 
2218  TEUCHOS_ASSERT(spaceDim1 + spaceDim2 == totalSpaceDim12);
2219 
2220  if (!tensorPoints)
2221  {
2222  auto inputPoints1 = Kokkos::subview(inputPoints12,Kokkos::ALL(),std::make_pair(0,spaceDim1));
2223  auto inputPoints2 = Kokkos::subview(inputPoints12,Kokkos::ALL(),std::make_pair(spaceDim1,totalSpaceDim12));
2224 
2225  this->getValues(outputValues, operatorType, inputPoints1, inputPoints2, inputPoints3, tensorPoints);
2226  }
2227  else
2228  {
2229  // The superclass doesn't (yet) have a clever way to detect tensor points in a single container;
2230  // we'd need something along those lines here to detect them in inputPoints12.
2231  // If we do add such a mechanism to the superclass, it should be simple enough to call it from here.
2232  INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "This method does not yet handle tensorPoints=true");
2233  }
2234  }
2235 
2263  virtual void getValues(OutputViewType outputValues, const EOperator operatorType,
2264  const PointViewType inputPoints1, const PointViewType inputPoints2, const PointViewType inputPoints3,
2265  bool tensorPoints) const = 0;
2266 
2294  void getValues( OutputViewType outputValues,
2295  const PointViewType inputPoints1, const EOperator operatorType1,
2296  const PointViewType inputPoints2, const EOperator operatorType2,
2297  const PointViewType inputPoints3, const EOperator operatorType3,
2298  bool tensorPoints, double weight=1.0) const
2299  {
2300  int basisCardinality1 = basis1_->getCardinality();
2301  int basisCardinality2 = basis2_->getCardinality();
2302  int basisCardinality3 = basis3_->getCardinality();
2303 
2304  int spaceDim1 = inputPoints1.extent_int(1);
2305  int spaceDim2 = inputPoints2.extent_int(1);
2306  int spaceDim3 = inputPoints3.extent_int(1);
2307 
2308  int totalPointCount;
2309  int pointCount1, pointCount2, pointCount3;
2310  if (tensorPoints)
2311  {
2312  pointCount1 = inputPoints1.extent_int(0);
2313  pointCount2 = inputPoints2.extent_int(0);
2314  pointCount3 = inputPoints3.extent_int(0);
2315  totalPointCount = pointCount1 * pointCount2 * pointCount3;
2316  }
2317  else
2318  {
2319  totalPointCount = inputPoints1.extent_int(0);
2320  pointCount1 = totalPointCount;
2321  pointCount2 = totalPointCount;
2322  pointCount3 = totalPointCount;
2323 
2324  INTREPID2_TEST_FOR_EXCEPTION((totalPointCount != inputPoints2.extent_int(0)) || (totalPointCount != inputPoints3.extent_int(0)),
2325  std::invalid_argument, "If tensorPoints is false, the point counts must match!");
2326  }
2327 
2328  // structure of this implementation:
2329  /*
2330  - allocate output1, output2, output3 containers
2331  - either:
2332  1. split off the tensor functor call into its own method in TensorBasis, and
2333  - call it once with output1, output2, placing these in another newly allocated output12, then
2334  - call it again with output12, output3
2335  OR
2336  2. create a 3-argument tensor functor and call it with output1,output2,output3
2337 
2338  At the moment, the 3-argument functor seems like a better approach. It's likely more code, but somewhat
2339  more efficient and easier to understand/debug. And the code is fairly straightforward to produce.
2340  */
2341 
2342  // copied from the 2-argument TensorBasis implementation:
2343 
2344  OutputViewType outputValues1, outputValues2, outputValues3;
2345 
2346  // Note: the gradient of an HGRAD basis on a line has rank-3 output, with the last dimension of extent 1.
2347  // In particular, this holds even when that 1D derivative arises while computing the divergence of an HDIV basis, whose overall output is scalar (rank 2).
2348  if ((spaceDim1 == 1) && (operatorType1 == OPERATOR_VALUE))
2349  {
2350  // use a rank 2 container for basis1
2351  outputValues1 = getMatchingViewWithLabel(outputValues,"output values - basis 1",basisCardinality1,pointCount1);
2352  }
2353  else
2354  {
2355  outputValues1 = getMatchingViewWithLabel(outputValues,"output values - basis 1",basisCardinality1,pointCount1,spaceDim1);
2356  }
2357  if ((spaceDim2 == 1) && (operatorType2 == OPERATOR_VALUE))
2358  {
2359  // use a rank 2 container for basis2
2360  outputValues2 = getMatchingViewWithLabel(outputValues,"output values - basis 2",basisCardinality2,pointCount2);
2361  }
2362  else
2363  {
2364  outputValues2 = getMatchingViewWithLabel(outputValues,"output values - basis 2",basisCardinality2,pointCount2,spaceDim2);
2365  }
2366  if ((spaceDim3 == 1) && (operatorType3 == OPERATOR_VALUE))
2367  {
2368  // use a rank 2 container for basis3
2369  outputValues3 = getMatchingViewWithLabel(outputValues,"output values - basis 3",basisCardinality3,pointCount3);
2370  }
2371  else
2372  {
2373  outputValues3 = getMatchingViewWithLabel(outputValues,"output values - basis 3",basisCardinality3,pointCount3,spaceDim3);
2374  }
2375 
2376  basis1_->getValues(outputValues1,inputPoints1,operatorType1);
2377  basis2_->getValues(outputValues2,inputPoints2,operatorType2);
2378  basis3_->getValues(outputValues3,inputPoints3,operatorType3);
2379 
2380  const int outputVectorSize = getVectorSizeForHierarchicalParallelism<OutputValueType>();
2381  const int pointVectorSize = getVectorSizeForHierarchicalParallelism<PointValueType>();
2382  const int vectorSize = std::max(outputVectorSize,pointVectorSize);
2383 
2384  auto policy = Kokkos::TeamPolicy<ExecutionSpace>(basisCardinality1,Kokkos::AUTO(),vectorSize);
2385 
2386  using FunctorType = TensorBasis3_Functor<ExecutionSpace, OutputValueType, OutputViewType>;
2387  FunctorType functor(outputValues, outputValues1, outputValues2, outputValues3, tensorPoints, weight);
2388  Kokkos::parallel_for("TensorBasis3_Functor", policy , functor);
2389  }
2390 
2398  virtual void getDofCoeffs( typename BasisBase::ScalarViewType dofCoeffs ) const override
2399  {
2400  using ValueType = typename BasisBase::ScalarViewType::value_type;
2401  using ResultLayout = typename DeduceLayout< typename BasisBase::ScalarViewType >::result_layout;
2402  using ViewType = Kokkos::DynRankView<ValueType, ResultLayout, typename TensorBasis::DeviceType >;
2403 
2404  const ordinal_type basisCardinality1 = basis1_->getCardinality();
2405  const ordinal_type basisCardinality2 = basis2_->getCardinality();
2406  const ordinal_type basisCardinality3 = basis3_->getCardinality();
2407 
2408  auto dofCoeffs1 = ViewType("dofCoeffs1",basisCardinality1);
2409  auto dofCoeffs2 = ViewType("dofCoeffs2",basisCardinality2);
2410  auto dofCoeffs3 = ViewType("dofCoeffs3",basisCardinality3);
2411 
2412  basis1_->getDofCoeffs(dofCoeffs1);
2413  basis2_->getDofCoeffs(dofCoeffs2);
2414  basis3_->getDofCoeffs(dofCoeffs3);
2415 
2416  Kokkos::RangePolicy<ExecutionSpace> policy(0, basisCardinality3);
2417  Kokkos::parallel_for(policy, KOKKOS_LAMBDA (const int fieldOrdinal3)
2418  {
2419  for (int fieldOrdinal2=0; fieldOrdinal2<basisCardinality2; fieldOrdinal2++)
2420  for (int fieldOrdinal1=0; fieldOrdinal1<basisCardinality1; fieldOrdinal1++)
2421  {
2422  const ordinal_type fieldOrdinal = fieldOrdinal1 + fieldOrdinal2 * basisCardinality1 + fieldOrdinal3 * (basisCardinality1*basisCardinality2);
2423  dofCoeffs(fieldOrdinal) = dofCoeffs1(fieldOrdinal1);
2424  dofCoeffs(fieldOrdinal) *= dofCoeffs2(fieldOrdinal2) * dofCoeffs3(fieldOrdinal3);
2425  }
2426  });
2427  }
2428 
2433  virtual HostBasisPtr<OutputValueType, PointValueType>
2434  getHostBasis() const override {
2435  TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, "TensorBasis3 subclasses must override getHostBasis");
2436  }
2437  };
2438 } // end namespace Intrepid2
2439 
2440 #endif /* Intrepid2_TensorBasis_h */