Intrepid2
Intrepid2_TensorBasis.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ************************************************************************
3 //
4 // Intrepid2 Package
5 // Copyright (2007) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Kyungjoo Kim (kyukim@sandia.gov),
38 // Mauro Perego (mperego@sandia.gov), or
39 // Nate Roberts (nvrober@sandia.gov)
40 //
41 // ************************************************************************
42 // @HEADER
43 
49 #ifndef Intrepid2_TensorBasis_h
50 #define Intrepid2_TensorBasis_h
51 
52 #include <Kokkos_View.hpp>
53 #include <Kokkos_DynRankView.hpp>
54 
55 #include <Intrepid2_config.h>
56 
57 #include <map>
58 #include <set>
59 #include <vector>
60 
61 #include "Intrepid2_Basis.hpp"
65 #include "Intrepid2_Utils.hpp" // defines FAD_VECTOR_SIZE, VECTOR_SIZE
66 
67 namespace Intrepid2
68 {
69  template<unsigned spaceDim>
70  KOKKOS_INLINE_FUNCTION
71  void getDkEnumerationInverse(Kokkos::Array<int,spaceDim> &entries, const ordinal_type dkEnum, const ordinal_type operatorOrder);
72 
73  template<>
74  KOKKOS_INLINE_FUNCTION
75  void getDkEnumerationInverse<1>(Kokkos::Array<int,1> &entries, const ordinal_type dkEnum, const ordinal_type operatorOrder)
76  {
77  entries[0] = operatorOrder;
78  }
79 
80  template<>
81  KOKKOS_INLINE_FUNCTION
82  void getDkEnumerationInverse<2>(Kokkos::Array<int,2> &entries, const ordinal_type dkEnum, const ordinal_type operatorOrder)
83  {
84  entries[0] = operatorOrder - dkEnum;
85  entries[1] = dkEnum;
86  }
87 
88  template<>
89  KOKKOS_INLINE_FUNCTION
90  void getDkEnumerationInverse<3>(Kokkos::Array<int,3> &entries, const ordinal_type dkEnum, const ordinal_type operatorOrder)
91  {
92  // formula is zMult + (yMult+zMult)*(yMult+zMult+1)/2; where xMult+yMult+zMult = operatorOrder
93  // it seems complicated to work out a formula that will invert this. For the present we just take a brute force approach,
94  // using getDkEnumeration() to check each possibility
95  for (ordinal_type yMult=0; yMult<=operatorOrder; yMult++)
96  {
97  for (ordinal_type zMult=0; zMult<=operatorOrder-yMult; zMult++)
98  {
99  const ordinal_type xMult = operatorOrder-(zMult+yMult);
100  if (dkEnum == getDkEnumeration<3>(xMult,yMult,zMult))
101  {
102  entries[0] = xMult;
103  entries[1] = yMult;
104  entries[2] = zMult;
105  }
106  }
107  }
108  }
109 
110  template<unsigned spaceDim>
111  ordinal_type getDkEnumeration(Kokkos::Array<int,spaceDim> &entries);
112 
113  template<>
114  inline ordinal_type getDkEnumeration<1>(Kokkos::Array<int,1> &entries)
115  {
116  return getDkEnumeration<1>(entries[0]);
117  }
118 
119  template<>
120  inline ordinal_type getDkEnumeration<2>(Kokkos::Array<int,2> &entries)
121  {
122  return getDkEnumeration<2>(entries[0],entries[1]);
123  }
124 
125  template<>
126  inline ordinal_type getDkEnumeration<3>(Kokkos::Array<int,3> &entries)
127  {
128  return getDkEnumeration<3>(entries[0],entries[1],entries[2]);
129  }
130 
131  template<unsigned spaceDim1, unsigned spaceDim2>
132  inline ordinal_type getDkTensorIndex(const ordinal_type dkEnum1, const ordinal_type operatorOrder1,
133  const ordinal_type dkEnum2, const ordinal_type operatorOrder2)
134  {
135  Kokkos::Array<int,spaceDim1> entries1;
136  getDkEnumerationInverse<spaceDim1>(entries1, dkEnum1, operatorOrder1);
137 
138  Kokkos::Array<int,spaceDim2> entries2;
139  getDkEnumerationInverse<spaceDim2>(entries2, dkEnum2, operatorOrder2);
140 
141  const int spaceDim = spaceDim1 + spaceDim2;
142  Kokkos::Array<int,spaceDim> entries;
143 
144  for (unsigned d=0; d<spaceDim1; d++)
145  {
146  entries[d] = entries1[d];
147  }
148 
149  for (unsigned d=0; d<spaceDim2; d++)
150  {
151  entries[d+spaceDim1] = entries2[d];
152  }
153 
154  return getDkEnumeration<spaceDim>(entries);
155  }
156 
162  template<class ExecutionSpace, class OutputScalar, class OutputFieldType>
164  {
165  using ScratchSpace = typename ExecutionSpace::scratch_memory_space;
166  using OutputScratchView = Kokkos::View<OutputScalar*,ScratchSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
167 
168  using TeamPolicy = Kokkos::TeamPolicy<ExecutionSpace>;
169  using TeamMember = typename TeamPolicy::member_type;
170 
172  using RankCombinationType = typename TensorViewIteratorType::RankCombinationType;
173 
174  OutputFieldType output_; // F,P[,D…]
175  OutputFieldType input1_; // F1,P[,D…] or F1,P1[,D…]
176  OutputFieldType input2_; // F2,P[,D…] or F2,P2[,D…]
177 
178  int numFields_, numPoints_;
179  int numFields1_, numPoints1_;
180  int numFields2_, numPoints2_;
181 
182  bool tensorPoints_; // if true, input1 and input2 refer to values at decomposed points, and P = P1 * P2. If false, then the two inputs refer to points in the full-dimensional space, and their point lengths are the same as that of the final output.
183 
184  Kokkos::vector<RankCombinationType> rank_combinations_; // indicates the policy by which the input views will be combined in output view
185 
186  double weight_;
187 
188  public:
189 
190  TensorViewFunctor(OutputFieldType output, OutputFieldType inputValues1, OutputFieldType inputValues2,
191  bool tensorPoints, double weight)
192  : output_(output), input1_(inputValues1), input2_(inputValues2), tensorPoints_(tensorPoints), weight_(weight)
193  {
194  numFields_ = output.extent_int(0);
195  numPoints_ = output.extent_int(1);
196 
197  numFields1_ = inputValues1.extent_int(0);
198  numPoints1_ = inputValues1.extent_int(1);
199 
200  numFields2_ = inputValues2.extent_int(0);
201  numPoints2_ = inputValues2.extent_int(1);
202 
203  if (!tensorPoints_)
204  {
205  // then the point counts should all match
206  INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != numPoints1_, std::invalid_argument, "incompatible point counts");
207  INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != numPoints2_, std::invalid_argument, "incompatible point counts");
208  }
209  else
210  {
211  INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != numPoints1_ * numPoints2_, std::invalid_argument, "incompatible point counts");
212  }
213 
214  INTREPID2_TEST_FOR_EXCEPTION(numFields_ != numFields1_ * numFields2_, std::invalid_argument, "incompatible field sizes");
215 
216  unsigned max_rank = std::max(inputValues1.rank(),inputValues2.rank());
217  // at present, no supported case will result in an output rank greater than both input ranks
218  INTREPID2_TEST_FOR_EXCEPTION(output.rank() > max_rank, std::invalid_argument, "Unsupported view combination.");
219  rank_combinations_ = Kokkos::vector<RankCombinationType>(max_rank);
220 
221  rank_combinations_[0] = TensorViewIteratorType::TENSOR_PRODUCT; // field combination is always tensor product
222  rank_combinations_[1] = tensorPoints ? TensorViewIteratorType::TENSOR_PRODUCT : TensorViewIteratorType::DIMENSION_MATCH; // tensorPoints controls interpretation of the point dimension
223  for (unsigned d=2; d<max_rank; d++)
224  {
225  // d >= 2 have the interpretation of spatial dimensions (gradients, etc.)
226  // we let the extents of the containers determine what we're doing here
227  if ((inputValues1.extent_int(d) == inputValues2.extent_int(d)) && (output.extent_int(d) == 1))
228  {
229  rank_combinations_[d] = TensorViewIteratorType::TENSOR_CONTRACTION;
230  }
231  else if (((inputValues1.extent_int(d) == output.extent_int(d)) && (inputValues2.extent_int(d) == 1))
232  || ((inputValues2.extent_int(d) == output.extent_int(d)) && (inputValues1.extent_int(d) == 1))
233  )
234  {
235  // this looks like multiplication of a vector by a scalar, resulting in a vector
236  // this can be understood as a tensor product
237  rank_combinations_[d] = TensorViewIteratorType::TENSOR_PRODUCT;
238  }
239  else if ((inputValues1.extent_int(d) == inputValues2.extent_int(d)) && (output.extent_int(d) == inputValues1.extent_int(d) * inputValues2.extent_int(d)))
240  {
241  // this is actually a generalization of the above case: a tensor product, something like a vector outer product
242  rank_combinations_[d] = TensorViewIteratorType::TENSOR_PRODUCT;
243  }
244  else if ((inputValues1.extent_int(d) == inputValues2.extent_int(d)) && (output.extent_int(d) == inputValues1.extent_int(d)))
245  {
246  // it's a bit weird (I'm not aware of the use case, in the present context), but we can handle this case by adopting DIMENSION_MATCH here
247  // this is something like MATLAB's .* and .+ operators, which operate entry-wise
248  rank_combinations_[d] = TensorViewIteratorType::DIMENSION_MATCH;
249  }
250  else
251  {
252  std::cout << "inputValues1.extent_int(" << d << ") = " << inputValues1.extent_int(d) << std::endl;
253  std::cout << "inputValues2.extent_int(" << d << ") = " << inputValues2.extent_int(d) << std::endl;
254  std::cout << "output.extent_int(" << d << ") = " << output.extent_int(d) << std::endl;
255  INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "unable to find an interpretation for this combination of views");
256  }
257  }
258  }
259 
260  KOKKOS_INLINE_FUNCTION
261  void operator()( const TeamMember & teamMember ) const
262  {
263  auto fieldOrdinal1 = teamMember.league_rank();
264 
265  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
266  TensorViewIteratorType it(output_,input1_,input2_,rank_combinations_);
267  const int FIELD_ORDINAL_DIMENSION = 0;
268  it.setLocation({fieldOrdinal1,0,0,0,0,0,0},{fieldOrdinal2,0,0,0,0,0,0});
269  int next_increment_rank = FIELD_ORDINAL_DIMENSION; // used to initialize prev_increment_rank at the start of the do/while loop. Notionally, we last incremented in the field ordinal rank to get to the {fieldOrdinal1,0,0,0,0,0,0},{fieldOrdinal2,0,0,0,0,0,0} location.
270  OutputScalar accumulator = 0;
271 
272  do
273  {
274  accumulator += weight_ * it.getView1Entry() * it.getView2Entry();
275  next_increment_rank = it.nextIncrementRank();
276 
277  if ((next_increment_rank < 0) || (rank_combinations_[next_increment_rank] != TensorViewIteratorType::TENSOR_CONTRACTION))
278  {
279  // then we've finished the accumulation and should set the value
280  it.set(accumulator);
281  // reset the accumulator:
282  accumulator = 0;
283  }
284  } while (it.increment() > FIELD_ORDINAL_DIMENSION);
285  });
286  }
287  };
288 
302  template<typename Basis1, typename Basis2>
304  :
305  public Basis<typename Basis1::ExecutionSpace,typename Basis1::OutputValueType,typename Basis1::PointValueType>
306  {
307  protected:
308  Basis1 basis1_;
309  Basis2 basis2_;
310 
311  std::string name_; // name of the basis
312  public:
314 
315  using ExecutionSpace = typename BasisSuper::ExecutionSpace;
316  using OutputValueType = typename BasisSuper::OutputValueType;
317  using PointValueType = typename BasisSuper::PointValueType;
318 
319  using OrdinalTypeArray1DHost = typename BasisSuper::OrdinalTypeArray1DHost;
320  using OrdinalTypeArray2DHost = typename BasisSuper::OrdinalTypeArray2DHost;
321  using OutputViewType = typename BasisSuper::OutputViewType;
322  using PointViewType = typename BasisSuper::PointViewType;
323  public:
328  Basis_TensorBasis(Basis1 basis1, Basis2 basis2)
329  :
330  basis1_(basis1),basis2_(basis2)
331  {
332  this->basisCardinality_ = basis1.getCardinality() * basis2.getCardinality();
333  this->basisDegree_ = std::max(basis1.getDegree(), basis2.getDegree());
334 
335  {
336  std::ostringstream basisName;
337  basisName << basis1.getName() << " x " << basis2.getName();
338  name_ = basisName.str();
339  }
340 
341  // set cell topology
342  shards::CellTopology cellTopo1 = basis1.getBaseCellTopology();
343  shards::CellTopology cellTopo2 = basis2.getBaseCellTopology();
344 
345  auto cellKey1 = basis1.getBaseCellTopology().getKey();
346  auto cellKey2 = basis2.getBaseCellTopology().getKey();
347  if ((cellKey1 == shards::Line<2>::key) && (cellKey2 == shards::Line<2>::key))
348  {
349  this->basisCellTopology_ = shards::CellTopology(shards::getCellTopologyData<shards::Quadrilateral<4> >() );
350  }
351  else if (((cellKey1 == shards::Quadrilateral<4>::key) && (cellKey2 == shards::Line<2>::key))
352  || ((cellKey2 == shards::Quadrilateral<4>::key) && (cellKey1 == shards::Line<2>::key)))
353  {
354  this->basisCellTopology_ = shards::CellTopology(shards::getCellTopologyData<shards::Hexahedron<8> >() );
355  }
356  else
357  {
358  INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Cell topology combination not yet supported");
359  }
360 
361  this->basisType_ = basis1.getBasisType();
362  this->basisCoordinates_ = COORDINATES_CARTESIAN;
363 
364  // initialize tags
365  {
366  const auto & cardinality = this->basisCardinality_;
367 
368  // Basis-dependent initializations
369  const ordinal_type tagSize = 4; // size of DoF tag, i.e., number of fields in the tag
370  const ordinal_type posScDim = 0; // position in the tag, counting from 0, of the subcell dim
371  const ordinal_type posScOrd = 1; // position in the tag, counting from 0, of the subcell ordinal
372  const ordinal_type posDfOrd = 2; // position in the tag, counting from 0, of DoF ordinal relative to the subcell
373 
374  OrdinalTypeArray1DHost tagView("tag view", cardinality*tagSize);
375 
376  shards::CellTopology cellTopo = this->basisCellTopology_;
377 
378  int tensorSpaceDim = cellTopo.getDimension();
379  int spaceDim1 = cellTopo1.getDimension();
380  int spaceDim2 = cellTopo2.getDimension();
381 
382  if (this->getBasisType() == BASIS_FEM_HIERARCHICAL)
383  {
384  int degreeSize = basis1_.getPolynomialDegreeLength() + basis2_.getPolynomialDegreeLength();
385  this->fieldOrdinalPolynomialDegree_ = OrdinalTypeArray2DHost("TensorBasis - field ordinal polynomial degree", this->basisCardinality_, degreeSize);
386  }
387 
388  TensorTopologyMap topoMap(cellTopo1, cellTopo2);
389 
390  for (int d=0; d<=tensorSpaceDim; d++) // d: tensorial dimension
391  {
392  int d2_max = std::min(spaceDim2,d);
393  int subcellOffset = 0; // for this dimension of tensor subcells, how many subcells have we already counted with other d2/d1 combos?
394  for (int d2=0; d2<=d2_max; d2++)
395  {
396  int d1 = d-d2;
397  if (d1 > spaceDim1) continue;
398 
399  unsigned subcellCount2 = cellTopo2.getSubcellCount(d2);
400  unsigned subcellCount1 = cellTopo1.getSubcellCount(d1);
401  for (unsigned subcellOrdinal2=0; subcellOrdinal2<subcellCount2; subcellOrdinal2++)
402  {
403  ordinal_type subcellDofCount2 = basis2_.getDofCount(d2, subcellOrdinal2);
404  for (unsigned subcellOrdinal1=0; subcellOrdinal1<subcellCount1; subcellOrdinal1++)
405  {
406  ordinal_type subcellDofCount1 = basis1_.getDofCount(d1, subcellOrdinal1);
407  ordinal_type tensorLocalDofCount = subcellDofCount1 * subcellDofCount2;
408  for (ordinal_type localDofID2 = 0; localDofID2<subcellDofCount2; localDofID2++)
409  {
410  ordinal_type fieldOrdinal2 = basis2_.getDofOrdinal(d2, subcellOrdinal2, localDofID2);
411  OrdinalTypeArray1DHost degreesField2;
412  if (this->basisType_ == BASIS_FEM_HIERARCHICAL) degreesField2 = basis2_.getPolynomialDegreeOfField(fieldOrdinal2);
413  for (ordinal_type localDofID1 = 0; localDofID1<subcellDofCount1; localDofID1++)
414  {
415  ordinal_type fieldOrdinal1 = basis1_.getDofOrdinal(d1, subcellOrdinal1, localDofID1);
416  ordinal_type tensorLocalDofID = localDofID2 * subcellDofCount1 + localDofID1;
417  ordinal_type tensorFieldOrdinal = fieldOrdinal2 * basis1_.getCardinality() + fieldOrdinal1;
418  tagView(tensorFieldOrdinal*tagSize+0) = d; // subcell dimension
419  tagView(tensorFieldOrdinal*tagSize+1) = topoMap.getCompositeSubcellOrdinal(d1, subcellOrdinal1, d2, subcellOrdinal2);
420  tagView(tensorFieldOrdinal*tagSize+2) = tensorLocalDofID;
421  tagView(tensorFieldOrdinal*tagSize+3) = tensorLocalDofCount;
422 
423  if (this->basisType_ == BASIS_FEM_HIERARCHICAL)
424  {
425  // fill in degree lookup:
426  OrdinalTypeArray1DHost degreesField1 = basis1_.getPolynomialDegreeOfField(fieldOrdinal1);
427 
428  int degreeLengthField1 = degreesField1.extent_int(0);
429  int degreeLengthField2 = degreesField2.extent_int(0);
430  for (int d3=0; d3<degreeLengthField1; d3++)
431  {
432  this->fieldOrdinalPolynomialDegree_(tensorFieldOrdinal,d3) = degreesField1(d3);
433  }
434  for (int d3=0; d3<degreeLengthField2; d3++)
435  {
436  this->fieldOrdinalPolynomialDegree_(tensorFieldOrdinal,d3+degreeLengthField1) = degreesField2(d3);
437  }
438  }
439  } // localDofID1
440  } // localDofID2
441  } // subcellOrdinal1
442  } // subcellOrdinal2
443  subcellOffset += subcellCount1 * subcellCount2;
444  }
445  }
446 
447  // // Basis-independent function sets tag and enum data in tagToOrdinal_ and ordinalToTag_ arrays:
448  // // tags are constructed on host
449  this->setOrdinalTagData(this->tagToOrdinal_,
450  this->ordinalToTag_,
451  tagView,
452  this->basisCardinality_,
453  tagSize,
454  posScDim,
455  posScOrd,
456  posDfOrd);
457  }
458  }
459 
460  // since the getValues() below only overrides the FEM variant, we specify that
461  // we use the base class's getValues(), which implements the FVD variant by throwing an exception.
462  // (It's an error to use the FVD variant on this basis.)
463  using BasisSuper::getValues;
464 
476  void getComponentPoints(const PointViewType inputPoints, const bool attemptTensorDecomposition,
477  PointViewType & inputPoints1, PointViewType & inputPoints2, bool &tensorDecompositionSucceeded) const
478  {
479  INTREPID2_TEST_FOR_EXCEPTION(attemptTensorDecomposition, std::invalid_argument, "tensor decomposition not yet supported");
480 
481  // for inputPoints that are actually tensor-product of component quadrature points (say),
482  // having just the one input (which will have a lot of redundant point data) is suboptimal
483  // The general case can have unique x/y/z coordinates at every point, though, so we have to support that
484  // when this interface is used. But we may try detecting that the data is tensor-product and compressing
485  // from there... Ultimately, we should also add a getValues() variant that takes multiple input point containers,
486  // one for each tensorial dimension.
487 
488  // this initial implementation is intended to simplify development of 2D and 3D bases, while also opening
489  // the possibility of higher-dimensional bases. It is not necessarily optimized for speed/memory. There
490  // are things we can do in this regard, which may become important for matrix-free computations wherein
491  // basis values don't get stored but are computed dynamically.
492 
493  int spaceDim1 = basis1_.getBaseCellTopology().getDimension();
494  int spaceDim2 = basis2_.getBaseCellTopology().getDimension();
495 
496  int totalSpaceDim = inputPoints.extent_int(1);
497 
498  TEUCHOS_ASSERT(spaceDim1 + spaceDim2 == totalSpaceDim);
499 
500  // first pass: just take subviews to get input points -- this will result in redundant computations when points are themselves tensor product (i.e., inputPoints itself contains redundant data)
501 
502  inputPoints1 = Kokkos::subview(inputPoints,Kokkos::ALL(),std::make_pair(0,spaceDim1));
503  inputPoints2 = Kokkos::subview(inputPoints,Kokkos::ALL(),std::make_pair(spaceDim1,totalSpaceDim));
504 
505  // std::cout << "inputPoints : " << inputPoints.extent(0) << " x " << inputPoints.extent(1) << std::endl;
506  // std::cout << "inputPoints1 : " << inputPoints1.extent(0) << " x " << inputPoints1.extent(1) << std::endl;
507  // std::cout << "inputPoints2 : " << inputPoints2.extent(0) << " x " << inputPoints2.extent(1) << std::endl;
508 
509  tensorDecompositionSucceeded = false;
510  }
511 
520  virtual void getDofCoords( typename BasisSuper::ScalarViewType dofCoords ) const override
521  {
522  int spaceDim1 = basis1_.getBaseCellTopology().getDimension();
523  int spaceDim2 = basis2_.getBaseCellTopology().getDimension();
524 
525  using ValueType = typename BasisSuper::ScalarViewType::value_type;
526  using ResultLayout = typename DeduceLayout< typename BasisSuper::ScalarViewType >::result_layout;
527  using DeviceType = typename BasisSuper::ScalarViewType::device_type;
528  using ViewType = Kokkos::DynRankView<ValueType, ResultLayout, DeviceType >;
529 
530  ViewType dofCoords1("dofCoords1",basis1_.getCardinality(),spaceDim1);
531  ViewType dofCoords2("dofCoords2",basis2_.getCardinality(),spaceDim2);
532 
533  basis1_.getDofCoords(dofCoords1);
534  basis2_.getDofCoords(dofCoords2);
535 
536  const ordinal_type basisCardinality1 = basis1_.getCardinality();
537  const ordinal_type basisCardinality2 = basis2_.getCardinality();
538 
539  Kokkos::parallel_for(basisCardinality2, KOKKOS_LAMBDA (const int fieldOrdinal2)
540  {
541  for (int fieldOrdinal1=0; fieldOrdinal1<basisCardinality1; fieldOrdinal1++)
542  {
543  const ordinal_type fieldOrdinal = fieldOrdinal1 + fieldOrdinal2 * basisCardinality1;
544  for (int d1=0; d1<spaceDim1; d1++)
545  {
546  dofCoords(fieldOrdinal,d1) = dofCoords1(fieldOrdinal1,d1);
547  }
548  for (int d2=0; d2<spaceDim2; d2++)
549  {
550  dofCoords(fieldOrdinal,spaceDim1+d2) = dofCoords2(fieldOrdinal2,d2);
551  }
552  }
553  });
554  }
555 
560  virtual
561  const char*
562  getName() const override {
563  return name_.c_str();
564  }
565 
574  ordinal_type getTensorDkEnumeration(ordinal_type dkEnum1, ordinal_type operatorOrder1,
575  ordinal_type dkEnum2, ordinal_type operatorOrder2) const
576  {
577  unsigned spaceDim1 = basis1_.getBaseCellTopology().getDimension();
578  unsigned spaceDim2 = basis2_.getBaseCellTopology().getDimension();
579 
580  // for now, we only support total spaceDim <= 3. It would not be too hard to extend to support higher dimensions,
581  // but the support needs to be built out in e.g. shards::CellTopology for this, as well as our DkEnumeration, etc.
582  switch (spaceDim1)
583  {
584  case 1:
585  switch (spaceDim2)
586  {
587  case 1:
588  return getDkTensorIndex<1, 1>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
589  case 2:
590  return getDkTensorIndex<1, 2>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
591  default:
592  INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported dimension combination");
593  }
594  case 2:
595  switch (spaceDim2)
596  {
597  case 1:
598  return getDkTensorIndex<2, 1>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
599  default:
600  INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported dimension combination");
601  }
602  // case 3:
603  // switch (spaceDim2)
604  // {
605  // case 1:
606  // return getDkTensorIndex<3, 1>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
607  // case 2:
608  // return getDkTensorIndex<3, 2>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
609  // case 3:
610  // return getDkTensorIndex<3, 3>(dkEnum1, operatorOrder1, dkEnum2, operatorOrder2);
611  // default:
612  // INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported dimension combination");
613  // }
614  default:
615  INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported dimension combination");
616  }
617  }
618 
637  void getValues( OutputViewType outputValues, const PointViewType inputPoints,
638  const EOperator operatorType = OPERATOR_VALUE ) const override
639  {
640  bool tensorPoints; // true would mean that we take the tensor product of inputPoints1 and inputPoints2 (and that this would be equivalent to inputPoints as given -- i.e., inputPoints1 and inputPoints2 would be a tensor decomposition of inputPoints)
641  bool attemptTensorDecomposition = false; // support for this not yet implemented
642  PointViewType inputPoints1, inputPoints2;
643  getComponentPoints(inputPoints, attemptTensorDecomposition, inputPoints1, inputPoints2, tensorPoints);
644 
645  switch (operatorType)
646  {
647  case OPERATOR_D1:
648  case OPERATOR_D2:
649  case OPERATOR_D3:
650  case OPERATOR_D4:
651  case OPERATOR_D5:
652  case OPERATOR_D6:
653  case OPERATOR_D7:
654  case OPERATOR_D8:
655  case OPERATOR_D9:
656  case OPERATOR_D10:
657  {
658  auto opOrder = getOperatorOrder(operatorType); // number of derivatives that we take in total
659  // the Dk enumeration happens in lexicographic order (reading from left to right: x, y, z, etc.)
660  // this governs the nesting order of the dkEnum1, dkEnum2 for loops below: dkEnum2 should increment fastest.
661  for (int derivativeCountComp2=0; derivativeCountComp2<=opOrder; derivativeCountComp2++)
662  {
663  int derivativeCountComp1=opOrder-derivativeCountComp2;
664  EOperator op1 = (derivativeCountComp1 == 0) ? OPERATOR_VALUE : EOperator(OPERATOR_D1 + (derivativeCountComp1 - 1));
665  EOperator op2 = (derivativeCountComp2 == 0) ? OPERATOR_VALUE : EOperator(OPERATOR_D1 + (derivativeCountComp2 - 1));
666 
667  int spaceDim1 = inputPoints1.extent_int(1);
668  int spaceDim2 = inputPoints2.extent_int(1);
669 
670  int dkCardinality1 = (op1 != OPERATOR_VALUE) ? getDkCardinality(op1, spaceDim1) : 1;
671  int dkCardinality2 = (op2 != OPERATOR_VALUE) ? getDkCardinality(op2, spaceDim2) : 1;
672 
673  int basisCardinality1 = basis1_.getCardinality();
674  int basisCardinality2 = basis2_.getCardinality();
675 
676  int totalPointCount = tensorPoints ? inputPoints1.extent_int(0) * inputPoints2.extent_int(0) : inputPoints1.extent_int(0);
677 
678  int pointCount1, pointCount2;
679  if (tensorPoints)
680  {
681  pointCount1 = inputPoints1.extent_int(0);
682  pointCount2 = inputPoints2.extent_int(0);
683  }
684  else
685  {
686  pointCount1 = totalPointCount;
687  pointCount2 = totalPointCount;
688  }
689 
690  OutputViewType outputValues1, outputValues2;
691  if (op1 == OPERATOR_VALUE)
692  outputValues1 = getMatchingViewWithLabel(outputValues, "output values - basis 1",basisCardinality1,pointCount1);
693  else
694  outputValues1 = getMatchingViewWithLabel(outputValues, "output values - basis 1",basisCardinality1,pointCount1,dkCardinality1);
695 
696  if (op2 == OPERATOR_VALUE)
697  outputValues2 = getMatchingViewWithLabel(outputValues, "output values - basis 2",basisCardinality2,pointCount2);
698  else
699  outputValues2 = getMatchingViewWithLabel(outputValues, "output values - basis 2",basisCardinality2,pointCount2,dkCardinality2);
700 
701  basis1_.getValues(outputValues1,inputPoints1,op1);
702  basis2_.getValues(outputValues2,inputPoints2,op2);
703 
704  const int outputVectorSize = getVectorSizeForHierarchicalParallelism<OutputValueType>();
705  const int pointVectorSize = getVectorSizeForHierarchicalParallelism<PointValueType>();
706  const int vectorSize = std::max(outputVectorSize,pointVectorSize);
707 
708  auto policy = Kokkos::TeamPolicy<ExecutionSpace>(basisCardinality1,Kokkos::AUTO(),vectorSize);
709 
710  double weight = 1.0;
712 
713  for (int dkEnum1=0; dkEnum1<dkCardinality1; dkEnum1++)
714  {
715  auto outputValues1_dkEnum1 = (op1 != OPERATOR_VALUE) ? Kokkos::subview(outputValues1,Kokkos::ALL(),Kokkos::ALL(),dkEnum1)
716  : Kokkos::subview(outputValues1,Kokkos::ALL(),Kokkos::ALL());
717  for (int dkEnum2=0; dkEnum2<dkCardinality2; dkEnum2++)
718  {
719  auto outputValues2_dkEnum2 = (op2 != OPERATOR_VALUE) ? Kokkos::subview(outputValues2,Kokkos::ALL(),Kokkos::ALL(),dkEnum2)
720  : Kokkos::subview(outputValues2,Kokkos::ALL(),Kokkos::ALL());
721 
722  ordinal_type dkTensorIndex = getTensorDkEnumeration(dkEnum1, derivativeCountComp1, dkEnum2, derivativeCountComp2);
723  auto outputValues_dkTensor = Kokkos::subview(outputValues,Kokkos::ALL(),Kokkos::ALL(),dkTensorIndex);
724  // Note that there may be performance optimizations available here:
725  // - could eliminate interior for loop in favor of having a vector-valued outputValues1_dk
726  // - could add support to TensorViewFunctor (and probably TensorViewIterator) for this kind of tensor Dk type of traversal
727  // (this would allow us to eliminate both for loops here)
728  // At the moment, we defer such optimizations on the idea that this may not ever become a performance bottleneck.
729  FunctorType functor(outputValues_dkTensor, outputValues1_dkEnum1, outputValues2_dkEnum2, tensorPoints, weight);
730  Kokkos::parallel_for( policy , functor, "TensorViewFunctor");
731  }
732  }
733  }
734  }
735  break;
736  default: // non-OPERATOR_Dn case must be handled by subclass.
737  this->getValues(outputValues, operatorType, inputPoints1, inputPoints2, tensorPoints);
738  }
739  }
740 
766  virtual void getValues(OutputViewType outputValues, const EOperator operatorType,
767  const PointViewType inputPoints1, const PointViewType inputPoints2,
768  bool tensorPoints) const
769  {
770  INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "one-operator, two-inputPoints getValues should be overridden by TensorBasis subclasses");
771  }
772 
796  void getValues( OutputViewType outputValues,
797  const PointViewType inputPoints1, const EOperator operatorType1,
798  const PointViewType inputPoints2, const EOperator operatorType2,
799  bool tensorPoints, double weight=1.0) const
800  {
801  int basisCardinality1 = basis1_.getCardinality();
802  int basisCardinality2 = basis2_.getCardinality();
803 
804  int totalPointCount = tensorPoints ? inputPoints1.extent_int(0) * inputPoints2.extent_int(0) : inputPoints1.extent_int(0);
805 
806  int pointCount1, pointCount2;
807  if (tensorPoints)
808  {
809  pointCount1 = inputPoints1.extent_int(0);
810  pointCount2 = inputPoints2.extent_int(0);
811  }
812  else
813  {
814  pointCount1 = totalPointCount;
815  pointCount2 = totalPointCount;
816  }
817 
818  int spaceDim1 = inputPoints1.extent_int(1);
819  int spaceDim2 = inputPoints2.extent_int(1);
820 
821  INTREPID2_TEST_FOR_EXCEPTION(!tensorPoints && (totalPointCount != inputPoints2.extent_int(0)),
822  std::invalid_argument, "If tensorPoints is false, the point counts must match!");
823 
824  int opRank1 = getOperatorRank(basis1_.getFunctionSpace(), operatorType1, spaceDim1);
825  int opRank2 = getOperatorRank(basis2_.getFunctionSpace(), operatorType2, spaceDim2);
826 
827  OutputViewType outputValues1, outputValues2;
828  if (opRank1 == 0)
829  {
830  outputValues1 = getMatchingViewWithLabel(outputValues,"output values - basis 1",basisCardinality1,pointCount1);
831  }
832  else if (opRank1 == 1)
833  {
834  outputValues1 = getMatchingViewWithLabel(outputValues,"output values - basis 1",basisCardinality1,pointCount1,spaceDim1);
835  }
836  else
837  {
838  INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported opRank1");
839  }
840 
841  if (opRank2 == 0)
842  {
843  outputValues2 = getMatchingViewWithLabel(outputValues,"output values - basis 2",basisCardinality2,pointCount2);
844  }
845  else if (opRank2 == 1)
846  {
847  outputValues2 = getMatchingViewWithLabel(outputValues,"output values - basis 2",basisCardinality2,pointCount2,spaceDim2);
848  }
849  else
850  {
851  INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported opRank2");
852  }
853 
854  basis1_.getValues(outputValues1,inputPoints1,operatorType1);
855  basis2_.getValues(outputValues2,inputPoints2,operatorType2);
856 
857  const int outputVectorSize = getVectorSizeForHierarchicalParallelism<OutputValueType>();
858  const int pointVectorSize = getVectorSizeForHierarchicalParallelism<PointValueType>();
859  const int vectorSize = std::max(outputVectorSize,pointVectorSize);
860 
861  auto policy = Kokkos::TeamPolicy<ExecutionSpace>(basisCardinality1,Kokkos::AUTO(),vectorSize);
862 
864 
865  FunctorType functor(outputValues, outputValues1, outputValues2, tensorPoints, weight);
866  Kokkos::parallel_for( policy , functor, "TensorViewFunctor");
867  }
868  };
869 
877  template<class ExecutionSpace, class OutputScalar, class OutputFieldType>
879  {
880  using ScratchSpace = typename ExecutionSpace::scratch_memory_space;
881  using OutputScratchView = Kokkos::View<OutputScalar*,ScratchSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
882 
883  using TeamPolicy = Kokkos::TeamPolicy<ExecutionSpace>;
884  using TeamMember = typename TeamPolicy::member_type;
885 
886  OutputFieldType output_; // F,P
887  OutputFieldType input1_; // F1,P[,D] or F1,P1[,D]
888  OutputFieldType input2_; // F2,P[,D] or F2,P2[,D]
889  OutputFieldType input3_; // F2,P[,D] or F2,P2[,D]
890 
891  int numFields_, numPoints_;
892  int numFields1_, numPoints1_;
893  int numFields2_, numPoints2_;
894  int numFields3_, numPoints3_;
895 
896  bool tensorPoints_; // if true, input1, input2, input3 refer to values at decomposed points, and P = P1 * P2 * P3. If false, then the three inputs refer to points in the full-dimensional space, and their point lengths are the same as that of the final output.
897 
898  double weight_;
899 
900  TensorBasis3_Functor(OutputFieldType output, OutputFieldType inputValues1, OutputFieldType inputValues2, OutputFieldType inputValues3,
901  bool tensorPoints, double weight)
902  : output_(output), input1_(inputValues1), input2_(inputValues2), input3_(inputValues3), tensorPoints_(tensorPoints), weight_(weight)
903  {
904  numFields_ = output.extent_int(0);
905  numPoints_ = output.extent_int(1);
906 
907  numFields1_ = inputValues1.extent_int(0);
908  numPoints1_ = inputValues1.extent_int(1);
909 
910  numFields2_ = inputValues2.extent_int(0);
911  numPoints2_ = inputValues2.extent_int(1);
912 
913  numFields3_ = inputValues3.extent_int(0);
914  numPoints3_ = inputValues3.extent_int(1);
915  /*
916  We don't yet support tensor-valued bases here (only vector and scalar). The main design question is how the layouts
917  of the input containers relates to the layout of the output container. The work we've done in TensorViewIterator basically
918  shows the choices that can be made. It does appear that in most cases (at least (most of?) those supported by TensorViewIterator),
919  we can infer from the dimensions of input/output containers what choice should be made in each dimension.
920  */
921  INTREPID2_TEST_FOR_EXCEPTION(inputValues1.rank() > 3, std::invalid_argument, "ranks greater than 3 not yet supported");
922  INTREPID2_TEST_FOR_EXCEPTION(inputValues2.rank() > 3, std::invalid_argument, "ranks greater than 3 not yet supported");
923  INTREPID2_TEST_FOR_EXCEPTION(inputValues3.rank() > 3, std::invalid_argument, "ranks greater than 3 not yet supported");
924  INTREPID2_TEST_FOR_EXCEPTION((inputValues1.rank() == 3) && (inputValues2.rank() == 3), std::invalid_argument, "two vector-valued input ranks not yet supported");
925  INTREPID2_TEST_FOR_EXCEPTION((inputValues1.rank() == 3) && (inputValues3.rank() == 3), std::invalid_argument, "two vector-valued input ranks not yet supported");
926  INTREPID2_TEST_FOR_EXCEPTION((inputValues2.rank() == 3) && (inputValues3.rank() == 3), std::invalid_argument, "two vector-valued input ranks not yet supported");
927 
928  if (!tensorPoints_)
929  {
930  // then the point counts should all match
931  INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != numPoints1_, std::invalid_argument, "incompatible point counts");
932  INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != numPoints2_, std::invalid_argument, "incompatible point counts");
933  INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != numPoints3_, std::invalid_argument, "incompatible point counts");
934  }
935  else
936  {
937  INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != numPoints1_ * numPoints2_ * numPoints3_, std::invalid_argument, "incompatible point counts");
938  }
939 
940  INTREPID2_TEST_FOR_EXCEPTION(numFields_ != numFields1_ * numFields2_ * numFields3_, std::invalid_argument, "incompatible field sizes");
941  }
942 
943  KOKKOS_INLINE_FUNCTION
944  void operator()( const TeamMember & teamMember ) const
945  {
946  auto fieldOrdinal1 = teamMember.league_rank();
947 
948  if (!tensorPoints_)
949  {
950  if ((input1_.rank() == 2) && (input2_.rank() == 2) && (input3_.rank() == 2))
951  {
952  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
953  for (int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
954  {
955  int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
956  for (int pointOrdinal=0; pointOrdinal<numPoints_; pointOrdinal++)
957  {
958  output_(fieldOrdinal,pointOrdinal) = weight_ * input1_(fieldOrdinal1,pointOrdinal) * input2_(fieldOrdinal2,pointOrdinal) * input3_(fieldOrdinal3,pointOrdinal);
959  }
960  }
961  });
962  }
963  else if (input1_.rank() == 3)
964  {
965  int spaceDim = input1_.extent_int(2);
966  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
967  for (int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
968  {
969  int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
970  for (int pointOrdinal=0; pointOrdinal<numPoints_; pointOrdinal++)
971  {
972  for (int d=0; d<spaceDim; d++)
973  {
974  output_(fieldOrdinal,pointOrdinal,d) = weight_ * input1_(fieldOrdinal1,pointOrdinal,d) * input2_(fieldOrdinal2,pointOrdinal) * input3_(fieldOrdinal3,pointOrdinal);
975  }
976  }
977  }
978  });
979  }
980  else if (input2_.rank() == 3)
981  {
982  int spaceDim = input2_.extent_int(2);
983  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
984  for (int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
985  {
986  int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
987  for (int pointOrdinal=0; pointOrdinal<numPoints_; pointOrdinal++)
988  {
989  for (int d=0; d<spaceDim; d++)
990  {
991  output_(fieldOrdinal,pointOrdinal,d) = weight_ * input1_(fieldOrdinal1,pointOrdinal) * input2_(fieldOrdinal2,pointOrdinal,d) * input3_(fieldOrdinal3,pointOrdinal);
992  }
993  }
994  }
995  });
996  }
997  else if (input3_.rank() == 3)
998  {
999  int spaceDim = input3_.extent_int(2);
1000  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
1001  for (int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
1002  {
1003  int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
1004  for (int pointOrdinal=0; pointOrdinal<numPoints_; pointOrdinal++)
1005  {
1006  for (int d=0; d<spaceDim; d++)
1007  {
1008  output_(fieldOrdinal,pointOrdinal,d) = weight_ * input1_(fieldOrdinal1,pointOrdinal) * input2_(fieldOrdinal2,pointOrdinal) * input3_(fieldOrdinal3,pointOrdinal,d);
1009  }
1010  }
1011  }
1012  });
1013  }
1014  else
1015  {
1016  // unsupported rank combination -- enforced in constructor
1017  }
1018  }
1019  else
1020  {
1021  if ((input1_.rank() == 2) && (input2_.rank() == 2) && (input3_.rank() == 2) )
1022  {
1023  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
1024  for (int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
1025  {
1026  int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
1027  for (int pointOrdinal3=0; pointOrdinal3<numPoints3_; pointOrdinal3++)
1028  {
1029  for (int pointOrdinal2=0; pointOrdinal2<numPoints2_; pointOrdinal2++)
1030  {
1031  for (int pointOrdinal1=0; pointOrdinal1<numPoints1_; pointOrdinal1++)
1032  {
1033  int pointOrdinal = (pointOrdinal3 * numPoints2_ + pointOrdinal2) * numPoints1_ + pointOrdinal1;
1034  output_(fieldOrdinal,pointOrdinal) = weight_ * input1_(fieldOrdinal1,pointOrdinal1) * input2_(fieldOrdinal2,pointOrdinal2) * input3_(fieldOrdinal3,pointOrdinal3);
1035  }
1036  }
1037  }
1038  }
1039  });
1040  }
1041  else if (input1_.rank() == 3) // based on constructor requirements, this means the others are rank 2
1042  {
1043  int spaceDim = input1_.extent_int(2);
1044  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
1045  for (int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
1046  {
1047  int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
1048  for (int pointOrdinal3=0; pointOrdinal3<numPoints3_; pointOrdinal3++)
1049  {
1050  for (int pointOrdinal2=0; pointOrdinal2<numPoints2_; pointOrdinal2++)
1051  {
1052  for (int pointOrdinal1=0; pointOrdinal1<numPoints1_; pointOrdinal1++)
1053  {
1054  int pointOrdinal = (pointOrdinal3 * numPoints2_ + pointOrdinal2) * numPoints1_ + pointOrdinal1;
1055  for (int d=0; d<spaceDim; d++)
1056  {
1057  output_(fieldOrdinal,pointOrdinal,d) = weight_ * input1_(fieldOrdinal1,pointOrdinal1,d) * input2_(fieldOrdinal2,pointOrdinal2) * input3_(fieldOrdinal3,pointOrdinal3);
1058  }
1059  }
1060  }
1061  }
1062  }
1063  });
1064  }
1065  else if (input2_.rank() == 3) // based on constructor requirements, this means the others are rank 2
1066  {
1067  int spaceDim = input2_.extent_int(2);
1068  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
1069  for (int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
1070  {
1071  int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
1072  for (int pointOrdinal3=0; pointOrdinal3<numPoints3_; pointOrdinal3++)
1073  {
1074  for (int pointOrdinal2=0; pointOrdinal2<numPoints2_; pointOrdinal2++)
1075  {
1076  for (int pointOrdinal1=0; pointOrdinal1<numPoints1_; pointOrdinal1++)
1077  {
1078  int pointOrdinal = (pointOrdinal3 * numPoints2_ + pointOrdinal2) * numPoints1_ + pointOrdinal1;
1079  for (int d=0; d<spaceDim; d++)
1080  {
1081  output_(fieldOrdinal,pointOrdinal,d) = weight_ * input1_(fieldOrdinal1,pointOrdinal1) * input2_(fieldOrdinal2,pointOrdinal2,d) * input3_(fieldOrdinal3,pointOrdinal3);
1082  }
1083  }
1084  }
1085  }
1086  }
1087  });
1088  }
1089  else if (input3_.rank() == 3) // based on constructor requirements, this means the others are rank 2
1090  {
1091  int spaceDim = input3_.extent_int(2);
1092  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numFields2_), [&] (const int& fieldOrdinal2) {
1093  for (int fieldOrdinal3=0; fieldOrdinal3 < numFields3_; fieldOrdinal3++)
1094  {
1095  int fieldOrdinal = (fieldOrdinal3 * numFields2_ + fieldOrdinal2) * numFields1_ + fieldOrdinal1;
1096  for (int pointOrdinal3=0; pointOrdinal3<numPoints3_; pointOrdinal3++)
1097  {
1098  for (int pointOrdinal2=0; pointOrdinal2<numPoints2_; pointOrdinal2++)
1099  {
1100  for (int pointOrdinal1=0; pointOrdinal1<numPoints1_; pointOrdinal1++)
1101  {
1102  int pointOrdinal = (pointOrdinal3 * numPoints2_ + pointOrdinal2) * numPoints1_ + pointOrdinal1;
1103  for (int d=0; d<spaceDim; d++)
1104  {
1105  output_(fieldOrdinal,pointOrdinal,d) = weight_ * input1_(fieldOrdinal1,pointOrdinal1) * input2_(fieldOrdinal2,pointOrdinal2) * input3_(fieldOrdinal3,pointOrdinal3,d);
1106  }
1107  }
1108  }
1109  }
1110  }
1111  });
1112  }
1113  else
1114  {
1115  // unsupported rank combination -- enforced in constructor
1116  }
1117  }
1118  }
1119  };
1120 
1121 
1122  template<typename Basis1, typename Basis2, typename Basis3,
1123  typename ExecutionSpace=typename Basis1::ExecutionSpace,
1124  typename OutputScalar = double,
1125  typename PointScalar = double>
1127  : public Basis_TensorBasis< Basis_TensorBasis<Basis1,Basis2>,
1128  Basis3>
1129  {
1132 
1133  public:
1134  using OutputViewType = typename TensorBasis123::OutputViewType;
1135  using PointViewType = typename TensorBasis123::PointViewType;
1136  using ScalarViewType = typename TensorBasis123::ScalarViewType;
1137 
1138  using OutputValueType = typename TensorBasis123::OutputValueType;
1139  using PointValueType = typename TensorBasis123::PointValueType;
1140  protected:
1141  Basis1 basis1_;
1142  Basis2 basis2_;
1143  Basis3 basis3_;
1144  public:
1145  Basis_TensorBasis3(Basis1 basis1, Basis2 basis2, Basis3 basis3)
1146  :
1147  TensorBasis123(Basis12(basis1,basis2),basis3),
1148  basis1_(basis1),
1149  basis2_(basis2),
1150  basis3_(basis3)
1151  {}
1152 
1154 
1179  virtual void getValues(OutputViewType outputValues, const EOperator operatorType,
1180  const PointViewType inputPoints12, const PointViewType inputPoints3,
1181  bool tensorPoints) const override
1182  {
1183  // TODO: rework this to use superclass's getComponentPoints.
1184 
1185  int spaceDim1 = basis1_.getBaseCellTopology().getDimension();
1186  int spaceDim2 = basis2_.getBaseCellTopology().getDimension();
1187 
1188  int totalSpaceDim12 = inputPoints12.extent_int(1);
1189 
1190  TEUCHOS_ASSERT(spaceDim1 + spaceDim2 == totalSpaceDim12);
1191 
1192  if (!tensorPoints)
1193  {
1194  auto inputPoints1 = Kokkos::subview(inputPoints12,Kokkos::ALL(),std::make_pair(0,spaceDim1));
1195  auto inputPoints2 = Kokkos::subview(inputPoints12,Kokkos::ALL(),std::make_pair(spaceDim1,totalSpaceDim12));
1196 
1197  this->getValues(outputValues, operatorType, inputPoints1, inputPoints2, inputPoints3, tensorPoints);
1198  }
1199  else
1200  {
1201  // superclass doesn't (yet) have a clever way to detect tensor points in a single container
1202  // we'd need something along those lines here to detect them in inputPoints12.
1203  // if we do add such a mechanism to superclass, it should be simple enough to call that from here
1204  INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "This method does not yet handle tensorPoints=true");
1205  }
1206  }
1207 
1235  virtual void getValues(OutputViewType outputValues, const EOperator operatorType,
1236  const PointViewType inputPoints1, const PointViewType inputPoints2, const PointViewType inputPoints3,
1237  bool tensorPoints) const = 0;
1238 
1266  void getValues( OutputViewType outputValues,
1267  const PointViewType inputPoints1, const EOperator operatorType1,
1268  const PointViewType inputPoints2, const EOperator operatorType2,
1269  const PointViewType inputPoints3, const EOperator operatorType3,
1270  bool tensorPoints, double weight=1.0) const
1271  {
1272  int basisCardinality1 = basis1_.getCardinality();
1273  int basisCardinality2 = basis2_.getCardinality();
1274  int basisCardinality3 = basis3_.getCardinality();
1275 
1276  int spaceDim1 = inputPoints1.extent_int(1);
1277  int spaceDim2 = inputPoints2.extent_int(1);
1278  int spaceDim3 = inputPoints3.extent_int(1);
1279 
1280  int totalPointCount;
1281  int pointCount1, pointCount2, pointCount3;
1282  if (tensorPoints)
1283  {
1284  pointCount1 = inputPoints1.extent_int(0);
1285  pointCount2 = inputPoints2.extent_int(0);
1286  pointCount3 = inputPoints3.extent_int(0);
1287  totalPointCount = pointCount1 * pointCount2 * pointCount3;
1288  }
1289  else
1290  {
1291  totalPointCount = inputPoints1.extent_int(0);
1292  pointCount1 = totalPointCount;
1293  pointCount2 = totalPointCount;
1294  pointCount3 = totalPointCount;
1295 
1296  INTREPID2_TEST_FOR_EXCEPTION((totalPointCount != inputPoints2.extent_int(0)) || (totalPointCount != inputPoints3.extent_int(0)),
1297  std::invalid_argument, "If tensorPoints is false, the point counts must match!");
1298  }
1299 
1300  // structure of this implementation:
1301  /*
1302  - allocate output1, output2, output3 containers
1303  - either:
1304  1. split off the tensor functor call into its own method in TensorBasis, and
1305  - call it once with output1, output2, placing these in another newly allocated output12, then
1306  - call it again with output12, output3
1307  OR
1308  2. create a 3-argument tensor functor and call it with output1,output2,output3
1309 
1310  At the moment, the 3-argument functor seems like a better approach. It's likely more code, but somewhat
1311  more efficient and easier to understand/debug. And the code is fairly straightforward to produce.
1312  */
1313 
1314  // copied from the 2-argument TensorBasis implementation:
1315 
1316  OutputViewType outputValues1, outputValues2, outputValues3;
1317 
1318  //Note: the gradient of HGRAD basis on a line has an output vector of rank 3, the last dimension being of size 1.
1319  // in particular this holds even when computing the divergence of an HDIV basis, which is scalar and has rank 2.
1320  if ((spaceDim1 == 1) && (operatorType1 == OPERATOR_VALUE))
1321  {
1322  // use a rank 2 container for basis1
1323  outputValues1 = getMatchingViewWithLabel(outputValues,"output values - basis 1",basisCardinality1,pointCount1);
1324  }
1325  else
1326  {
1327  outputValues1 = getMatchingViewWithLabel(outputValues,"output values - basis 1",basisCardinality1,pointCount1,spaceDim1);
1328  }
1329  if ((spaceDim2 == 1) && (operatorType2 == OPERATOR_VALUE))
1330  {
1331  // use a rank 2 container for basis2
1332  outputValues2 = getMatchingViewWithLabel(outputValues,"output values - basis 2",basisCardinality2,pointCount2);
1333  }
1334  else
1335  {
1336  outputValues2 = getMatchingViewWithLabel(outputValues,"output values - basis 2",basisCardinality2,pointCount2,spaceDim2);
1337  }
1338  if ((spaceDim3 == 1) && (operatorType3 == OPERATOR_VALUE))
1339  {
1340  // use a rank 2 container for basis2
1341  outputValues3 = getMatchingViewWithLabel(outputValues,"output values - basis 3",basisCardinality3,pointCount3);
1342  }
1343  else
1344  {
1345  outputValues3 = getMatchingViewWithLabel(outputValues,"output values - basis 3",basisCardinality3,pointCount3,spaceDim3);
1346  }
1347 
1348  basis1_.getValues(outputValues1,inputPoints1,operatorType1);
1349  basis2_.getValues(outputValues2,inputPoints2,operatorType2);
1350  basis3_.getValues(outputValues3,inputPoints3,operatorType3);
1351 
1352  const int outputVectorSize = getVectorSizeForHierarchicalParallelism<OutputScalar>();
1353  const int pointVectorSize = getVectorSizeForHierarchicalParallelism<PointScalar>();
1354  const int vectorSize = std::max(outputVectorSize,pointVectorSize);
1355 
1356  auto policy = Kokkos::TeamPolicy<ExecutionSpace>(basisCardinality1,Kokkos::AUTO(),vectorSize);
1357 
1359  FunctorType functor(outputValues, outputValues1, outputValues2, outputValues3, tensorPoints, weight);
1360  Kokkos::parallel_for( policy , functor, "TensorBasis3_Functor");
1361  }
1362  };
1363 } // end namespace Intrepid2
1364 
1365 #endif /* Intrepid2_TensorBasis_h */
virtual void getValues(OutputViewType outputValues, const EOperator operatorType, const PointViewType inputPoints12, const PointViewType inputPoints3, bool tensorPoints) const override
Evaluation of a tensor FEM basis on a reference cell.
KOKKOS_INLINE_FUNCTION ScalarType getView1Entry()
pointValueType PointValueType
Point value type for basis; default is double.
Functor for computing values for the TensorBasis class.
Kokkos::View< ordinal_type *, typename ExecSpaceType::array_layout, Kokkos::HostSpace > OrdinalTypeArray1DHost
View type for 1d host array.
Kokkos::View< ordinal_type **, typename ExecSpaceType::array_layout, Kokkos::HostSpace > OrdinalTypeArray2DHost
View type for 2d host array.
virtual const char * getName() const override
Returns basis name.
ordinal_type basisCardinality_
Cardinality of the basis, i.e., the number of basis functions/degrees-of-freedom. ...
ECoordinates basisCoordinates_
The coordinate system for which the basis is defined.
ordinal_type getTensorDkEnumeration(ordinal_type dkEnum1, ordinal_type operatorOrder1, ordinal_type dkEnum2, ordinal_type operatorOrder2) const
Given &quot;Dk&quot; enumeration indices for the component bases, returns a Dk enumeration index for the compos...
An abstract base class that defines interface for concrete basis implementations for Finite Element (...
virtual void getDofCoords(typename BasisSuper::ScalarViewType dofCoords) const override
Fills in spatial locations (coordinates) of degrees of freedom (nodes) on the reference cell...
void getValues(OutputViewType outputValues, const PointViewType inputPoints, const EOperator operatorType=OPERATOR_VALUE) const override
Evaluation of a FEM basis on a reference cell.
Basis_TensorBasis(Basis1 basis1, Basis2 basis2)
Constructor.
virtual void getValues(OutputViewType, const PointViewType, const EOperator=OPERATOR_VALUE) const
Evaluation of a FEM basis on a reference cell.
ordinal_type basisDegree_
Degree of the largest complete polynomial space that can be represented by the basis.
For two cell topologies whose tensor product is a third, this class establishes a mapping from subcel...
Header function for Intrepid2::Util class and other utility functions.
Implementation of an assert that can safely be called from device code.
unsigned getCompositeSubcellOrdinal(unsigned subcell1Dim, unsigned subcell1Ordinal, unsigned subcell2Dim, unsigned subcell2Ordinal)
Map from component subcell ordinals to the corresponding composite subcell ordinal.
Kokkos::DynRankView< OutputValueType, Kokkos::LayoutStride, ExecSpaceType > OutputViewType
View type for basis value output.
KOKKOS_INLINE_FUNCTION void set(ScalarType value)
KOKKOS_INLINE_FUNCTION void setLocation(const Kokkos::Array< int, 7 > location)
void getValues(OutputViewType outputValues, const PointViewType inputPoints1, const EOperator operatorType1, const PointViewType inputPoints2, const EOperator operatorType2, bool tensorPoints, double weight=1.0) const
Evaluation of a tensor FEM basis on a reference cell.
OrdinalTypeArray2DHost fieldOrdinalPolynomialDegree_
Polynomial degree for each degree of freedom. Only defined for hierarchical bases right now...
KOKKOS_INLINE_FUNCTION ScalarType getView2Entry()
void getComponentPoints(const PointViewType inputPoints, const bool attemptTensorDecomposition, PointViewType &inputPoints1, PointViewType &inputPoints2, bool &tensorDecompositionSucceeded) const
Method to extract component points from composite points.
void setOrdinalTagData(OrdinalTypeView3D &tagToOrdinal, OrdinalTypeView2D &ordinalToTag, const OrdinalTypeView1D tags, const ordinal_type basisCard, const ordinal_type tagSize, const ordinal_type posScDim, const ordinal_type posScOrd, const ordinal_type posDfOrd)
Fills ordinalToTag_ and tagToOrdinal_ by basis-specific tag data.
Kokkos::DynRankView< PointValueType, Kokkos::LayoutStride, ExecSpaceType > PointViewType
View type for input points.
Implementation of support for traversing component views alongside a view that represents a combinati...
void getValues(OutputViewType outputValues, const PointViewType inputPoints1, const EOperator operatorType1, const PointViewType inputPoints2, const EOperator operatorType2, const PointViewType inputPoints3, const EOperator operatorType3, bool tensorPoints, double weight=1.0) const
Evaluation of a tensor FEM basis on a reference cell; subclasses should override this.
KOKKOS_INLINE_FUNCTION int nextIncrementRank()
outputValueType OutputValueType
Output value type for basis; default is double.
Kokkos::DynRankView< scalarType, Kokkos::LayoutStride, ExecSpaceType > ScalarViewType
View type for scalars.
virtual void getValues(OutputViewType outputValues, const EOperator operatorType, const PointViewType inputPoints1, const PointViewType inputPoints2, bool tensorPoints) const
Evaluation of a tensor FEM basis on a reference cell; subclasses should override this.
Class that defines mappings from component cell topologies to their tensor product topologies...
ExecSpaceType ExecutionSpace
(Kokkos) Execution space for basis.
A helper class that allows iteration over three Kokkos Views simultaneously, according to tensor comb...
Basis defined as the tensor product of two component bases.
Functor for computing values for the TensorBasis3 class.
Header file for the abstract base class Intrepid2::Basis.
OrdinalTypeArray2DHost ordinalToTag_
&quot;true&quot; if tagToOrdinal_ and ordinalToTag_ have been initialized
shards::CellTopology basisCellTopology_
Base topology of the cells for which the basis is defined. See the Shards package for definition of b...