16 #ifndef Intrepid2_DataCombiners_hpp
17 #define Intrepid2_DataCombiners_hpp
28 #include "Intrepid2_ScalarView.hpp"
31 template<
class DataScalar,
typename DeviceType>
34 template<
class BinaryOperator,
class ThisUnderlyingViewType,
class AUnderlyingViewType,
class BUnderlyingViewType,
35 class ArgExtractorThis,
class ArgExtractorA,
class ArgExtractorB,
bool includeInnerLoop=
false>
39 ThisUnderlyingViewType this_underlying_;
40 AUnderlyingViewType A_underlying_;
41 BUnderlyingViewType B_underlying_;
42 BinaryOperator binaryOperator_;
45 InPlaceCombinationFunctor(ThisUnderlyingViewType this_underlying, AUnderlyingViewType A_underlying, BUnderlyingViewType B_underlying,
46 BinaryOperator binaryOperator)
48 this_underlying_(this_underlying),
49 A_underlying_(A_underlying),
50 B_underlying_(B_underlying),
51 binaryOperator_(binaryOperator)
53 INTREPID2_TEST_FOR_EXCEPTION(includeInnerLoop,std::invalid_argument,
"If includeInnerLoop is true, must specify the size of the inner loop");
56 InPlaceCombinationFunctor(ThisUnderlyingViewType this_underlying, AUnderlyingViewType A_underlying, BUnderlyingViewType B_underlying,
57 BinaryOperator binaryOperator,
int innerLoopSize)
59 this_underlying_(this_underlying),
60 A_underlying_(A_underlying),
61 B_underlying_(B_underlying),
62 binaryOperator_(binaryOperator),
63 innerLoopSize_(innerLoopSize)
65 INTREPID2_TEST_FOR_EXCEPTION(includeInnerLoop,std::invalid_argument,
"If includeInnerLoop is true, must specify the size of the inner loop");
// Pointwise call operator: for the index tuple args..., fetches the destination entry and the
// two operand entries through the three ArgExtractor types, then stores
// binaryOperator_(A_val, B_val) into the destination.
// NOTE(review): the return-type line is not visible in this listing; presumably it uses M to
// select this overload when includeInnerLoop is false -- confirm.
68 template<
class ...IntArgs,
bool M=includeInnerLoop>
69 KOKKOS_INLINE_FUNCTION
71 operator()(
const IntArgs&... args)
const
// result is bound by (non-const) reference so the assignment below writes into this_underlying_.
73 auto & result = ArgExtractorThis::get( this_underlying_, args... );
74 const auto & A_val = ArgExtractorA::get( A_underlying_, args... );
75 const auto & B_val = ArgExtractorB::get( B_underlying_, args... );
77 result = binaryOperator_(A_val,B_val);
// Call operator with an inner loop: for the index tuple args..., iterates iFinal over
// [0, innerLoopSize_) and appends iFinal as one extra trailing index when fetching the
// destination and operand entries.
// NOTE(review): the return-type line is not visible in this listing; presumably it uses M to
// select this overload when includeInnerLoop is true -- confirm.
80 template<
class ...IntArgs,
bool M=includeInnerLoop>
81 KOKKOS_INLINE_FUNCTION
83 operator()(
const IntArgs&... args)
const
// The loop index takes the type of the first index argument, so every index passed on to the
// extractors has a single common type.
85 using int_type = std::tuple_element_t<0, std::tuple<IntArgs...>>;
86 for (int_type iFinal=0; iFinal<static_cast<int_type>(innerLoopSize_); iFinal++)
88 auto & result = ArgExtractorThis::get( this_underlying_, args..., iFinal );
89 const auto & A_val = ArgExtractorA::get( A_underlying_, args..., iFinal );
90 const auto & B_val = ArgExtractorB::get( B_underlying_, args..., iFinal );
92 result = binaryOperator_(A_val,B_val);
98 template<
class BinaryOperator,
class ThisUnderlyingViewType,
class AUnderlyingViewType,
class BUnderlyingViewType>
102 ThisUnderlyingViewType this_underlying_;
103 AUnderlyingViewType A_underlying_;
104 BUnderlyingViewType B_underlying_;
105 BinaryOperator binaryOperator_;
108 AUnderlyingViewType A_underlying,
109 BUnderlyingViewType B_underlying,
110 BinaryOperator binaryOperator)
// Stores copies of the three view handles and of the operator.
112 this_underlying_(this_underlying),
113 A_underlying_(A_underlying),
114 B_underlying_(B_underlying),
115 binaryOperator_(binaryOperator)
// Each view must hold exactly one entry along dimension 0; std::invalid_argument is raised otherwise.
// Only extent(0) is tested by these three checks; the "rank 1" part of the message is not verified here.
117 INTREPID2_TEST_FOR_EXCEPTION(this_underlying.extent(0) != 1,std::invalid_argument,
"all views for InPlaceCombinationFunctorConstantCase should have rank 1 and extent 1");
118 INTREPID2_TEST_FOR_EXCEPTION(A_underlying.extent(0) != 1,std::invalid_argument,
"all views for InPlaceCombinationFunctorConstantCase should have rank 1 and extent 1");
119 INTREPID2_TEST_FOR_EXCEPTION(B_underlying.extent(0) != 1,std::invalid_argument,
"all views for InPlaceCombinationFunctorConstantCase should have rank 1 and extent 1");
122 KOKKOS_INLINE_FUNCTION
123 void operator()(
const int arg0)
const
125 auto & result = this_underlying_(0);
126 const auto & A_val = A_underlying_(0);
127 const auto & B_val = B_underlying_(0);
129 result = binaryOperator_(A_val,B_val);
134 template<
bool passThroughBlockDiagonalArgs>
137 template<
class ViewType,
class ...IntArgs>
138 static KOKKOS_INLINE_FUNCTION
typename ViewType::reference_type
get(
const ViewType &view,
const IntArgs&... intArgs)
140 return view.getWritableEntryWithPassThroughOption(passThroughBlockDiagonalArgs, intArgs...);
145 template<
bool passThroughBlockDiagonalArgs>
148 template<
class ViewType,
class ...IntArgs>
149 static KOKKOS_INLINE_FUNCTION
typename ViewType::const_reference_type
get(
const ViewType &view,
const IntArgs&... intArgs)
151 return view.getEntryWithPassThroughOption(passThroughBlockDiagonalArgs, intArgs...);
156 template <
class DataScalar,
typename DeviceType,
class BinaryOperator>
159 using reference_type =
typename ScalarView<DataScalar,DeviceType>::reference_type;
160 using const_reference_type =
typename ScalarView<const DataScalar,DeviceType>::reference_type;
// Policy-based overload: builds a Functor from the three containers and the binary operator,
// then launches it with Kokkos::parallel_for over the supplied policy under the kernel label
// "compute in-place".
// NOTE(review): the first line of the signature and the Functor alias are not visible in this
// listing.  argThis/argA/argB are not used by the visible statements; presumably only their
// types matter (through the Functor alias) -- confirm.
163 template<
class PolicyType,
class ThisUnderlyingViewType,
class AUnderlyingViewType,
class BUnderlyingViewType,
164 class ArgExtractorThis,
class ArgExtractorA,
class ArgExtractorB>
166 AUnderlyingViewType &A_underlying, BUnderlyingViewType &B_underlying,
167 BinaryOperator &binaryOperator, ArgExtractorThis argThis, ArgExtractorA argA, ArgExtractorB argB)
170 Functor functor(this_underlying, A_underlying, B_underlying, binaryOperator);
171 Kokkos::parallel_for(
"compute in-place", policy, functor);
// Compile-time-rank implementation, enabled when rank != 7 (see the enable_if_t return type).
// NOTE(review): this listing omits the signature and most of the branch conditions, so the
// comments below describe only what the visible statements do.
// Visible pattern: for each of this/A/B choose a container (getUnderlyingView<1>(),
// getUnderlyingView<rank>(), or the Data object itself) and an argument extractor, then forward
// them with the policy and the operator to the policy-based storeInPlaceCombination overload.
// Every visible case list ends in a default that raises std::invalid_argument
// ("Invalid/unexpected arg index").
177 enable_if_t<rank != 7, void>
180 auto policy = thisData.template dataExtentRangePolicy<rank>();
// Walks the variation types of `data` dimension by dimension, testing each against GENERAL.
// NOTE(review): surrounding lines are not visible; presumably this is the body of the
// get1DArgIndex helper used below -- confirm.
209 const auto & variationTypes = data.getVariationTypes();
210 for (
int d=0; d<rank; d++)
212 if (variationTypes[d] == GENERAL)
// Here this, A and B are all accessed through rank-1 underlying views.
222 auto thisAE = constArg;
225 auto & this_underlying = thisData.template getUnderlyingView<1>();
226 auto & A_underlying = A.template getUnderlyingView<1>();
227 auto & B_underlying = B.template getUnderlyingView<1>();
228 storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, BAE);
// When this_full, A_full and B_full all hold: all three use their rank-`rank` underlying views.
230 else if (this_full && A_full && B_full)
232 auto thisAE = fullArgs;
236 auto & this_underlying = thisData.template getUnderlyingView<rank>();
237 auto & A_underlying = A.template getUnderlyingView<rank>();
238 auto & B_underlying = B.template getUnderlyingView<rank>();
240 storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, BAE);
// A through a rank-1 underlying view.
245 auto & A_underlying = A.template getUnderlyingView<1>();
248 auto thisAE = fullArgs;
249 auto & this_underlying = thisData.template getUnderlyingView<rank>();
254 auto & B_underlying = B.template getUnderlyingView<rank>();
255 storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, BAE);
259 auto BAE = fullArgsData;
// B is 1D with a valid argument index: the case list below uses the same argN extractor for
// this and for B (presumably selected by argIndex; the switch line itself is not visible).
266 if (B_1D && (get1DArgIndex(B) != -1) )
269 const int argIndex = get1DArgIndex(B);
270 auto & B_underlying = B.template getUnderlyingView<1>();
271 auto & this_underlying = thisData.template getUnderlyingView<1>();
274 case 0:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg0, AAE, arg0);
break;
275 case 1:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg1, AAE, arg1);
break;
276 case 2:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg2, AAE, arg2);
break;
277 case 3:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg3, AAE, arg3);
break;
278 case 4:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg4, AAE, arg4);
break;
279 case 5:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg5, AAE, arg5);
break;
280 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid/unexpected arg index");
286 auto thisAE = fullArgsWritable;
287 auto BAE = fullArgsData;
// B through a rank-1 underlying view.
295 auto & B_underlying = B.template getUnderlyingView<1>();
298 auto thisAE = fullArgs;
299 auto & this_underlying = thisData.template getUnderlyingView<rank>();
303 auto & A_underlying = A.template getUnderlyingView<rank>();
305 storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, BAE);
310 auto AAE = fullArgsData;
// A is 1D with a valid argument index: the case list below uses the same argN extractor for
// this and for A.
317 if (A_1D && (get1DArgIndex(A) != -1) )
320 const int argIndex = get1DArgIndex(A);
321 auto & A_underlying = A.template getUnderlyingView<1>();
322 auto & this_underlying = thisData.template getUnderlyingView<1>();
325 case 0:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg0, arg0, BAE);
break;
326 case 1:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg1, arg1, BAE);
break;
327 case 2:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg2, arg2, BAE);
break;
328 case 3:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg3, arg3, BAE);
break;
329 case 4:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg4, arg4, BAE);
break;
330 case 5:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg5, arg5, BAE);
break;
331 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid/unexpected arg index");
337 auto thisAE = fullArgsWritable;
338 auto AAE = fullArgsData;
// The destination itself is 1D with a valid argument index.
345 if (this_1D && (get1DArgIndex(thisData) != -1))
352 const int argThis = get1DArgIndex(thisData);
353 const int argA = get1DArgIndex(A);
354 const int argB = get1DArgIndex(B);
356 auto & A_underlying = A.template getUnderlyingView<1>();
357 auto & B_underlying = B.template getUnderlyingView<1>();
358 auto & this_underlying = thisData.template getUnderlyingView<1>();
// Both A and B have a valid 1D argument index.
359 if ((argA != -1) && (argB != -1))
// Debug builds only: A's and B's argument indices must equal the destination's.
361 #ifdef INTREPID2_HAVE_DEBUG
362 INTREPID2_TEST_FOR_EXCEPTION(argA != argThis, std::logic_error,
"Unexpected 1D arg combination.");
363 INTREPID2_TEST_FOR_EXCEPTION(argB != argThis, std::logic_error,
"Unexpected 1D arg combination.");
367 case 0:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg0, arg0, arg0);
break;
368 case 1:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg1, arg1, arg1);
break;
369 case 2:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg2, arg2, arg2);
break;
370 case 3:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg3, arg3, arg3);
break;
371 case 4:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg4, arg4, arg4);
break;
372 case 5:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg5, arg5, arg5);
break;
373 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid/unexpected arg index");
// Same case list, but B is passed as the Data object itself and read through fullArgsData.
381 case 0:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg0, arg0, fullArgsData);
break;
382 case 1:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg1, arg1, fullArgsData);
break;
383 case 2:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg2, arg2, fullArgsData);
break;
384 case 3:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg3, arg3, fullArgsData);
break;
385 case 4:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg4, arg4, fullArgsData);
break;
386 case 5:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg5, arg5, fullArgsData);
break;
387 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid/unexpected arg index");
// Same case list, but A is passed as the Data object itself and read through fullArgsData.
395 case 0:
storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg0, fullArgsData, arg0);
break;
396 case 1:
storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg1, fullArgsData, arg1);
break;
397 case 2:
storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg2, fullArgsData, arg2);
break;
398 case 3:
storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg3, fullArgsData, arg3);
break;
399 case 4:
storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg4, fullArgsData, arg4);
break;
400 case 5:
storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg5, fullArgsData, arg5);
break;
401 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid/unexpected arg index");
// Destination accessed through its rank-`rank` underlying view with the fullArgs extractor.
408 auto & this_underlying = thisData.template getUnderlyingView<rank>();
409 auto thisAE = fullArgs;
413 auto & A_underlying = A.template getUnderlyingView<rank>();
// B is 1D with a valid argument index: only B's extractor varies across the cases.
416 if (B_1D && (get1DArgIndex(B) != -1))
418 const int argIndex = get1DArgIndex(B);
419 auto & B_underlying = B.template getUnderlyingView<1>();
422 case 0:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg0);
break;
423 case 1:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg1);
break;
424 case 2:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg2);
break;
425 case 3:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg3);
break;
426 case 4:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg4);
break;
427 case 5:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg5);
break;
428 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid/unexpected arg index");
// A is 1D with a valid argument index: only A's extractor varies across the cases.
441 if (A_1D && (get1DArgIndex(A) != -1))
443 const int argIndex = get1DArgIndex(A);
444 auto & A_underlying = A.template getUnderlyingView<1>();
447 auto & B_underlying = B.template getUnderlyingView<rank>();
451 case 0:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg0, BAE);
break;
452 case 1:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg1, BAE);
break;
453 case 2:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg2, BAE);
break;
454 case 3:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg3, BAE);
break;
455 case 4:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg4, BAE);
break;
456 case 5:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg5, BAE);
break;
457 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid/unexpected arg index");
// Same case list, but B is passed as the Data object itself and read through fullArgsData.
462 auto BAE = fullArgsData;
465 case 0:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg0, BAE);
break;
466 case 1:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg1, BAE);
break;
467 case 2:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg2, BAE);
break;
468 case 3:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg3, BAE);
break;
469 case 4:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg4, BAE);
break;
470 case 5:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg5, BAE);
break;
471 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid/unexpected arg index");
478 auto AAE = fullArgsData;
479 auto BAE = fullArgsData;
487 auto thisAE = fullArgsWritable;
488 auto AAE = fullArgsData;
489 auto BAE = fullArgsData;
// Compile-time-rank implementation, enabled when rank == 7 (see the enable_if_t return type).
// The functor is given the Data objects themselves (thisData, A, B) plus an inner-loop size
// dim6, with includeInnerLoop set to true, and is launched over the data-extent range policy.
// NOTE(review): the signature and the definitions of Functor and dim6 are not visible in this
// listing; presumably dim6 is the extent of the last logical dimension -- confirm.
498 enable_if_t<rank == 7, void>
501 auto policy = thisData.template dataExtentRangePolicy<rank>();
509 const bool includeInnerLoop =
true;
511 Functor functor(thisData, A, B, binaryOperator, dim6);
512 Kokkos::parallel_for(
"compute in-place", policy, functor);
// Runtime-rank entry point.
// NOTE(review): the signature and the condition guarding the single-entry path are not visible
// in this listing; the comments below describe only the visible statements.
517 using ExecutionSpace =
typename DeviceType::execution_space;
// Debug builds only: A and B must match thisData in every logical extent, otherwise
// std::invalid_argument is raised.
519 #ifdef INTREPID2_HAVE_DEBUG
521 for (
int d=0; d<rank_; d++)
523 INTREPID2_TEST_FOR_EXCEPTION(A.
extent_int(d) != thisData.
extent_int(d), std::invalid_argument,
"A, B, and this must agree on all logical extents");
524 INTREPID2_TEST_FOR_EXCEPTION(B.
extent_int(d) != thisData.
extent_int(d), std::invalid_argument,
"A, B, and this must agree on all logical extents");
// Single-entry path: a one-iteration range policy [0,1) runs the constant-case functor on the
// rank-1 underlying views of this, A and B.
535 Kokkos::RangePolicy<ExecutionSpace> policy(ExecutionSpace(),0,1);
537 auto this_underlying = thisData.template getUnderlyingView<1>();
538 auto A_underlying = A.template getUnderlyingView<1>();
539 auto B_underlying = B.template getUnderlyingView<1>();
541 using ConstantCaseFunctor = InPlaceCombinationFunctorConstantCase<decltype(binaryOperator), decltype(this_underlying),
542 decltype(A_underlying), decltype(B_underlying)>;
544 ConstantCaseFunctor functor(this_underlying, A_underlying, B_underlying, binaryOperator);
545 Kokkos::parallel_for(
"compute in-place", policy,functor);
// Otherwise: turn the runtime rank of thisData into a compile-time template argument (1..7).
549 switch (thisData.
rank())
551 case 1: storeInPlaceCombination<1>(thisData, A, B, binaryOperator);
break;
552 case 2: storeInPlaceCombination<2>(thisData, A, B, binaryOperator);
break;
553 case 3: storeInPlaceCombination<3>(thisData, A, B, binaryOperator);
break;
554 case 4: storeInPlaceCombination<4>(thisData, A, B, binaryOperator);
break;
555 case 5: storeInPlaceCombination<5>(thisData, A, B, binaryOperator);
break;
556 case 6: storeInPlaceCombination<6>(thisData, A, B, binaryOperator);
break;
557 case 7: storeInPlaceCombination<7>(thisData, A, B, binaryOperator);
break;
KOKKOS_INLINE_FUNCTION ordinal_type getUnderlyingViewRank() const
returns the rank of the View that stores the unique data
static enable_if_t< rank==7, void > storeInPlaceCombination(Data< DataScalar, DeviceType > &thisData, const Data< DataScalar, DeviceType > &A, const Data< DataScalar, DeviceType > &B, BinaryOperator binaryOperator)
storeInPlaceCombination with compile-time rank – implementation for rank of 7. (Not optimized; expect...
#define INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(test, x, msg)
Defines the Data class, a wrapper around a Kokkos::View that allows data that is constant or repeatin...
KOKKOS_INLINE_FUNCTION bool underlyingMatchesLogical() const
Returns true if the underlying container has exactly the same rank and extents as the logical contain...
Wrapper around a Kokkos::View that allows data that is constant or repeating in various logical dimen...
static void storeInPlaceCombination(PolicyType &policy, ThisUnderlyingViewType &this_underlying, AUnderlyingViewType &A_underlying, BUnderlyingViewType &B_underlying, BinaryOperator &binaryOperator, ArgExtractorThis argThis, ArgExtractorA argA, ArgExtractorB argB)
storeInPlaceCombination implementation for rank < 7, with compile-time underlying views and argument ...
Defines functors for use with Data objects: so far, we include simple arithmetical functors for sum...
functor definition for the constant-data case.
KOKKOS_INLINE_FUNCTION ordinal_type getUnderlyingViewSize() const
returns the number of entries in the View that stores the unique data
KOKKOS_INLINE_FUNCTION unsigned rank() const
Returns the logical rank of the Data container.
KOKKOS_INLINE_FUNCTION int extent_int(const int &r) const
Returns the logical extent in the specified dimension.
Defines DataVariationType enum that specifies the types of variation possible within a Data object...
static enable_if_t< rank!=7, void > storeInPlaceCombination(Data< DataScalar, DeviceType > &thisData, const Data< DataScalar, DeviceType > &A, const Data< DataScalar, DeviceType > &B, BinaryOperator binaryOperator)
storeInPlaceCombination with compile-time rank – implementation for rank < 7.
KOKKOS_INLINE_FUNCTION int getDataExtent(const ordinal_type &d) const
returns the true extent of the data corresponding to the logical dimension provided; if the data does...