8 #ifndef Intrepid2_DataCombiners_hpp
9 #define Intrepid2_DataCombiners_hpp
20 #include "Intrepid2_ScalarView.hpp"
// Template head parameterized on a scalar type and a device type.
// NOTE(review): the declaration this head introduces is not visible here; presumably it is a
// forward declaration of the Data class used further down — confirm.
23 template<
class DataScalar,
typename DeviceType>
// Template parameters and stored state of InPlaceCombinationFunctor (name taken from its constructors).
//   BinaryOperator          callable invoked as binaryOperator_(A_val, B_val)
//   *UnderlyingViewType     types of the destination ("this") container and of the two operands
//   ArgExtractor*           types providing a static get(view, indices...) used to reach one entry
//   includeInnerLoop        compile-time flag, false by default; both operator() overloads take it as
//                           the default for their template parameter M
26 template<
class BinaryOperator,
class ThisUnderlyingViewType,
class AUnderlyingViewType,
class BUnderlyingViewType,
27 class ArgExtractorThis,
class ArgExtractorA,
class ArgExtractorB,
bool includeInnerLoop=
false>
// Destination container, the two operand containers, and the operator applied entry by entry.
31 ThisUnderlyingViewType this_underlying_;
32 AUnderlyingViewType A_underlying_;
33 BUnderlyingViewType B_underlying_;
34 BinaryOperator binaryOperator_;
// Constructor that takes no inner-loop size: stores the three containers and the binary operator.
// Throws std::invalid_argument when includeInnerLoop is true, since this overload receives no
// inner-loop size.
37 InPlaceCombinationFunctor(ThisUnderlyingViewType this_underlying, AUnderlyingViewType A_underlying, BUnderlyingViewType B_underlying,
38 BinaryOperator binaryOperator)
40 this_underlying_(this_underlying),
41 A_underlying_(A_underlying),
42 B_underlying_(B_underlying),
43 binaryOperator_(binaryOperator)
45 INTREPID2_TEST_FOR_EXCEPTION(includeInnerLoop,std::invalid_argument,
"If includeInnerLoop is true, must specify the size of the inner loop");
48 InPlaceCombinationFunctor(ThisUnderlyingViewType this_underlying, AUnderlyingViewType A_underlying, BUnderlyingViewType B_underlying,
49 BinaryOperator binaryOperator,
int innerLoopSize)
51 this_underlying_(this_underlying),
52 A_underlying_(A_underlying),
53 B_underlying_(B_underlying),
54 binaryOperator_(binaryOperator),
55 innerLoopSize_(innerLoopSize)
57 INTREPID2_TEST_FOR_EXCEPTION(includeInnerLoop,std::invalid_argument,
"If includeInnerLoop is true, must specify the size of the inner loop");
// Call operator taking one integer argument per index.
// Uses each ArgExtractor to obtain the destination entry and the A and B entries for the given
// indices, then assigns binaryOperator_(A_val, B_val) to the destination entry.
// NOTE(review): the return-type line is not visible here; presumably it uses M to enable this
// overload only when no inner loop is requested — confirm.
60 template<
class ...IntArgs,
bool M=includeInnerLoop>
61 KOKKOS_INLINE_FUNCTION
63 operator()(
const IntArgs&... args)
const
65 auto & result = ArgExtractorThis::get( this_underlying_, args... );
66 const auto & A_val = ArgExtractorA::get( A_underlying_, args... );
67 const auto & B_val = ArgExtractorB::get( B_underlying_, args... );
69 result = binaryOperator_(A_val,B_val);
// Call operator that adds one trailing index: for each iFinal in [0, innerLoopSize_), extracts the
// destination, A and B entries at (args..., iFinal) and stores binaryOperator_(A_val, B_val).
// NOTE(review): the return-type line is not visible here; presumably it uses M to enable this
// overload only when the inner loop is requested — confirm.
72 template<
class ...IntArgs,
bool M=includeInnerLoop>
73 KOKKOS_INLINE_FUNCTION
75 operator()(
const IntArgs&... args)
const
// The loop counter takes the type of the first index argument; innerLoopSize_ is cast to that type.
77 using int_type = std::tuple_element_t<0, std::tuple<IntArgs...>>;
78 for (int_type iFinal=0; iFinal<static_cast<int_type>(innerLoopSize_); iFinal++)
80 auto & result = ArgExtractorThis::get( this_underlying_, args..., iFinal );
81 const auto & A_val = ArgExtractorA::get( A_underlying_, args..., iFinal );
82 const auto & B_val = ArgExtractorB::get( B_underlying_, args..., iFinal );
84 result = binaryOperator_(A_val,B_val);
// Template parameters and stored state of the functor for the constant-data case
// (InPlaceCombinationFunctorConstantCase, name taken from the exception messages and its use below).
// Unlike the general functor it takes no ArgExtractor types: its operator() always addresses entry 0.
90 template<
class BinaryOperator,
class ThisUnderlyingViewType,
class AUnderlyingViewType,
class BUnderlyingViewType>
// Destination view, the two operand views, and the operator that combines them.
94 ThisUnderlyingViewType this_underlying_;
95 AUnderlyingViewType A_underlying_;
96 BUnderlyingViewType B_underlying_;
97 BinaryOperator binaryOperator_;
// Constructor (the first line of its signature is not visible here): stores the three views and
// the binary operator, then throws std::invalid_argument if extent(0) of any view differs from 1.
// NOTE(review): the message mentions "rank 1 and extent 1", but only extent(0) is tested in the
// visible code; the rank itself is not checked here.
100 AUnderlyingViewType A_underlying,
101 BUnderlyingViewType B_underlying,
102 BinaryOperator binaryOperator)
104 this_underlying_(this_underlying),
105 A_underlying_(A_underlying),
106 B_underlying_(B_underlying),
107 binaryOperator_(binaryOperator)
109 INTREPID2_TEST_FOR_EXCEPTION(this_underlying.extent(0) != 1,std::invalid_argument,
"all views for InPlaceCombinationFunctorConstantCase should have rank 1 and extent 1");
110 INTREPID2_TEST_FOR_EXCEPTION(A_underlying.extent(0) != 1,std::invalid_argument,
"all views for InPlaceCombinationFunctorConstantCase should have rank 1 and extent 1");
111 INTREPID2_TEST_FOR_EXCEPTION(B_underlying.extent(0) != 1,std::invalid_argument,
"all views for InPlaceCombinationFunctorConstantCase should have rank 1 and extent 1");
// Call operator for the constant case: combines entry 0 of A and entry 0 of B with the binary
// operator and writes the result to entry 0 of the destination. The argument arg0 is not used.
114 KOKKOS_INLINE_FUNCTION
115 void operator()(
const int arg0)
const
117 auto & result = this_underlying_(0);
118 const auto & A_val = A_underlying_(0);
119 const auto & B_val = B_underlying_(0);
121 result = binaryOperator_(A_val,B_val);
// Argument extractor yielding a writable entry (the enclosing struct's header is not visible here).
// passThroughBlockDiagonalArgs is a compile-time flag forwarded unchanged to the view.
126 template<
bool passThroughBlockDiagonalArgs>
// Returns view.getWritableEntryWithPassThroughOption(passThroughBlockDiagonalArgs, intArgs...),
// typed as ViewType::reference_type.
129 template<
class ViewType,
class ...IntArgs>
130 static KOKKOS_INLINE_FUNCTION
typename ViewType::reference_type
get(
const ViewType &view,
const IntArgs&... intArgs)
132 return view.getWritableEntryWithPassThroughOption(passThroughBlockDiagonalArgs, intArgs...);
// Argument extractor yielding a read-only entry (the enclosing struct's header is not visible here).
// passThroughBlockDiagonalArgs is a compile-time flag forwarded unchanged to the view.
137 template<
bool passThroughBlockDiagonalArgs>
// Returns view.getEntryWithPassThroughOption(passThroughBlockDiagonalArgs, intArgs...),
// typed as ViewType::const_reference_type.
140 template<
class ViewType,
class ...IntArgs>
141 static KOKKOS_INLINE_FUNCTION
typename ViewType::const_reference_type
get(
const ViewType &view,
const IntArgs&... intArgs)
143 return view.getEntryWithPassThroughOption(passThroughBlockDiagonalArgs, intArgs...);
// Template head of the combiner class (its name line is not visible here), parameterized on the
// scalar type, the device type, and the binary operator used to combine entries.
148 template <
class DataScalar,
typename DeviceType,
class BinaryOperator>
// Writable entry reference type of a ScalarView over DataScalar.
151 using reference_type =
typename ScalarView<DataScalar,DeviceType>::reference_type;
// Entry reference type of a ScalarView over const DataScalar.
152 using const_reference_type =
typename ScalarView<const DataScalar,DeviceType>::reference_type;
// storeInPlaceCombination overload taking an execution policy plus the three containers by
// reference (the first line of the signature is not visible here).
// Constructs a Functor from the containers and the binary operator and launches it with
// Kokkos::parallel_for over the given policy, labelled "compute in-place".
// NOTE(review): argThis, argA and argB are not used in the visible body; presumably they exist only
// so the extractor types can be deduced, and the (not visible) Functor alias consumes those types.
155 template<
class PolicyType,
class ThisUnderlyingViewType,
class AUnderlyingViewType,
class BUnderlyingViewType,
156 class ArgExtractorThis,
class ArgExtractorA,
class ArgExtractorB>
158 AUnderlyingViewType &A_underlying, BUnderlyingViewType &B_underlying,
159 BinaryOperator &binaryOperator, ArgExtractorThis argThis, ArgExtractorA argA, ArgExtractorB argB)
162 Functor functor(this_underlying, A_underlying, B_underlying, binaryOperator);
163 Kokkos::parallel_for(
"compute in-place", policy, functor);
// storeInPlaceCombination with compile-time rank, enabled when rank != 7.
// Every path below ends in a call to the policy-based storeInPlaceCombination, differing only in which
// container is passed for each operand (a getUnderlyingView<1>() view, a getUnderlyingView<rank>() view,
// or the Data object itself) and which argument extractor accompanies it.
// NOTE(review): many lines of this function (conditions, switch heads, extractor definitions) are
// not visible here; the comments describe only what the visible statements show.
169 enable_if_t<rank != 7, void>
// Iteration space is derived from thisData for this rank.
172 auto policy = thisData.template dataExtentRangePolicy<rank>();
// Inspect the per-dimension variation types of a Data object, looking for GENERAL dimensions.
201 const auto & variationTypes = data.getVariationTypes();
202 for (
int d=0; d<rank; d++)
204 if (variationTypes[d] == GENERAL)
// Path using the constArg extractor for the destination with getUnderlyingView<1>() views throughout.
214 auto thisAE = constArg;
217 auto & this_underlying = thisData.template getUnderlyingView<1>();
218 auto & A_underlying = A.template getUnderlyingView<1>();
219 auto & B_underlying = B.template getUnderlyingView<1>();
220 storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, BAE);
// All three containers flagged full: operate on the getUnderlyingView<rank>() views with fullArgs.
222 else if (this_full && A_full && B_full)
224 auto thisAE = fullArgs;
228 auto & this_underlying = thisData.template getUnderlyingView<rank>();
229 auto & A_underlying = A.template getUnderlyingView<rank>();
230 auto & B_underlying = B.template getUnderlyingView<rank>();
232 storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, BAE);
// A read through getUnderlyingView<1>(); destination and B through getUnderlyingView<rank>().
237 auto & A_underlying = A.template getUnderlyingView<1>();
240 auto thisAE = fullArgs;
241 auto & this_underlying = thisData.template getUnderlyingView<rank>();
246 auto & B_underlying = B.template getUnderlyingView<rank>();
247 storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, BAE);
251 auto BAE = fullArgsData;
// B is 1D with a valid 1D argument index: destination and B share the extractor chosen by that index.
258 if (B_1D && (get1DArgIndex(B) != -1) )
261 const int argIndex = get1DArgIndex(B);
262 auto & B_underlying = B.template getUnderlyingView<1>();
263 auto & this_underlying = thisData.template getUnderlyingView<1>();
// One case per supported index position (0..5); anything else throws std::invalid_argument.
266 case 0:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg0, AAE, arg0);
break;
267 case 1:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg1, AAE, arg1);
break;
268 case 2:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg2, AAE, arg2);
break;
269 case 3:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg3, AAE, arg3);
break;
270 case 4:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg4, AAE, arg4);
break;
271 case 5:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg5, AAE, arg5);
break;
272 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid/unexpected arg index");
278 auto thisAE = fullArgsWritable;
279 auto BAE = fullArgsData;
// B read through getUnderlyingView<1>(); destination and A through getUnderlyingView<rank>().
287 auto & B_underlying = B.template getUnderlyingView<1>();
290 auto thisAE = fullArgs;
291 auto & this_underlying = thisData.template getUnderlyingView<rank>();
295 auto & A_underlying = A.template getUnderlyingView<rank>();
297 storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, BAE);
302 auto AAE = fullArgsData;
// A is 1D with a valid 1D argument index: destination and A share the extractor chosen by that index.
309 if (A_1D && (get1DArgIndex(A) != -1) )
312 const int argIndex = get1DArgIndex(A);
313 auto & A_underlying = A.template getUnderlyingView<1>();
314 auto & this_underlying = thisData.template getUnderlyingView<1>();
317 case 0:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg0, arg0, BAE);
break;
318 case 1:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg1, arg1, BAE);
break;
319 case 2:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg2, arg2, BAE);
break;
320 case 3:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg3, arg3, BAE);
break;
321 case 4:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg4, arg4, BAE);
break;
322 case 5:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg5, arg5, BAE);
break;
323 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid/unexpected arg index");
329 auto thisAE = fullArgsWritable;
330 auto AAE = fullArgsData;
// Destination is 1D with a valid 1D argument index; the 1D indices of A and B decide the sub-case.
337 if (this_1D && (get1DArgIndex(thisData) != -1))
344 const int argThis = get1DArgIndex(thisData);
345 const int argA = get1DArgIndex(A);
346 const int argB = get1DArgIndex(B);
348 auto & A_underlying = A.template getUnderlyingView<1>();
349 auto & B_underlying = B.template getUnderlyingView<1>();
350 auto & this_underlying = thisData.template getUnderlyingView<1>();
// Both operands have a valid 1D index; in debug builds both are required to equal the destination's.
351 if ((argA != -1) && (argB != -1))
353 #ifdef INTREPID2_HAVE_DEBUG
354 INTREPID2_TEST_FOR_EXCEPTION(argA != argThis, std::logic_error,
"Unexpected 1D arg combination.");
355 INTREPID2_TEST_FOR_EXCEPTION(argB != argThis, std::logic_error,
"Unexpected 1D arg combination.");
359 case 0:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg0, arg0, arg0);
break;
360 case 1:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg1, arg1, arg1);
break;
361 case 2:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg2, arg2, arg2);
break;
362 case 3:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg3, arg3, arg3);
break;
363 case 4:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg4, arg4, arg4);
break;
364 case 5:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, arg5, arg5, arg5);
break;
365 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid/unexpected arg index");
// Here the Data object B itself (not an underlying view) is passed and read through fullArgsData.
373 case 0:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg0, arg0, fullArgsData);
break;
374 case 1:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg1, arg1, fullArgsData);
break;
375 case 2:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg2, arg2, fullArgsData);
break;
376 case 3:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg3, arg3, fullArgsData);
break;
377 case 4:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg4, arg4, fullArgsData);
break;
378 case 5:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, arg5, arg5, fullArgsData);
break;
379 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid/unexpected arg index");
// Mirror of the previous switch: the Data object A itself is passed and read through fullArgsData.
387 case 0:
storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg0, fullArgsData, arg0);
break;
388 case 1:
storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg1, fullArgsData, arg1);
break;
389 case 2:
storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg2, fullArgsData, arg2);
break;
390 case 3:
storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg3, fullArgsData, arg3);
break;
391 case 4:
storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg4, fullArgsData, arg4);
break;
392 case 5:
storeInPlaceCombination(policy, this_underlying, A, B_underlying, binaryOperator, arg5, fullArgsData, arg5);
break;
393 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid/unexpected arg index");
// Destination accessed through its getUnderlyingView<rank>() view with the fullArgs extractor.
400 auto & this_underlying = thisData.template getUnderlyingView<rank>();
401 auto thisAE = fullArgs;
405 auto & A_underlying = A.template getUnderlyingView<rank>();
// B is 1D with a valid index: only B's extractor depends on that index.
408 if (B_1D && (get1DArgIndex(B) != -1))
410 const int argIndex = get1DArgIndex(B);
411 auto & B_underlying = B.template getUnderlyingView<1>();
414 case 0:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg0);
break;
415 case 1:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg1);
break;
416 case 2:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg2);
break;
417 case 3:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg3);
break;
418 case 4:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg4);
break;
419 case 5:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, AAE, arg5);
break;
420 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid/unexpected arg index");
// A is 1D with a valid index: only A's extractor depends on that index.
433 if (A_1D && (get1DArgIndex(A) != -1))
435 const int argIndex = get1DArgIndex(A);
436 auto & A_underlying = A.template getUnderlyingView<1>();
439 auto & B_underlying = B.template getUnderlyingView<rank>();
443 case 0:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg0, BAE);
break;
444 case 1:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg1, BAE);
break;
445 case 2:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg2, BAE);
break;
446 case 3:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg3, BAE);
break;
447 case 4:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg4, BAE);
break;
448 case 5:
storeInPlaceCombination(policy, this_underlying, A_underlying, B_underlying, binaryOperator, thisAE, arg5, BAE);
break;
449 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid/unexpected arg index");
// Same A-indexed dispatch, but the Data object B itself is passed and read through fullArgsData.
454 auto BAE = fullArgsData;
457 case 0:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg0, BAE);
break;
458 case 1:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg1, BAE);
break;
459 case 2:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg2, BAE);
break;
460 case 3:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg3, BAE);
break;
461 case 4:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg4, BAE);
break;
462 case 5:
storeInPlaceCombination(policy, this_underlying, A_underlying, B, binaryOperator, thisAE, arg5, BAE);
break;
463 default: INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid/unexpected arg index");
// Remaining paths select the fullArgsData / fullArgsWritable extractors; the calls that use them
// are not visible here.
470 auto AAE = fullArgsData;
471 auto BAE = fullArgsData;
479 auto thisAE = fullArgsWritable;
480 auto AAE = fullArgsData;
481 auto BAE = fullArgsData;
// storeInPlaceCombination with compile-time rank, enabled only when rank == 7.
// Builds the policy from thisData, then launches a Functor constructed from the Data objects
// themselves (thisData, A, B — not their underlying views), the binary operator, and dim6.
// NOTE(review): the definitions of Functor and dim6 are not visible here; includeInnerLoop is fixed
// to true, presumably so the functor loops over the last dimension itself with dim6 as the loop
// size — confirm.
490 enable_if_t<rank == 7, void>
493 auto policy = thisData.template dataExtentRangePolicy<rank>();
501 const bool includeInnerLoop =
true;
503 Functor functor(thisData, A, B, binaryOperator, dim6);
504 Kokkos::parallel_for(
"compute in-place", policy, functor);
// Body of the run-time-rank entry point (its signature is not visible here): validates extents in
// debug builds, handles a single-entry case directly, and otherwise dispatches on thisData.rank()
// to the compile-time-rank overloads.
509 using ExecutionSpace =
typename DeviceType::execution_space;
// Debug builds only: A and B must match thisData's logical extent in every dimension, otherwise
// std::invalid_argument is thrown.
511 #ifdef INTREPID2_HAVE_DEBUG
513 for (
int d=0; d<rank_; d++)
515 INTREPID2_TEST_FOR_EXCEPTION(A.
extent_int(d) != thisData.
extent_int(d), std::invalid_argument,
"A, B, and this must agree on all logical extents");
516 INTREPID2_TEST_FOR_EXCEPTION(B.
extent_int(d) != thisData.
extent_int(d), std::invalid_argument,
"A, B, and this must agree on all logical extents");
// Single-iteration launch (range [0,1)) of the constant-case functor on the getUnderlyingView<1>() views.
// NOTE(review): the condition guarding this path is not visible here; presumably it applies when
// all three containers hold constant data — confirm.
527 Kokkos::RangePolicy<ExecutionSpace> policy(ExecutionSpace(),0,1);
529 auto this_underlying = thisData.template getUnderlyingView<1>();
530 auto A_underlying = A.template getUnderlyingView<1>();
531 auto B_underlying = B.template getUnderlyingView<1>();
533 using ConstantCaseFunctor = InPlaceCombinationFunctorConstantCase<decltype(binaryOperator), decltype(this_underlying),
534 decltype(A_underlying), decltype(B_underlying)>;
536 ConstantCaseFunctor functor(this_underlying, A_underlying, B_underlying, binaryOperator);
537 Kokkos::parallel_for(
"compute in-place", policy,functor);
// Convert the run-time rank (1 through 7) into the matching compile-time-rank overload.
541 switch (thisData.
rank())
543 case 1: storeInPlaceCombination<1>(thisData, A, B, binaryOperator);
break;
544 case 2: storeInPlaceCombination<2>(thisData, A, B, binaryOperator);
break;
545 case 3: storeInPlaceCombination<3>(thisData, A, B, binaryOperator);
break;
546 case 4: storeInPlaceCombination<4>(thisData, A, B, binaryOperator);
break;
547 case 5: storeInPlaceCombination<5>(thisData, A, B, binaryOperator);
break;
548 case 6: storeInPlaceCombination<6>(thisData, A, B, binaryOperator);
break;
549 case 7: storeInPlaceCombination<7>(thisData, A, B, binaryOperator);
break;
KOKKOS_INLINE_FUNCTION ordinal_type getUnderlyingViewRank() const
returns the rank of the View that stores the unique data
static enable_if_t< rank==7, void > storeInPlaceCombination(Data< DataScalar, DeviceType > &thisData, const Data< DataScalar, DeviceType > &A, const Data< DataScalar, DeviceType > &B, BinaryOperator binaryOperator)
storeInPlaceCombination with compile-time rank – implementation for rank of 7. (Not optimized; expect...
#define INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(test, x, msg)
Defines the Data class, a wrapper around a Kokkos::View that allows data that is constant or repeatin...
KOKKOS_INLINE_FUNCTION bool underlyingMatchesLogical() const
Returns true if the underlying container has exactly the same rank and extents as the logical contain...
Wrapper around a Kokkos::View that allows data that is constant or repeating in various logical dimen...
static void storeInPlaceCombination(PolicyType &policy, ThisUnderlyingViewType &this_underlying, AUnderlyingViewType &A_underlying, BUnderlyingViewType &B_underlying, BinaryOperator &binaryOperator, ArgExtractorThis argThis, ArgExtractorA argA, ArgExtractorB argB)
storeInPlaceCombination implementation for rank < 7, with compile-time underlying views and argument ...
Defines functors for use with Data objects: so far, we include simple arithmetical functors for sum...
functor definition for the constant-data case.
KOKKOS_INLINE_FUNCTION ordinal_type getUnderlyingViewSize() const
returns the number of entries in the View that stores the unique data
KOKKOS_INLINE_FUNCTION unsigned rank() const
Returns the logical rank of the Data container.
KOKKOS_INLINE_FUNCTION int extent_int(const int &r) const
Returns the logical extent in the specified dimension.
Defines DataVariationType enum that specifies the types of variation possible within a Data object...
static enable_if_t< rank!=7, void > storeInPlaceCombination(Data< DataScalar, DeviceType > &thisData, const Data< DataScalar, DeviceType > &A, const Data< DataScalar, DeviceType > &B, BinaryOperator binaryOperator)
storeInPlaceCombination with compile-time rank – implementation for rank < 7.
KOKKOS_INLINE_FUNCTION int getDataExtent(const ordinal_type &d) const
returns the true extent of the data corresponding to the logical dimension provided; if the data does...