Implementation of a general sum factorization algorithm, using a novel approach developed by Roberts, for integration. Uses hierarchical parallelism. More...

#include <Intrepid2_IntegrationToolsDef.hpp>

Public Member Functions
	F_IntegratePointValueCache (Data< Scalar, DeviceType > integralData, TensorData< Scalar, DeviceType > leftComponent, Data< Scalar, DeviceType > composedTransform, TensorData< Scalar, DeviceType > rightComponent, TensorData< Scalar, DeviceType > cellMeasures, int a_offset, int b_offset, int leftFieldOrdinalOffset, int rightFieldOrdinalOffset)

template<size_t maxComponents, size_t numComponents = maxComponents>
KOKKOS_INLINE_FUNCTION int	incrementArgument (Kokkos::Array< int, maxComponents > &arguments, const Kokkos::Array< int, maxComponents > &bounds) const

KOKKOS_INLINE_FUNCTION int	incrementArgument (Kokkos::Array< int, Parameters::MaxTensorComponents > &arguments, const Kokkos::Array< int, Parameters::MaxTensorComponents > &bounds, const int &numComponents) const
	Runtime-sized variant of incrementArgument; used when computing the approximate flop count.

template<size_t maxComponents, size_t numComponents = maxComponents>
KOKKOS_INLINE_FUNCTION int	nextIncrementResult (const Kokkos::Array< int, maxComponents > &arguments, const Kokkos::Array< int, maxComponents > &bounds) const

KOKKOS_INLINE_FUNCTION int	nextIncrementResult (const Kokkos::Array< int, Parameters::MaxTensorComponents > &arguments, const Kokkos::Array< int, Parameters::MaxTensorComponents > &bounds, const int &numComponents) const
	Runtime-sized variant of nextIncrementResult; used when computing the approximate flop count.

template<size_t maxComponents, size_t numComponents = maxComponents>
KOKKOS_INLINE_FUNCTION int	relativeEnumerationIndex (const Kokkos::Array< int, maxComponents > &arguments, const Kokkos::Array< int, maxComponents > &bounds, const int startIndex) const

template<int rank>
KOKKOS_INLINE_FUNCTION enable_if_t< rank==3 &&rank==integralViewRank, Scalar & >	integralViewEntry (const IntegralViewType &integralView, const int &cellDataOrdinal, const int &i, const int &j) const

template<int rank>
KOKKOS_INLINE_FUNCTION enable_if_t< rank==2 &&rank==integralViewRank, Scalar & >	integralViewEntry (const IntegralViewType &integralView, const int &cellDataOrdinal, const int &i, const int &j) const

KOKKOS_INLINE_FUNCTION void	runSpecialized3 (const TeamMember &teamMember) const
	Hand-coded 3-component version.

template<size_t numTensorComponents>
KOKKOS_INLINE_FUNCTION void	run (const TeamMember &teamMember) const

KOKKOS_INLINE_FUNCTION void	operator() (const TeamMember &teamMember) const

long	approximateFlopCountPerCell () const
	Returns an estimate of the number of floating-point operations per cell; additions, subtractions, multiplications, and divisions each count as one operation.

int	teamSize (const int &maxTeamSizeFromKokkos) const
	Returns the team size that should be provided to the policy constructor, based on the Kokkos maximum and the amount of thread parallelism available.

size_t	team_shmem_size (int numThreads) const
	Provide the shared memory capacity.

Private Types
using	ExecutionSpace = typename DeviceType::execution_space

using	TeamPolicy = Kokkos::TeamPolicy< DeviceType >

using	TeamMember = typename TeamPolicy::member_type

using	IntegralViewType = Kokkos::View< typename RankExpander< Scalar, integralViewRank >::value_type, DeviceType >

Private Attributes
IntegralViewType	integralView_

TensorData< Scalar, DeviceType >	leftComponent_

Data< Scalar, DeviceType >	composedTransform_

TensorData< Scalar, DeviceType >	rightComponent_

TensorData< Scalar, DeviceType >	cellMeasures_

int	a_offset_

int	b_offset_

int	leftComponentSpan_

int	rightComponentSpan_

int	numTensorComponents_

int	leftFieldOrdinalOffset_

int	rightFieldOrdinalOffset_

size_t	fad_size_output_ = 0

Kokkos::Array< int, Parameters::MaxTensorComponents >	leftFieldBounds_

Kokkos::Array< int, Parameters::MaxTensorComponents >	rightFieldBounds_

Kokkos::Array< int, Parameters::MaxTensorComponents >	pointBounds_

int	maxFieldsLeft_

int	maxFieldsRight_

int	maxPointCount_

Detailed Description

template<class Scalar, class DeviceType, int integralViewRank>
class Intrepid2::Impl::F_IntegratePointValueCache< Scalar, DeviceType, integralViewRank >

Implementation of a general sum factorization algorithm, using a novel approach developed by Roberts, for integration. Uses hierarchical parallelism.

Whereas F_Integrate, Mora and Demkowicz, and all other approaches we are aware of cache partial sums at intermediate component levels (so that the cached values are indexed by component basis ordinals), we integrate the first component in its dimension(s) and store values for the integration points in the remaining dimensions, so that our caches are indexed by point ordinals. If there are L_x, L_y, and L_z quadrature points in dimensions x, y, and z, we require a cache of size L_y * L_z + 1 for a 3D, 3-component integral. The standard approach requires a cache of size (p_x+1)*(p_y+1). So long as one is not over-integrating by too much, these sizes are about the same. The real advantage of our approach here is (we expect) that it improves data locality.

Definition at line 992 of file Intrepid2_IntegrationToolsDef.hpp.

The documentation for this class was generated from the following file:

http://docs.trilinos.org/dev/packages/intrepid2/src/Discretization/Integration/Intrepid2_IntegrationToolsDef.hpp

Public Member Functions

Private Types

Private Attributes

Detailed Description

template<class Scalar, class DeviceType, int integralViewRank> class Intrepid2::Impl::F_IntegratePointValueCache< Scalar, DeviceType, integralViewRank >

template<class Scalar, class DeviceType, int integralViewRank>
class Intrepid2::Impl::F_IntegratePointValueCache< Scalar, DeviceType, integralViewRank >