1 #ifndef _COMPADRE_PARALLELMANAGER_HPP_
2 #define _COMPADRE_PARALLELMANAGER_HPP_
4 #include "Compadre_Config.h"
72 #ifdef COMPADRE_USE_CUDA
104 template<
typename Tag,
class C>
106 const int vector_lanes_per_thread, C functor)
const {
110 Kokkos::parallel_for(
111 Kokkos::TeamPolicy<Tag>(batch_size, threads_per_team, vector_lanes_per_thread)
116 functor,
typeid(Tag).name());
119 Kokkos::parallel_for(
120 Kokkos::TeamPolicy<Tag>(batch_size, threads_per_team, vector_lanes_per_thread)
124 functor,
typeid(Tag).name());
127 Kokkos::parallel_for(
128 Kokkos::TeamPolicy<Tag>(batch_size, threads_per_team, vector_lanes_per_thread)
132 functor,
typeid(Tag).name());
135 Kokkos::parallel_for(
136 Kokkos::TeamPolicy<Tag>(batch_size, threads_per_team, vector_lanes_per_thread)
139 functor,
typeid(Tag).name());
148 const int vector_lanes_per_thread, C functor, std::string functor_name =
typeid(C).name())
const {
152 Kokkos::parallel_for(
153 Kokkos::TeamPolicy<>(batch_size, threads_per_team, vector_lanes_per_thread)
158 functor, functor_name);
161 Kokkos::parallel_for(
162 Kokkos::TeamPolicy<>(batch_size, threads_per_team, vector_lanes_per_thread)
166 functor, functor_name);
169 Kokkos::parallel_for(
170 Kokkos::TeamPolicy<>(batch_size, threads_per_team, vector_lanes_per_thread)
174 functor, functor_name);
177 Kokkos::parallel_for(
178 Kokkos::TeamPolicy<>(batch_size, threads_per_team, vector_lanes_per_thread)
181 functor, functor_name);
188 template<
typename Tag,
class C>
191 CallFunctorWithTeamThreadsAndVectors<Tag,C>(batch_size, this->
getThreadsPerTeam(), 1, functor);
199 CallFunctorWithTeamThreadsAndVectors<C>(batch_size, this->
getThreadsPerTeam(), 1, functor, functor_name);
202 KOKKOS_INLINE_FUNCTION
211 KOKKOS_INLINE_FUNCTION
220 KOKKOS_INLINE_FUNCTION
229 KOKKOS_INLINE_FUNCTION
238 KOKKOS_INLINE_FUNCTION
243 KOKKOS_INLINE_FUNCTION
int _vector_lanes_per_thread
KOKKOS_INLINE_FUNCTION int getVectorLanesPerThread() const
std::size_t global_index_type
KOKKOS_INLINE_FUNCTION int getTeamScratchLevel(const int level) const
void setVectorLanesPerThread(const int value)
int _scratch_thread_level_b
higher (slower) level memory for Kokkos::parallel_for for thread access memory
KOKKOS_INLINE_FUNCTION int getTeamScratchSize(const int level) const
int _thread_scratch_size_b
int _thread_scratch_size_a
KOKKOS_INLINE_FUNCTION int getThreadScratchSize(const int level) const
void setTeamScratchLevel(const int level, const int value)
void CallFunctorWithTeamThreadsAndVectors(const global_index_type batch_size, const int threads_per_team, const int vector_lanes_per_thread, C functor, std::string functor_name=typeid(C).name()) const
Calls a parallel_for. The parallel_for will break out over loops over teams, with each vector lane executing...
KOKKOS_INLINE_FUNCTION int getThreadsPerTeam(const int vector_lanes_per_thread=1) const
void setThreadScratchLevel(const int level, const int value)
void CallFunctorWithTeamThreads(const global_index_type batch_size, C functor) const
Calls a parallel_for. The parallel_for will break out over loops over teams, with each thread executing code...
void CallFunctorWithTeamThreadsAndVectors(const global_index_type batch_size, const int threads_per_team, const int vector_lanes_per_thread, C functor) const
Calls a parallel_for. The parallel_for will break out over loops over teams, with each vector lane executing...
int _scratch_team_level_b
lowest level memory for Kokkos::parallel_for for thread access memory
void setTeamScratchSize(const int level, const int value)
void setThreadScratchSize(const int level, const int value)
int _scratch_thread_level_a
higher (slower) level memory for Kokkos::parallel_for for team access memory
int _scratch_team_level_a
lowest level memory for Kokkos::parallel_for for team access memory
void setThreadsPerTeam(const int value)
void CallFunctorWithTeamThreads(const global_index_type batch_size, C functor, std::string functor_name=typeid(C).name()) const
Calls a parallel_for. The parallel_for will break out over loops over teams, with each thread executing code...
int _threads_per_team
calculated number of threads per team
KOKKOS_INLINE_FUNCTION int getThreadScratchLevel(const int level) const