1 #ifndef _COMPADRE_PARALLELMANAGER_HPP_ 
    2 #define _COMPADRE_PARALLELMANAGER_HPP_ 
    4 #include "Compadre_Config.h" 
   72 #ifdef COMPADRE_USE_CUDA 
  104     template<
typename Tag, 
class C>
 
  106             const int vector_lanes_per_thread, C functor)
 const {
 
  110                 Kokkos::parallel_for(
 
  111                     Kokkos::TeamPolicy<Tag>(batch_size, threads_per_team, vector_lanes_per_thread)
 
  116                     functor, 
typeid(Tag).name());
 
  119             Kokkos::parallel_for(
 
  120                 Kokkos::TeamPolicy<Tag>(batch_size, threads_per_team, vector_lanes_per_thread)
 
  124                 functor, 
typeid(Tag).name());
 
  127             Kokkos::parallel_for(
 
  128                 Kokkos::TeamPolicy<Tag>(batch_size, threads_per_team, vector_lanes_per_thread)
 
  132                 functor, 
typeid(Tag).name());
 
  135             Kokkos::parallel_for(
 
  136                 Kokkos::TeamPolicy<Tag>(batch_size, threads_per_team, vector_lanes_per_thread)
 
  139                 functor, 
typeid(Tag).name());
 
  148             const int vector_lanes_per_thread, C functor, std::string functor_name = 
typeid(C).name())
 const {
 
  152             Kokkos::parallel_for(
 
  153                 Kokkos::TeamPolicy<>(batch_size, threads_per_team, vector_lanes_per_thread)
 
  158                 functor, functor_name);
 
  161             Kokkos::parallel_for(
 
  162                 Kokkos::TeamPolicy<>(batch_size, threads_per_team, vector_lanes_per_thread)
 
  166                 functor, functor_name);
 
  169             Kokkos::parallel_for(
 
  170                 Kokkos::TeamPolicy<>(batch_size, threads_per_team, vector_lanes_per_thread)
 
  174                 functor, functor_name);
 
  177             Kokkos::parallel_for(
 
  178                 Kokkos::TeamPolicy<>(batch_size, threads_per_team, vector_lanes_per_thread)
 
  181                 functor, functor_name);
 
  188     template<
typename Tag, 
class C>
 
  191         CallFunctorWithTeamThreadsAndVectors<Tag,C>(batch_size, this->
getThreadsPerTeam(), 1, functor);
 
  199         CallFunctorWithTeamThreadsAndVectors<C>(batch_size, this->
getThreadsPerTeam(), 1, functor, functor_name);
 
  202     KOKKOS_INLINE_FUNCTION
 
  211     KOKKOS_INLINE_FUNCTION
 
  220     KOKKOS_INLINE_FUNCTION
 
  229     KOKKOS_INLINE_FUNCTION
 
  238     KOKKOS_INLINE_FUNCTION
 
  243     KOKKOS_INLINE_FUNCTION
 
int _vector_lanes_per_thread
 
KOKKOS_INLINE_FUNCTION int getVectorLanesPerThread() const 
 
std::size_t global_index_type
 
KOKKOS_INLINE_FUNCTION int getTeamScratchLevel(const int level) const 
 
void setVectorLanesPerThread(const int value)
 
int _scratch_thread_level_b
higher (slower) level memory for Kokkos::parallel_for for thread access memory 
 
KOKKOS_INLINE_FUNCTION int getTeamScratchSize(const int level) const 
 
int _thread_scratch_size_b
 
int _thread_scratch_size_a
 
KOKKOS_INLINE_FUNCTION int getThreadScratchSize(const int level) const 
 
void setTeamScratchLevel(const int level, const int value)
 
void CallFunctorWithTeamThreadsAndVectors(const global_index_type batch_size, const int threads_per_team, const int vector_lanes_per_thread, C functor, std::string functor_name=typeid(C).name()) const 
Calls a parallel_for. The parallel_for will break out over loops over teams, with each vector lane executing...
 
KOKKOS_INLINE_FUNCTION int getThreadsPerTeam(const int vector_lanes_per_thread=1) const 
 
void setThreadScratchLevel(const int level, const int value)
 
void CallFunctorWithTeamThreads(const global_index_type batch_size, C functor) const 
Calls a parallel_for. The parallel_for will break out over loops over teams, with each thread executing code...
 
void CallFunctorWithTeamThreadsAndVectors(const global_index_type batch_size, const int threads_per_team, const int vector_lanes_per_thread, C functor) const 
Calls a parallel_for. The parallel_for will break out over loops over teams, with each vector lane executing...
 
int _scratch_team_level_b
lowest level memory for Kokkos::parallel_for for thread access memory 
 
void setTeamScratchSize(const int level, const int value)
 
void setThreadScratchSize(const int level, const int value)
 
int _scratch_thread_level_a
higher (slower) level memory for Kokkos::parallel_for for team access memory 
 
int _scratch_team_level_a
lowest level memory for Kokkos::parallel_for for team access memory 
 
void setThreadsPerTeam(const int value)
 
void CallFunctorWithTeamThreads(const global_index_type batch_size, C functor, std::string functor_name=typeid(C).name()) const 
Calls a parallel_for. The parallel_for will break out over loops over teams, with each thread executing code...
 
int _threads_per_team
calculated number of threads per team 
 
KOKKOS_INLINE_FUNCTION int getThreadScratchLevel(const int level) const