30 #define SACADO_VIEW_CUDA_HIERARCHICAL 1 
   31 #define SACADO_ALIGN_SFAD 1 
   37 #include "impl/Kokkos_Timer.hpp" 
   // run_fad_hierarchical_flat -- "flat" hierarchical residual kernel.
   //
   // NOTE: this is an extracted, line-numbered listing. The number that starts
   // many lines is the listing line number, and several source lines are absent
   // (the function-name line that declares flux and wgb, the braces, the
   // definition of is_cuda, and whatever sits between the basis loop header and
   // the qp loop).
   //
   // The kernel runs under a Kokkos::TeamPolicy in which every team member
   // handles one cell. For each basis function it accumulates
   //   value  += flux(cell,qp,dim) * wgb(cell,basis,qp,dim)   over qp and dim
   //   value2 += src(cell,qp)      * wbs(cell,basis,qp)       over qp
   // and stores value+value2 into residual(cell,basis).
   39 template<
typename FluxView, 
typename WgbView, 
typename SrcView,
 
   40          typename WbsView, 
typename ResidualView>
 
   42                                const SrcView& src, 
const WbsView& wbs,
 
   43                                const ResidualView& residual)
 
   // Execution space and the per-thread accumulator type both come from
   // ResidualView.
   45   typedef typename ResidualView::execution_space execution_space;
 
   46   typedef typename Kokkos::ThreadLocalScalarType<ResidualView>::type local_scalar_type;
 
   47   typedef Kokkos::TeamPolicy<execution_space> policy_type;
 
   48   typedef typename policy_type::member_type team_member;
 
   // All four problem extents are read from the 4-D wgb view.
   50   const size_t num_cells = wgb.extent(0);
 
   51   const int num_basis    = wgb.extent(1);
 
   52   const int num_points   = wgb.extent(2);
 
   53   const int num_dim      = wgb.extent(3);
 
   // is_cuda is defined on a line missing from this listing. When it is true the
   // policy uses vector length 32 and 256/32 = 8 members per team; otherwise
   // both are 1.
   56   const int vector_size  = is_cuda ? 32 : 1;
 
   57   const int team_size    = is_cuda ? 256/vector_size : 1;
 
   // League size: num_cells divided by team_size, rounded up.
   58   const size_t range     = (num_cells+team_size-1)/team_size;
 
   60   policy_type policy(range,team_size,vector_size);
 
   61   Kokkos::parallel_for(policy, KOKKOS_LAMBDA (
const team_member& team)
 
   // Flat cell index: one cell per (league_rank, team_rank) pair.
   // NOTE(review): range is rounded up, so in the last team cell can be
   // >= num_cells when num_cells is not a multiple of team_size. No bounds
   // guard appears in the visible lines -- confirm against the full source.
   63     const size_t cell = team.league_rank()*team_size + team.team_rank();
 
   64     local_scalar_type value, value2;
 
   65     for (
int basis=0; basis<num_basis; ++basis) {
 
   // NOTE(review): listing lines 66-67 are missing here; presumably they reset
   // value and value2 for each basis function -- confirm.
   68       for (
int qp=0; qp<num_points; ++qp) {
 
   69         for (
int dim=0; dim<num_dim; ++dim)
 
   70           value += flux(cell,qp,dim)*wgb(cell,basis,qp,dim);
 
   71         value2 += src(cell,qp)*wbs(cell,basis,qp);
 
   // Store the combined flux and source contributions for this (cell, basis).
   73       residual(cell,basis) = value+value2;
 
   // run_fad_hierarchical_team -- "team" hierarchical residual kernel.
   //
   // NOTE: this is an extracted, line-numbered listing. The number that starts
   // many lines is the listing line number, and several source lines are absent
   // (the function-name line that declares flux and wgb, the braces, the
   // definition of is_cuda, and whatever sits between the basis loop header and
   // the qp loop).
   //
   // The league size is num_cells, so each team handles exactly one cell and the
   // team members divide that cell's basis functions among themselves. For each
   // basis function a member accumulates
   //   value  += flux(cell,qp,dim) * wgb(cell,basis,qp,dim)   over qp and dim
   //   value2 += src(cell,qp)      * wbs(cell,basis,qp)       over qp
   // and stores value+value2 into residual(cell,basis).
   78 template<
typename FluxView, 
typename WgbView, 
typename SrcView,
 
   79          typename WbsView, 
typename ResidualView>
 
   81                                const SrcView& src, 
const WbsView& wbs,
 
   82                                const ResidualView& residual)
 
   // Execution space and the per-thread accumulator type both come from
   // ResidualView.
   84   typedef typename ResidualView::execution_space execution_space;
 
   85   typedef typename Kokkos::ThreadLocalScalarType<ResidualView>::type local_scalar_type;
 
   86   typedef Kokkos::TeamPolicy<execution_space> policy_type;
 
   87   typedef typename policy_type::member_type team_member;
 
   // All four problem extents are read from the 4-D wgb view.
   89   const size_t num_cells = wgb.extent(0);
 
   90   const int num_basis    = wgb.extent(1);
 
   91   const int num_points   = wgb.extent(2);
 
   92   const int num_dim      = wgb.extent(3);
 
   // is_cuda is defined on a line missing from this listing. When it is true the
   // policy uses vector length 32 and 256/32 = 8 members per team; otherwise
   // both are 1.
   95   const int vector_size  = is_cuda ? 32 : 1;
 
   96   const int team_size    = is_cuda ? 256/vector_size : 1;
 
   // One team per cell: the league size is num_cells itself.
   98   policy_type policy(num_cells,team_size,vector_size);
 
   99   Kokkos::parallel_for(policy, KOKKOS_LAMBDA (
const team_member& team)
 
  101     const int team_rank = team.team_rank();
 
   // The league rank is the cell index, so it is always < num_cells.
  102     const size_t cell = team.league_rank();
 
  103     local_scalar_type value, value2;
 
   // Member team_rank handles basis functions team_rank, team_rank+team_size, ...
  104     for (
int basis=team_rank; basis<num_basis; basis+=team_size) {
 
   // NOTE(review): listing lines 105-106 are missing here; presumably they reset
   // value and value2 for each basis function -- confirm.
  107       for (
int qp=0; qp<num_points; ++qp) {
 
  108         for (
int dim=0; dim<num_dim; ++dim)
 
  109           value += flux(cell,qp,dim)*wgb(cell,basis,qp,dim);
 
  110         value2 += src(cell,qp)*wbs(cell,basis,qp);
 
   // Store the combined flux and source contributions for this (cell, basis).
  112       residual(cell,basis) = value+value2;
 
  // Timing driver; presumably time_fad_hierarchical_flat (the function-name
  // line is missing from this listing -- confirm).
  //
  // NOTE: this is an extracted, line-numbered listing. The number that starts
  // many lines is the listing line number, and many source lines are absent
  // (the first parameters, braces, the definitions of FadStride and ContLayout,
  // the #else/#endif of the alignment block, the timed loop body, and
  // everything after the time computation, including any use of `check`).
  //
  // Visible behaviour: allocates the input and output views, fills them with
  // init_fad, then starts a timer and computes the elapsed seconds divided by
  // ntrial and by ncells.
  117 template <
typename FadType, 
int N, 
typename ExecSpace>
 
  119                                   int ndim, 
int ntrial, 
bool check)
 
  // With SACADO_ALIGN_SFAD defined, Nalign is N rounded up to a multiple of
  // FadStride and AlignedFadType is FadType::apply_N<Nalign>::type.
  122 #if defined(SACADO_ALIGN_SFAD) 
  123   static const int Nalign = ((
N+FadStride-1)/FadStride)*FadStride;
 
  124   typedef typename FadType::template apply_N<Nalign>::type AlignedFadType;
 
  // Presumably the #else branch (the #else/#endif lines are missing): use
  // FadType unchanged.
  126   typedef FadType AlignedFadType;
 
  129   typedef typename ExecSpace::array_layout DefaultLayout;
 
  // wgb and wbs hold plain doubles; flux, src and residual hold AlignedFadType
  // and use ContLayout, which is defined on a line not shown here.
  131   typedef Kokkos::View<double****,ExecSpace> t_4DView_d;
 
  132   typedef Kokkos::View<double***,ExecSpace> t_3DView_d;
 
  133   typedef Kokkos::View<AlignedFadType***,ContLayout,ExecSpace> t_3DView;
 
  134   typedef Kokkos::View<AlignedFadType**,ContLayout,ExecSpace> t_2DView;
 
  136   t_4DView_d wgb(
"",ncells,num_basis,num_points,ndim);
 
  137   t_3DView_d wbs(
"",ncells,num_basis,num_points);
 
  // The Fad-valued views take one extra trailing constructor argument, N+1.
  // NOTE(review): presumably the Fad storage dimension (value plus N
  // derivatives) -- confirm.
  138   t_3DView flux(
"",ncells,num_points,ndim,
N+1);
 
  139   t_2DView src(
"",ncells,num_points,
N+1);
 
  140   t_2DView residual(
"",ncells,num_basis,
N+1);
 
  141   init_fad(wgb, wbs, flux, src, residual);
 
  // NOTE(review): listing lines 142-147 are missing; what happens between
  // initialisation and the start of the timer cannot be seen here.
  148   Kokkos::Impl::Timer timer;
 
  // The body of this loop (listing lines 150-151) is missing; presumably it
  // launches the kernel once per trial -- confirm.
  149   for (
int i=0; i<ntrial; ++i)
 
  // Elapsed wall time divided by the trial count and by the cell count.
  152   double time = timer.seconds() / ntrial / ncells;
 
  // Timing driver; presumably time_fad_hierarchical_team (the function-name
  // line is missing from this listing -- confirm).
  //
  // NOTE: this is an extracted, line-numbered listing. The number that starts
  // many lines is the listing line number, and many source lines are absent
  // (the first parameters, braces, the definitions of FadStride and ContLayout,
  // the #else/#endif of the alignment block, the timed loop body, and
  // everything after the time computation, including any use of `check`).
  //
  // Visible behaviour: allocates the input and output views, fills them with
  // init_fad, then starts a timer and computes the elapsed seconds divided by
  // ntrial and by ncells.
  161 template <
typename FadType, 
int N, 
typename ExecSpace>
 
  163                                   int ndim, 
int ntrial, 
bool check)
 
  // With SACADO_ALIGN_SFAD defined, Nalign is N rounded up to a multiple of
  // FadStride and AlignedFadType is FadType::apply_N<Nalign>::type.
  166 #if defined(SACADO_ALIGN_SFAD) 
  167   static const int Nalign = ((
N+FadStride-1)/FadStride)*FadStride;
 
  168   typedef typename FadType::template apply_N<Nalign>::type AlignedFadType;
 
  // Presumably the #else branch (the #else/#endif lines are missing): use
  // FadType unchanged.
  170   typedef FadType AlignedFadType;
 
  173   typedef typename ExecSpace::array_layout DefaultLayout;
 
  // wgb and wbs hold plain doubles; flux, src and residual hold AlignedFadType
  // and use ContLayout, which is defined on a line not shown here.
  175   typedef Kokkos::View<double****,ExecSpace> t_4DView_d;
 
  176   typedef Kokkos::View<double***,ExecSpace> t_3DView_d;
 
  177   typedef Kokkos::View<AlignedFadType***,ContLayout,ExecSpace> t_3DView;
 
  178   typedef Kokkos::View<AlignedFadType**,ContLayout,ExecSpace> t_2DView;
 
  180   t_4DView_d wgb(
"",ncells,num_basis,num_points,ndim);
 
  181   t_3DView_d wbs(
"",ncells,num_basis,num_points);
 
  // The Fad-valued views take one extra trailing constructor argument, N+1.
  // NOTE(review): presumably the Fad storage dimension (value plus N
  // derivatives) -- confirm.
  182   t_3DView flux(
"",ncells,num_points,ndim,
N+1);
 
  183   t_2DView src(
"",ncells,num_points,
N+1);
 
  184   t_2DView residual(
"",ncells,num_basis,
N+1);
 
  185   init_fad(wgb, wbs, flux, src, residual);
 
  // NOTE(review): listing lines 186-191 are missing; what happens between
  // initialisation and the start of the timer cannot be seen here.
  192   Kokkos::Impl::Timer timer;
 
  // The body of this loop (listing lines 194-195) is missing; presumably it
  // launches the kernel once per trial -- confirm.
  193   for (
int i=0; i<ntrial; ++i)
 
  // Elapsed wall time divided by the trial count and by the cell count.
  196   double time = timer.seconds() / ntrial / ncells;
 
  // Explicit-instantiation helper: for one (FAD, N, DEV) combination, emits the
  // explicit instantiations of time_fad_hierarchical_flat and
  // time_fad_hierarchical_team.
  205 #define INST_FUNC_FAD_N_DEV(FAD,N,DEV) \ 
  206   template double time_fad_hierarchical_flat< FAD, N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check); \ 
  207   template double time_fad_hierarchical_team< FAD, N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check); 
  // For one device DEV, instantiates both timing functions for SFadType and
  // SLFadType with derivative dimension fad_dim (these three names are defined
  // outside the visible lines).
  209 #define INST_FUNC_DEV(DEV) \ 
  210   INST_FUNC_FAD_N_DEV( SFadType, fad_dim, DEV ) \ 
  211   INST_FUNC_FAD_N_DEV( SLFadType, fad_dim, DEV ) 
  // Per-backend guards. Their bodies and #endif lines are missing from this
  // listing; presumably each one invokes INST_FUNC_DEV for the matching Kokkos
  // execution space -- confirm against the full source.
  213 #ifdef KOKKOS_ENABLE_SERIAL 
  217 #ifdef KOKKOS_ENABLE_OPENMP 
  221 #ifdef KOKKOS_ENABLE_THREADS 
  225 #ifdef KOKKOS_ENABLE_CUDA 
void run_fad_hierarchical_flat(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
 
std::enable_if< !Kokkos::is_view_fad< View2 >::value, bool >::type check(const View1 &v_gold, const View2 &v, const double tol)
 
#define INST_FUNC_DEV(DEV)
 
double time_fad_hierarchical_team(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
 
void init_fad(const V1 &v1, const V2 &v2, const V3 &v3, const V4 &v4, const V5 &v5)
 
void check_residual(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
 
double time_fad_hierarchical_flat(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
 
void run_fad_hierarchical_team(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)