10 #define SACADO_VIEW_CUDA_HIERARCHICAL 1
11 #define SACADO_ALIGN_SFAD 1
17 #include "Kokkos_Timer.hpp"
// Fragment of a residual-assembly kernel launcher (presumably
// run_fad_hierarchical_flat -- the line carrying the function name is not
// part of this excerpt; the embedded source numbers skip lines, so braces and
// some statements are missing as well).
//
// Visible behaviour: a Kokkos::TeamPolicy parallel_for in which every
// (league_rank, team_rank) pair is mapped to one cell, and for each basis
// function residual(cell,basis) is written as the sum of a flux*wgb
// accumulation and a src*wbs accumulation.
//
// Visible parameters: src, wbs, residual (const view references); flux and
// wgb are used in the body and presumably come from the elided first
// parameter line.
19 template<
typename FluxView,
typename WgbView,
typename SrcView,
20 typename WbsView,
typename ResidualView>
22 const SrcView& src,
const WbsView& wbs,
23 const ResidualView& residual)
// Execution space, accumulator type and team handle are all derived from the
// residual view type.
25 typedef typename ResidualView::execution_space execution_space;
26 typedef typename Kokkos::ThreadLocalScalarType<ResidualView>::type local_scalar_type;
27 typedef Kokkos::TeamPolicy<execution_space> policy_type;
28 typedef typename policy_type::member_type team_member;
// Loop extents are read from wgb, which is indexed below as
// wgb(cell,basis,qp,dim).
30 const size_t num_cells = wgb.extent(0);
31 const int num_basis = wgb.extent(1);
32 const int num_points = wgb.extent(2);
33 const int num_dim = wgb.extent(3);
// is_cuda is defined on a line not shown in this excerpt.
// When is_cuda: vector length 32 and 256/32 = 8 team members; otherwise 1 and 1.
36 const int vector_size = is_cuda ? 32 : 1;
37 const int team_size = is_cuda ? 256/vector_size : 1;
// League size is num_cells/team_size rounded up, one cell per team member.
38 const size_t range = (num_cells+team_size-1)/team_size;
40 policy_type policy(range,team_size,vector_size);
41 Kokkos::parallel_for(policy, KOKKOS_LAMBDA (
const team_member& team)
// Flattened cell index for this team member.
// NOTE(review): because `range` is rounded up, `cell` can be >= num_cells in
// the last team whenever num_cells is not a multiple of team_size. No bounds
// guard is visible in this excerpt -- confirm one exists on an elided line or
// that callers always pass a multiple of team_size.
43 const size_t cell = team.league_rank()*team_size + team.team_rank();
// Accumulators for the two contributions.
// NOTE(review): embedded lines 46-47 are not shown; presumably they reset
// value/value2 for each basis function -- confirm.
44 local_scalar_type
value, value2;
45 for (
int basis=0; basis<num_basis; ++basis) {
// The dim loop below has no braces, so only the `value +=` statement is its
// body; `value2 +=` runs once per quadrature point.
48 for (
int qp=0; qp<num_points; ++qp) {
49 for (
int dim=0; dim<num_dim; ++dim)
50 value += flux(cell,qp,dim)*wgb(cell,basis,qp,dim);
51 value2 += src(cell,qp)*wbs(cell,basis,qp);
53 residual(cell,basis) = value+value2;
// Fragment of a second residual-assembly kernel launcher (presumably
// run_fad_hierarchical_team -- the line carrying the function name is not
// part of this excerpt; the embedded source numbers skip lines, so braces and
// some statements are missing as well).
//
// Visible behaviour: one team per cell (the policy's league size is
// num_cells); the team members split the basis functions between them, each
// starting at its team_rank and stepping by team_size. For every basis
// function it handles, a member writes residual(cell,basis) as the sum of a
// flux*wgb accumulation and a src*wbs accumulation.
//
// Visible parameters: src, wbs, residual (const view references); flux and
// wgb are used in the body and presumably come from the elided first
// parameter line.
58 template<
typename FluxView,
typename WgbView,
typename SrcView,
59 typename WbsView,
typename ResidualView>
61 const SrcView& src,
const WbsView& wbs,
62 const ResidualView& residual)
// Execution space, accumulator type and team handle are all derived from the
// residual view type.
64 typedef typename ResidualView::execution_space execution_space;
65 typedef typename Kokkos::ThreadLocalScalarType<ResidualView>::type local_scalar_type;
66 typedef Kokkos::TeamPolicy<execution_space> policy_type;
67 typedef typename policy_type::member_type team_member;
// Loop extents are read from wgb, which is indexed below as
// wgb(cell,basis,qp,dim).
69 const size_t num_cells = wgb.extent(0);
70 const int num_basis = wgb.extent(1);
71 const int num_points = wgb.extent(2);
72 const int num_dim = wgb.extent(3);
// is_cuda is defined on a line not shown in this excerpt.
// When is_cuda: vector length 32 and 256/32 = 8 team members; otherwise 1 and 1.
75 const int vector_size = is_cuda ? 32 : 1;
76 const int team_size = is_cuda ? 256/vector_size : 1;
// League size equals num_cells, so league_rank() is used directly as the cell
// index and stays below num_cells.
78 policy_type policy(num_cells,team_size,vector_size);
79 Kokkos::parallel_for(policy, KOKKOS_LAMBDA (
const team_member& team)
81 const int team_rank = team.team_rank();
82 const size_t cell = team.league_rank();
// Accumulators for the two contributions.
// NOTE(review): embedded lines 85-86 are not shown; presumably they reset
// value/value2 for each basis function -- confirm.
83 local_scalar_type
value, value2;
// Basis functions are distributed over team members with stride team_size.
84 for (
int basis=team_rank; basis<num_basis; basis+=team_size) {
// The dim loop below has no braces, so only the `value +=` statement is its
// body; `value2 +=` runs once per quadrature point.
87 for (
int qp=0; qp<num_points; ++qp) {
88 for (
int dim=0; dim<num_dim; ++dim)
89 value += flux(cell,qp,dim)*wgb(cell,basis,qp,dim);
90 value2 += src(cell,qp)*wbs(cell,basis,qp);
92 residual(cell,basis) = value+value2;
// Fragment of a timing driver (presumably time_fad_hierarchical_flat, the
// first of the two functions instantiated by INST_FUNC_FAD_N_DEV; the line
// carrying the function name and the leading parameters is not part of this
// excerpt).
//
// Visible behaviour: allocates the five views, fills them through init_fad,
// runs a loop of ntrial iterations (loop body not shown) and converts the
// timer reading into seconds per trial per cell.
//
// Visible parameters: ndim, ntrial, check. ncells, num_basis, num_points,
// FadStride, ContLayout and timer are used below but declared on lines that
// are not shown here.
97 template <
typename FadType,
int N,
typename ExecSpace>
99 int ndim,
int ntrial,
bool check)
// With SACADO_ALIGN_SFAD defined, N is rounded up to the next multiple of
// FadStride and the Fad type is re-instantiated with that size.
102 #if defined(SACADO_ALIGN_SFAD)
103 static const int Nalign = ((
N+FadStride-1)/FadStride)*FadStride;
104 typedef typename FadType::template apply_N<Nalign>::type AlignedFadType;
// Presumably the #else branch (the #else/#endif lines are not shown): use the
// Fad type unchanged.
106 typedef FadType AlignedFadType;
109 typedef typename ExecSpace::array_layout DefaultLayout;
// Plain double views for the basis data, Fad-valued views for flux, src and
// residual.
111 typedef Kokkos::View<double****,ExecSpace> t_4DView_d;
112 typedef Kokkos::View<double***,ExecSpace> t_3DView_d;
113 typedef Kokkos::View<AlignedFadType***,ContLayout,ExecSpace> t_3DView;
114 typedef Kokkos::View<AlignedFadType**,ContLayout,ExecSpace> t_2DView;
116 t_4DView_d wgb(
"",ncells,num_basis,num_points,ndim);
117 t_3DView_d wbs(
"",ncells,num_basis,num_points);
// The Fad-valued views receive one extra trailing extent, N+1 (presumably the
// Sacado Fad dimension: N derivative components plus the value -- confirm).
// Note that the unaligned N is used here, not Nalign.
118 t_3DView flux(
"",ncells,num_points,ndim,
N+1);
119 t_2DView src(
"",ncells,num_points,
N+1);
120 t_2DView residual(
"",ncells,num_basis,
N+1);
121 init_fad(wgb, wbs, flux, src, residual);
// Timed repetition loop; its body is not part of this excerpt.
129 for (
int i=0;
i<ntrial; ++
i)
// Elapsed seconds divided by the number of trials and the number of cells.
// NOTE(review): no guard against ntrial == 0 or ncells == 0 is visible here.
132 double time = timer.seconds() / ntrial / ncells;
// Fragment of a second timing driver (presumably time_fad_hierarchical_team,
// the second of the two functions instantiated by INST_FUNC_FAD_N_DEV; the
// line carrying the function name and the leading parameters is not part of
// this excerpt).
//
// Visible behaviour: allocates the five views, fills them through init_fad,
// runs a loop of ntrial iterations (loop body not shown) and converts the
// timer reading into seconds per trial per cell.
//
// Visible parameters: ndim, ntrial, check. ncells, num_basis, num_points,
// FadStride, ContLayout and timer are used below but declared on lines that
// are not shown here.
141 template <
typename FadType,
int N,
typename ExecSpace>
143 int ndim,
int ntrial,
bool check)
// With SACADO_ALIGN_SFAD defined, N is rounded up to the next multiple of
// FadStride and the Fad type is re-instantiated with that size.
146 #if defined(SACADO_ALIGN_SFAD)
147 static const int Nalign = ((
N+FadStride-1)/FadStride)*FadStride;
148 typedef typename FadType::template apply_N<Nalign>::type AlignedFadType;
// Presumably the #else branch (the #else/#endif lines are not shown): use the
// Fad type unchanged.
150 typedef FadType AlignedFadType;
153 typedef typename ExecSpace::array_layout DefaultLayout;
// Plain double views for the basis data, Fad-valued views for flux, src and
// residual.
155 typedef Kokkos::View<double****,ExecSpace> t_4DView_d;
156 typedef Kokkos::View<double***,ExecSpace> t_3DView_d;
157 typedef Kokkos::View<AlignedFadType***,ContLayout,ExecSpace> t_3DView;
158 typedef Kokkos::View<AlignedFadType**,ContLayout,ExecSpace> t_2DView;
160 t_4DView_d wgb(
"",ncells,num_basis,num_points,ndim);
161 t_3DView_d wbs(
"",ncells,num_basis,num_points);
// The Fad-valued views receive one extra trailing extent, N+1 (presumably the
// Sacado Fad dimension: N derivative components plus the value -- confirm).
// Note that the unaligned N is used here, not Nalign.
162 t_3DView flux(
"",ncells,num_points,ndim,
N+1);
163 t_2DView src(
"",ncells,num_points,
N+1);
164 t_2DView residual(
"",ncells,num_basis,
N+1);
165 init_fad(wgb, wbs, flux, src, residual);
// Timed repetition loop; its body is not part of this excerpt.
173 for (
int i=0;
i<ntrial; ++
i)
// Elapsed seconds divided by the number of trials and the number of cells.
// NOTE(review): no guard against ntrial == 0 or ncells == 0 is visible here.
176 double time = timer.seconds() / ntrial / ncells;
// Explicitly instantiates both timing entry points
// (time_fad_hierarchical_flat and time_fad_hierarchical_team) for one
// combination of Fad type FAD, derivative count N and execution space DEV.
185 #define INST_FUNC_FAD_N_DEV(FAD,N,DEV) \
186 template double time_fad_hierarchical_flat< FAD, N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check); \
187 template double time_fad_hierarchical_team< FAD, N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check);
// Instantiates the pair above for SFadType and SLFadType with fad_dim on
// execution space DEV. SFadType, SLFadType and fad_dim are declared outside
// this excerpt.
189 #define INST_FUNC_DEV(DEV) \
190 INST_FUNC_FAD_N_DEV( SFadType, fad_dim, DEV ) \
191 INST_FUNC_FAD_N_DEV( SLFadType, fad_dim, DEV )
// One guard per Kokkos backend. The lines inside each guard (presumably an
// INST_FUNC_DEV invocation followed by #endif) are not part of this excerpt.
193 #ifdef KOKKOS_ENABLE_SERIAL
197 #ifdef KOKKOS_ENABLE_OPENMP
201 #ifdef KOKKOS_ENABLE_THREADS
205 #ifdef KOKKOS_ENABLE_CUDA
void run_fad_hierarchical_flat(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
std::enable_if< !Kokkos::is_view_fad< View2 >::value, bool >::type check(const View1 &v_gold, const View2 &v, const double tol)
#define INST_FUNC_DEV(DEV)
double time_fad_hierarchical_team(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
void init_fad(const V1 &v1, const V2 &v2, const V3 &v3, const V4 &v4, const V5 &v5)
void check_residual(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
double time_fad_hierarchical_flat(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
void run_fad_hierarchical_team(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)