14 #include "Kokkos_Timer.hpp"
16 template<
typename FluxView,
typename WgbView,
typename SrcView,
17 typename WbsView,
typename ResidualView>
19 const SrcView& src,
const WbsView& wbs,
20 const ResidualView& residual)
22 typedef typename ResidualView::execution_space execution_space;
23 typedef typename ResidualView::non_const_value_type scalar_type;
25 const size_t num_cells = wgb.extent(0);
26 const int num_basis = wgb.extent(1);
27 const int num_points = wgb.extent(2);
28 const int num_dim = wgb.extent(3);
30 Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>( 0,num_cells ),
31 KOKKOS_LAMBDA (
const size_t cell)
33 scalar_type
value, value2;
34 for (
int basis=0; basis<num_basis; ++basis) {
37 for (
int qp=0; qp<num_points; ++qp) {
38 for (
int dim=0; dim<num_dim; ++dim)
39 value += flux(cell,qp,dim)*wgb(cell,basis,qp,dim);
40 value2 += src(cell,qp)*wbs(cell,basis,qp);
42 residual(cell,basis) = value+value2;
47 template<
typename FluxView,
typename WgbView,
typename SrcView,
48 typename WbsView,
typename ResidualView>
50 const SrcView& src,
const WbsView& wbs,
51 const ResidualView& residual)
53 typedef typename ResidualView::execution_space execution_space;
54 typedef typename ResidualView::non_const_value_type scalar_type;
55 typedef Kokkos::TeamPolicy<execution_space> policy_type;
56 typedef typename policy_type::member_type team_member;
57 typedef Kokkos::View<scalar_type*, typename execution_space::scratch_memory_space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > tmp_scratch_type;
59 const size_t num_cells = wgb.extent(0);
60 const int num_basis = wgb.extent(1);
61 const int num_points = wgb.extent(2);
62 const int num_dim = wgb.extent(3);
64 const int vector_size = 1;
66 const int fad_size = Kokkos::dimension_scalar(residual);
67 const size_t range = (num_cells+team_size-1)/team_size;
68 const size_t bytes = 2*tmp_scratch_type::shmem_size(team_size,fad_size);
69 policy_type policy(range,team_size,vector_size);
71 Kokkos::parallel_for(policy.set_scratch_size(0,Kokkos::PerTeam(bytes)),
72 KOKKOS_LAMBDA (
const team_member& team)
74 const int team_rank = team.team_rank();
75 tmp_scratch_type
value(team.team_scratch(0), team_size, fad_size);
76 tmp_scratch_type value2(team.team_scratch(0), team_size, fad_size);
77 const size_t cell = team.league_rank()*team_size + team_rank;
78 if (cell < num_cells) {
79 for (
int basis=0; basis<num_basis; ++basis) {
80 value(team_rank) = 0.0;
81 value2(team_rank) = 0.0;
82 for (
int qp=0; qp<num_points; ++qp) {
83 for (
int dim=0; dim<num_dim; ++dim)
84 value(team_rank) += flux(cell,qp,dim)*wgb(cell,basis,qp,dim);
85 value2(team_rank) += src(cell,qp)*wbs(cell,basis,qp);
87 residual(cell,basis) =
value(team_rank)+value2(team_rank);
93 template<
int N,
typename FluxView,
typename WgbView,
typename SrcView,
94 typename WbsView,
typename ResidualView>
96 const SrcView& src,
const WbsView& wbs,
97 const ResidualView& residual)
99 typedef typename ResidualView::execution_space execution_space;
100 typedef typename ResidualView::non_const_value_type scalar_type;
102 const size_t num_cells = wgb.extent(0);
103 const int num_basis = wgb.extent(1);
104 const int num_points = wgb.extent(2);
105 const int num_dim = wgb.extent(3);
107 Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>( 0,num_cells ),
108 KOKKOS_LAMBDA (
const size_t cell)
110 scalar_type
value[
N+1],value2[
N+1];
111 for (
int basis=0; basis<num_basis; ++basis) {
112 for (
int k=0; k<
N+1; ++k) {
116 for (
int qp=0; qp<num_points; ++qp) {
117 for (
int dim=0; dim<num_dim; ++dim) {
118 const scalar_type flux_val = flux(cell,qp,dim,N);
119 const scalar_type wgb_val = wgb(cell,basis,qp,dim,N);
120 value[
N] += flux_val*wgb_val;
121 for(
int k=0; k<
N; k++)
123 flux_val*wgb(cell,basis,qp,dim,k)+flux(cell,qp,dim,k)*wgb_val;
125 const scalar_type src_val = src(cell,qp,N);
126 const scalar_type wbs_val = wbs(cell,basis,qp,N);
127 value2[
N] += src_val*wbs_val;
128 for(
int k=0; k<
N; k++)
129 value2[k] += src_val*wbs(cell,basis,qp,k)+src(cell,qp,k)*wbs_val;
131 for(
int k=0; k<=
N; k++)
132 residual(cell,basis,k) = value[k]+value2[k];
137 template<
int N,
typename FluxView,
typename WgbView,
typename SrcView,
138 typename WbsView,
typename ResidualView>
140 const SrcView& src,
const WbsView& wbs,
141 const ResidualView& residual)
143 typedef typename ResidualView::execution_space execution_space;
144 typedef typename ResidualView::non_const_value_type scalar_type;
145 typedef Kokkos::TeamPolicy<execution_space> policy_type;
146 typedef typename policy_type::member_type team_member;
147 typedef Kokkos::View<scalar_type[N+1], typename execution_space::scratch_memory_space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > tmp_scratch_type;
149 const size_t num_cells = wgb.extent(0);
150 const int num_basis = wgb.extent(1);
151 int num_points = wgb.extent(2);
152 int num_dim = wgb.extent(3);
154 const size_t bytes = 2*tmp_scratch_type::shmem_size();
155 policy_type policy(num_cells,num_basis,32);
156 Kokkos::parallel_for(policy.set_scratch_size(0,Kokkos::PerThread(bytes)),
157 KOKKOS_LAMBDA (
const team_member& team)
159 tmp_scratch_type
value(team.thread_scratch(0));
160 tmp_scratch_type value2(team.thread_scratch(0));
161 const size_t cell = team.league_rank();
162 Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,num_basis),
163 [&] (
const int& basis)
165 Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,
N+1),
171 for (
int qp=0; qp<num_points; ++qp) {
172 for (
int dim=0; dim<num_dim; ++dim) {
173 const scalar_type flux_val = flux(cell,qp,dim,
N);
174 const scalar_type wgb_val = wgb(cell,basis,qp,dim,
N);
175 Kokkos::single(Kokkos::PerThread(team), [&] () {
176 value[
N] += flux_val*wgb_val;
178 Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,
N),
182 flux_val*wgb(cell,basis,qp,dim,k)+flux(cell,qp,dim,k)*wgb_val;
185 const scalar_type src_val = src(cell,qp,
N);
186 const scalar_type wbs_val = wbs(cell,basis,qp,
N);
187 Kokkos::single(Kokkos::PerThread(team), [&] () {
188 value2[
N] += src_val*wbs_val;
190 Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,
N),
193 value2[k] += src_val*wbs(cell,basis,qp,k)+src(cell,qp,k)*wbs_val;
196 Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,
N+1),
199 residual(cell,basis,k) =
value[k]+value2[k];
205 template <
typename FadType,
int N,
typename ExecSpace>
207 int ntrial,
bool check)
209 typedef Kokkos::View<FadType****,ExecSpace> t_4DView;
210 typedef Kokkos::View<FadType***,ExecSpace> t_3DView;
211 typedef Kokkos::View<FadType**,ExecSpace> t_2DView;
213 t_4DView wgb(
"",ncells,num_basis,num_points,ndim,
N+1);
214 t_3DView flux(
"",ncells,num_points,ndim,
N+1);
215 t_3DView wbs(
"",ncells,num_basis,num_points,
N+1);
216 t_2DView src(
"",ncells,num_points,
N+1);
217 t_2DView residual(
"",ncells,num_basis,
N+1);
218 init_fad(wgb, wbs, flux, src, residual);
226 for (
int i=0;
i<ntrial; ++
i)
229 double time = timer.seconds() / ntrial / ncells;
238 template <
typename FadType,
int N,
typename ExecSpace>
240 int ntrial,
bool check)
242 typedef Kokkos::View<FadType****,ExecSpace> t_4DView;
243 typedef Kokkos::View<FadType***,ExecSpace> t_3DView;
244 typedef Kokkos::View<FadType**,ExecSpace> t_2DView;
246 t_4DView wgb(
"",ncells,num_basis,num_points,ndim,
N+1);
247 t_3DView flux(
"",ncells,num_points,ndim,
N+1);
248 t_3DView wbs(
"",ncells,num_basis,num_points,
N+1);
249 t_2DView src(
"",ncells,num_points,
N+1);
250 t_2DView residual(
"",ncells,num_basis,
N+1);
251 init_fad(wgb, wbs, flux, src, residual);
259 for (
int i=0;
i<ntrial; ++
i)
262 double time = timer.seconds() / ntrial / ncells;
271 template <
int N,
typename ExecSpace>
273 int ntrial,
bool check)
275 typedef Kokkos::View<double****[N+1],ExecSpace> t_4DView;
276 typedef Kokkos::View<double***[N+1],ExecSpace> t_3DView;
277 typedef Kokkos::View<double**[N+1],ExecSpace> t_2DView;
279 t_4DView wgb(
"",ncells,num_basis,num_points,ndim);
280 t_3DView flux(
"",ncells,num_points,ndim);
281 t_3DView wbs(
"",ncells,num_basis,num_points);
282 t_2DView src(
"",ncells,num_points);
283 t_2DView residual(
"",ncells,num_basis);
287 run_analytic_flat<N>(flux, wgb, src, wbs, residual);
292 for (
int i=0;
i<ntrial; ++
i)
293 run_analytic_flat<N>(flux, wgb, src, wbs, residual);
295 double time = timer.seconds() / ntrial / ncells;
304 template <
int N,
typename ExecSpace>
306 int ntrial,
bool check)
308 typedef Kokkos::View<double****[N+1],ExecSpace> t_4DView;
309 typedef Kokkos::View<double***[N+1],ExecSpace> t_3DView;
310 typedef Kokkos::View<double**[N+1],ExecSpace> t_2DView;
311 typedef Kokkos::View<const double***[N+1],ExecSpace,Kokkos::MemoryTraits<Kokkos::RandomAccess> > t_3DView_const;
313 t_4DView wgb(
"",ncells,num_basis,num_points,ndim);
314 t_3DView flux(
"",ncells,num_points,ndim);
315 t_3DView wbs(
"",ncells,num_basis,num_points);
316 t_2DView src(
"",ncells,num_points);
317 t_2DView residual(
"",ncells,num_basis);
320 t_3DView_const flux_const = flux;
323 run_analytic_flat<N>(flux_const, wgb, src, wbs, residual);
328 for (
int i=0;
i<ntrial; ++
i)
329 run_analytic_flat<N>(flux_const, wgb, src, wbs, residual);
331 double time = timer.seconds() / ntrial / ncells;
340 template <
int N,
typename ExecSpace>
342 int ntrial,
bool check)
344 typedef Kokkos::View<double****[N+1],ExecSpace> t_4DView;
345 typedef Kokkos::View<double***[N+1],ExecSpace> t_3DView;
346 typedef Kokkos::View<double**[N+1],ExecSpace> t_2DView;
347 typedef Kokkos::View<const double***[N+1],ExecSpace,Kokkos::MemoryTraits<Kokkos::RandomAccess> > t_3DView_const;
349 t_4DView wgb(
"",ncells,num_basis,num_points,ndim);
350 t_3DView flux(
"",ncells,num_points,ndim);
351 t_3DView wbs(
"",ncells,num_basis,num_points);
352 t_2DView src(
"",ncells,num_points);
353 t_2DView residual(
"",ncells,num_basis);
356 t_3DView_const flux_const = flux;
359 run_analytic_team<N>(flux_const, wgb, src, wbs, residual);
364 for (
int i=0;
i<ntrial; ++
i)
365 run_analytic_team<N>(flux_const, wgb, src, wbs, residual);
367 double time = timer.seconds() / ntrial / ncells;
376 #define INST_FUNC_FAD_N_DEV(FAD,N,DEV) \
377 template double time_fad_flat< FAD, N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check); \
378 template double time_fad_scratch< FAD, N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check);
380 #define INST_FUNC_N_DEV(N,DEV) \
381 INST_FUNC_FAD_N_DEV(SFadType,N,DEV) \
382 INST_FUNC_FAD_N_DEV(SLFadType,N,DEV) \
383 INST_FUNC_FAD_N_DEV(DFadType,N,DEV) \
384 template double time_analytic_flat< N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check); \
385 template double time_analytic_const< N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check); \
386 template double time_analytic_team< N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check);
388 #define INST_FUNC_DEV(DEV) \
389 INST_FUNC_N_DEV( fad_dim, DEV )
391 #ifdef KOKKOS_ENABLE_SERIAL
395 #ifdef KOKKOS_ENABLE_OPENMP
399 #ifdef KOKKOS_ENABLE_THREADS
403 #ifdef KOKKOS_ENABLE_CUDA
void run_fad_scratch(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
double time_analytic_flat(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
double time_analytic_const(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
std::enable_if< !Kokkos::is_view_fad< View2 >::value, bool >::type check(const View1 &v_gold, const View2 &v, const double tol)
double time_fad_flat(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
double time_fad_scratch(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
void run_analytic_flat(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
void run_analytic_team(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
void init_array(const V1 &v1, const V2 &v2, const V3 &v3, const V4 &v4, const V5 &v5)
void init_fad(const V1 &v1, const V2 &v2, const V3 &v3, const V4 &v4, const V5 &v5)
void check_residual(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
#define INST_FUNC_DEV(DEV)
void run_fad_flat(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
double time_analytic_team(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)