14 #include "Kokkos_Timer.hpp"
// Flat residual kernel: one parallel work item per cell, using the residual
// view's scalar type for the accumulators.
// NOTE(review): the line carrying the function name and the flux/wgb
// parameters, and every brace-only line, are not present in this excerpt;
// presumably this is run_fad_flat -- confirm against the full file.
16 template<
typename FluxView,
typename WgbView,
typename SrcView,
17 typename WbsView,
typename ResidualView>
19 const SrcView& src,
const WbsView& wbs,
20 const ResidualView& residual)
// Execution space and accumulator type are both taken from the residual view.
22 typedef typename ResidualView::execution_space execution_space;
23 typedef typename ResidualView::non_const_value_type scalar_type;
// Problem sizes are read from the four extents of wgb, indexed below as
// (cell, basis, qp, dim).
25 const size_t num_cells = wgb.extent(0);
26 const int num_basis = wgb.extent(1);
27 const int num_points = wgb.extent(2);
28 const int num_dim = wgb.extent(3);
// Range policy over [0, num_cells): the lambda is invoked once per cell.
30 Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>( 0,num_cells ),
31 KOKKOS_LAMBDA (
const size_t cell)
// Two accumulators: value sums the flux*wgb terms, value2 the src*wbs terms.
// NOTE(review): no reset of value/value2 per basis is visible here; the
// embedded numbering skips 35-36, so it is presumably on a dropped line.
33 scalar_type
value, value2;
34 for (
int basis=0; basis<num_basis; ++basis) {
37 for (
int qp=0; qp<num_points; ++qp) {
// The dim loop has no braces, so it covers only the value += statement;
// value2 is updated once per quadrature point.
38 for (
int dim=0; dim<num_dim; ++dim)
39 value += flux(cell,qp,dim)*wgb(cell,basis,qp,dim);
40 value2 += src(cell,qp)*wbs(cell,basis,qp);
// Store the sum of both contributions for this (cell, basis) entry.
// Presumably executed after the qp loop closes -- the closing brace is not
// visible in this excerpt.
42 residual(cell,basis) = value+value2;
// Team-policy residual kernel whose per-cell accumulators live in Kokkos
// scratch memory instead of local variables.
// NOTE(review): the line carrying the function name and the flux/wgb
// parameters, and every brace-only line, are not present in this excerpt;
// presumably this is run_fad_scratch -- confirm against the full file.
47 template<
typename FluxView,
typename WgbView,
typename SrcView,
48 typename WbsView,
typename ResidualView>
50 const SrcView& src,
const WbsView& wbs,
51 const ResidualView& residual)
53 typedef typename ResidualView::execution_space execution_space;
54 typedef typename ResidualView::non_const_value_type scalar_type;
55 typedef Kokkos::TeamPolicy<execution_space> policy_type;
56 typedef typename policy_type::member_type team_member;
// Unmanaged rank-1 view of scalar_type placed in the execution space's
// scratch memory space; used for the two accumulator arrays below.
57 typedef Kokkos::View<scalar_type*, typename execution_space::scratch_memory_space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > tmp_scratch_type;
// Problem sizes are read from the four extents of wgb, indexed below as
// (cell, basis, qp, dim).
59 const size_t num_cells = wgb.extent(0);
60 const int num_basis = wgb.extent(1);
61 const int num_points = wgb.extent(2);
62 const int num_dim = wgb.extent(3);
64 const int vector_size = 1;
// NOTE(review): team_size is used from here on but its definition is not
// visible in this excerpt (the embedded numbering skips line 65).
// fad_size is the scalar (Fad) dimension reported for the residual view.
66 const int fad_size = Kokkos::dimension_scalar(residual);
// Number of teams: num_cells divided by team_size, rounded up.
67 const size_t range = (num_cells+team_size-1)/team_size;
// Scratch request covers two arrays (value and value2) of team_size entries.
68 const size_t bytes = 2*tmp_scratch_type::shmem_size(team_size,fad_size);
69 policy_type policy(range,team_size,vector_size);
// Launch with `bytes` of level-0 scratch requested per team.
71 Kokkos::parallel_for(policy.set_scratch_size(0,Kokkos::PerTeam(bytes)),
72 KOKKOS_LAMBDA (
const team_member& team)
74 const int team_rank = team.team_rank();
// Both accumulator arrays are obtained from the team's level-0 scratch
// handle; each thread only touches its own team_rank entry.
75 tmp_scratch_type
value(team.team_scratch(0), team_size, fad_size);
76 tmp_scratch_type value2(team.team_scratch(0), team_size, fad_size);
// Each thread of the team handles one cell.
77 const size_t cell = team.league_rank()*team_size + team_rank;
// Guard: the last team may extend past num_cells because range is rounded up.
78 if (cell < num_cells) {
79 for (
int basis=0; basis<num_basis; ++basis) {
// Reset this thread's accumulators for each basis function.
80 value(team_rank) = 0.0;
81 value2(team_rank) = 0.0;
82 for (
int qp=0; qp<num_points; ++qp) {
// The dim loop has no braces, so it covers only the value update;
// value2 is updated once per quadrature point.
83 for (
int dim=0; dim<num_dim; ++dim)
84 value(team_rank) += flux(cell,qp,dim)*wgb(cell,basis,qp,dim);
85 value2(team_rank) += src(cell,qp)*wbs(cell,basis,qp);
// Store the sum of both contributions for this (cell, basis) entry.
87 residual(cell,basis) =
value(team_rank)+value2(team_rank);
// Flat residual kernel for views that carry an explicit trailing index of
// N+1 components per entry; one parallel work item per cell.
// NOTE(review): the line carrying the function name and the flux/wgb
// parameters, and every brace-only line, are not present in this excerpt.
// The calls at the timing drivers (run_analytic_flat<N>(...)) indicate this
// is run_analytic_flat.
93 template<
int N,
typename FluxView,
typename WgbView,
typename SrcView,
94 typename WbsView,
typename ResidualView>
96 const SrcView& src,
const WbsView& wbs,
97 const ResidualView& residual)
99 typedef typename ResidualView::execution_space execution_space;
100 typedef typename ResidualView::non_const_value_type scalar_type;
// Problem sizes are read from the four extents of wgb, indexed below as
// (cell, basis, qp, dim).
102 const size_t num_cells = wgb.extent(0);
103 const int num_basis = wgb.extent(1);
104 const int num_points = wgb.extent(2);
105 const int num_dim = wgb.extent(3);
// Range policy over [0, num_cells): the lambda is invoked once per cell.
107 Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>( 0,num_cells ),
108 KOKKOS_LAMBDA (
const size_t cell)
// Local accumulators with N+1 components each: value for the flux*wgb terms,
// value2 for the src*wbs terms.
110 scalar_type
value[
N+1],value2[
N+1];
111 for (
int basis=0; basis<num_basis; ++basis) {
// NOTE(review): the body of this k loop is not visible in this excerpt
// (numbering skips 113-115); presumably it zeroes value[k] and value2[k].
112 for (
int k=0; k<
N+1; ++k) {
116 for (
int qp=0; qp<num_points; ++qp) {
117 for (
int dim=0; dim<num_dim; ++dim) {
// Component N is handled separately from components 0..N-1.
// NOTE(review): presumably index N holds the value and 0..N-1 the
// derivative components, mirroring a Fad layout -- confirm.
118 const scalar_type flux_val = flux(cell,qp,dim,N);
119 const scalar_type wgb_val = wgb(cell,basis,qp,dim);
120 value[
N] += flux_val*wgb_val;
121 for(
int k=0; k<
N; k++)
122 value[k] += flux(cell,qp,dim,k)*wgb_val;
// Same pattern for the source term, once per quadrature point.
124 const scalar_type src_val = src(cell,qp,N);
125 const scalar_type wbs_val = wbs(cell,basis,qp);
126 value2[
N] += src_val*wbs_val;
127 for(
int k=0; k<
N; k++)
128 value2[k] += src(cell,qp,k)*wbs_val;
// Write all N+1 components of the result for this (cell, basis) entry.
130 for(
int k=0; k<=
N; k++)
131 residual(cell,basis,k) = value[k]+value2[k];
// Hierarchical (team / thread / vector) residual kernel for views that carry
// an explicit trailing index of N+1 components per entry.
// NOTE(review): the line carrying the function name and the flux/wgb
// parameters, several nested-lambda header lines, and every brace-only line
// are not present in this excerpt. The calls in the timing driver
// (run_analytic_team<N>(...)) indicate this is run_analytic_team.
136 template<
int N,
typename FluxView,
typename WgbView,
typename SrcView,
137 typename WbsView,
typename ResidualView>
139 const SrcView& src,
const WbsView& wbs,
140 const ResidualView& residual)
142 typedef typename ResidualView::execution_space execution_space;
143 typedef typename ResidualView::non_const_value_type scalar_type;
144 typedef Kokkos::TeamPolicy<execution_space> policy_type;
145 typedef typename policy_type::member_type team_member;
// Unmanaged view with a compile-time extent of N+1 scalars, placed in the
// execution space's scratch memory space.
146 typedef Kokkos::View<scalar_type[N+1], typename execution_space::scratch_memory_space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > tmp_scratch_type;
// Problem sizes are read from the four extents of wgb, indexed below as
// (cell, basis, qp, dim).
148 const size_t num_cells = wgb.extent(0);
149 const int num_basis = wgb.extent(1);
150 int num_points = wgb.extent(2);
151 int num_dim = wgb.extent(3);
// Scratch request covers two N+1-entry arrays (value and value2).
153 const size_t bytes = 2*tmp_scratch_type::shmem_size();
// One team per cell, num_basis threads per team, vector length 32.
154 policy_type policy(num_cells,num_basis,32);
// Launch with `bytes` of level-0 scratch requested per thread.
155 Kokkos::parallel_for(policy.set_scratch_size(0,Kokkos::PerThread(bytes)),
156 KOKKOS_LAMBDA (
const team_member& team)
// Both accumulator arrays are obtained from the thread's level-0 scratch handle.
158 tmp_scratch_type
value(team.thread_scratch(0));
159 tmp_scratch_type value2(team.thread_scratch(0));
// The league rank selects the cell this team works on.
160 const size_t cell = team.league_rank();
// Threads of the team are distributed over the basis functions.
161 Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,num_basis),
162 [&] (
const int& basis)
// Vector-level loop over the N+1 components.
// NOTE(review): its lambda body is not visible in this excerpt (numbering
// skips 165-169); presumably it zeroes value and value2 -- confirm.
164 Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,
N+1),
170 for (
int qp=0; qp<num_points; ++qp) {
171 for (
int dim=0; dim<num_dim; ++dim) {
172 const scalar_type flux_val = flux(cell,qp,dim,
N);
173 const scalar_type wgb_val = wgb(cell,basis,qp,dim);
// Component N is updated inside Kokkos::single(PerThread), i.e. not by
// every vector lane; components 0..N-1 are spread over the vector lanes.
174 Kokkos::single(Kokkos::PerThread(team), [&] () {
175 value[
N] += flux_val*wgb_val;
177 Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,
N),
180 value[k] += flux(cell,qp,dim,k)*wgb_val;
// Same pattern for the source term.
183 const scalar_type src_val = src(cell,qp,
N);
184 const scalar_type wbs_val = wbs(cell,basis,qp);
185 Kokkos::single(Kokkos::PerThread(team), [&] () {
186 value2[
N] += src_val*wbs_val;
188 Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,
N),
191 value2[k] += src(cell,qp,k)*wbs_val;
// Write all N+1 components of the result for this (cell, basis) entry,
// spread over the vector lanes.
194 Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,
N+1),
197 residual(cell,basis,k) =
value[k]+value2[k];
// Timing driver for a Fad-based kernel: allocates the views, initialises them
// with init_fad, and computes an average time per trial per cell.
// NOTE(review): the line carrying the function name and the leading
// parameters (ncells, num_basis, num_points, ndim are used below), the timer
// setup, and the body of the trial loop are not present in this excerpt.
// Going by the order of the instantiation macro this is presumably
// time_fad_flat -- confirm against the full file.
203 template <
typename FadType,
int N,
typename ExecSpace>
205 int ntrial,
bool check)
// wgb and wbs hold plain doubles; flux, src and residual hold FadType.
207 typedef Kokkos::View<double****,ExecSpace> t_4DView_d;
208 typedef Kokkos::View<double***,ExecSpace> t_3DView_d;
209 typedef Kokkos::View<FadType***,ExecSpace> t_3DView;
210 typedef Kokkos::View<FadType**,ExecSpace> t_2DView;
212 t_4DView_d wgb(
"",ncells,num_basis,num_points,ndim);
213 t_3DView_d wbs(
"",ncells,num_basis,num_points);
// The FadType views take one constructor argument beyond their rank, N+1.
// NOTE(review): presumably this is the Fad storage dimension -- confirm.
214 t_3DView flux(
"",ncells,num_points,ndim,
N+1);
215 t_2DView src(
"",ncells,num_points,
N+1);
216 t_2DView residual(
"",ncells,num_basis,
N+1);
217 init_fad(wgb, wbs, flux, src, residual);
// Trial loop of ntrial iterations; its body is not visible in this excerpt.
225 for (
int i=0;
i<ntrial; ++
i)
// Elapsed seconds divided by the number of trials and by the number of cells.
// NOTE(review): `timer` is not declared in the visible lines; `check` is not
// used in the visible lines.
228 double time = timer.seconds() / ntrial / ncells;
// Timing driver for a Fad-based kernel: allocates the views, initialises them
// with init_fad, and computes an average time per trial per cell.
// NOTE(review): the line carrying the function name and the leading
// parameters (ncells, num_basis, num_points, ndim are used below), the timer
// setup, and the body of the trial loop are not present in this excerpt.
// Going by the order of the instantiation macro this is presumably
// time_fad_scratch -- confirm against the full file.
237 template <
typename FadType,
int N,
typename ExecSpace>
239 int ntrial,
bool check)
// wgb and wbs hold plain doubles; flux, src and residual hold FadType.
241 typedef Kokkos::View<double****,ExecSpace> t_4DView_d;
242 typedef Kokkos::View<double***,ExecSpace> t_3DView_d;
243 typedef Kokkos::View<FadType***,ExecSpace> t_3DView;
244 typedef Kokkos::View<FadType**,ExecSpace> t_2DView;
246 t_4DView_d wgb(
"",ncells,num_basis,num_points,ndim);
247 t_3DView_d wbs(
"",ncells,num_basis,num_points);
// The FadType views take one constructor argument beyond their rank, N+1.
// NOTE(review): presumably this is the Fad storage dimension -- confirm.
248 t_3DView flux(
"",ncells,num_points,ndim,
N+1);
249 t_2DView src(
"",ncells,num_points,
N+1);
250 t_2DView residual(
"",ncells,num_basis,
N+1);
251 init_fad(wgb, wbs, flux, src, residual);
// Trial loop of ntrial iterations; its body is not visible in this excerpt.
259 for (
int i=0;
i<ntrial; ++
i)
// Elapsed seconds divided by the number of trials and by the number of cells.
// NOTE(review): `timer` is not declared in the visible lines; `check` is not
// used in the visible lines.
262 double time = timer.seconds() / ntrial / ncells;
// Timing driver for run_analytic_flat on plain double views whose last extent
// is the compile-time constant N+1.
// NOTE(review): the line carrying the function name and the leading
// parameters (ncells, num_basis, num_points, ndim are used below), view
// initialisation and timer setup are not present in this excerpt. Going by
// the order of the instantiation macro this is presumably
// time_analytic_flat -- confirm against the full file.
271 template <
int N,
typename ExecSpace>
273 int ntrial,
bool check)
// t_3DView / t_2DView have three / two runtime extents plus a static
// trailing extent of N+1.
275 typedef Kokkos::View<double****,ExecSpace> t_4DView_d;
276 typedef Kokkos::View<double***,ExecSpace> t_3DView_d;
277 typedef Kokkos::View<double***[N+1],ExecSpace> t_3DView;
278 typedef Kokkos::View<double**[N+1],ExecSpace> t_2DView;
280 t_4DView_d wgb(
"",ncells,num_basis,num_points,ndim);
281 t_3DView_d wbs(
"",ncells,num_basis,num_points);
282 t_3DView flux(
"",ncells,num_points,ndim);
283 t_2DView src(
"",ncells,num_points);
284 t_2DView residual(
"",ncells,num_basis);
// One call ahead of the trial loop.
// NOTE(review): presumably an untimed warm-up run -- the timer setup lines
// are not visible here, so confirm.
288 run_analytic_flat<N>(flux, wgb, src, wbs, residual);
// Trial loop: ntrial repetitions of the kernel.
293 for (
int i=0;
i<ntrial; ++
i)
294 run_analytic_flat<N>(flux, wgb, src, wbs, residual);
// Elapsed seconds divided by the number of trials and by the number of cells.
// NOTE(review): `timer` is not declared in the visible lines; `check` is not
// used in the visible lines.
296 double time = timer.seconds() / ntrial / ncells;
// Timing driver for run_analytic_flat where flux is passed through a
// const, RandomAccess view alias instead of the writable view.
// NOTE(review): the line carrying the function name and the leading
// parameters (ncells, num_basis, num_points, ndim are used below), view
// initialisation and timer setup are not present in this excerpt. Going by
// the order of the instantiation macro this is presumably
// time_analytic_const -- confirm against the full file.
305 template <
int N,
typename ExecSpace>
307 int ntrial,
bool check)
// t_3DView / t_2DView have three / two runtime extents plus a static
// trailing extent of N+1.
309 typedef Kokkos::View<double****,ExecSpace> t_4DView_d;
310 typedef Kokkos::View<double***,ExecSpace> t_3DView_d;
311 typedef Kokkos::View<double***[N+1],ExecSpace> t_3DView;
312 typedef Kokkos::View<double**[N+1],ExecSpace> t_2DView;
314 t_4DView_d wgb(
"",ncells,num_basis,num_points,ndim);
315 t_3DView_d wbs(
"",ncells,num_basis,num_points);
316 t_3DView flux(
"",ncells,num_points,ndim);
317 t_2DView src(
"",ncells,num_points);
318 t_2DView residual(
"",ncells,num_basis);
// Same shape as t_3DView but with const elements and the RandomAccess memory
// trait; flux_const is assigned from flux and is what the kernel receives.
321 typedef Kokkos::View<const double***[N+1],ExecSpace,Kokkos::MemoryTraits<Kokkos::RandomAccess> > t_3DView_const;
322 t_3DView_const flux_const = flux;
// One call ahead of the trial loop.
// NOTE(review): presumably an untimed warm-up run -- the timer setup lines
// are not visible here, so confirm.
325 run_analytic_flat<N>(flux_const, wgb, src, wbs, residual);
// Trial loop: ntrial repetitions of the kernel.
330 for (
int i=0;
i<ntrial; ++
i)
331 run_analytic_flat<N>(flux_const, wgb, src, wbs, residual);
// Elapsed seconds divided by the number of trials and by the number of cells.
// NOTE(review): `timer` is not declared in the visible lines; `check` is not
// used in the visible lines.
333 double time = timer.seconds() / ntrial / ncells;
// Timing driver for run_analytic_team, with flux passed through a const,
// RandomAccess view alias.
// NOTE(review): the line carrying the function name and the leading
// parameters (ncells, num_basis, num_points, ndim are used below), view
// initialisation and timer setup are not present in this excerpt. Going by
// the order of the instantiation macro this is presumably
// time_analytic_team -- confirm against the full file.
342 template <
int N,
typename ExecSpace>
344 int ntrial,
bool check)
// t_3DView / t_2DView have three / two runtime extents plus a static
// trailing extent of N+1.
346 typedef Kokkos::View<double****,ExecSpace> t_4DView_d;
347 typedef Kokkos::View<double***,ExecSpace> t_3DView_d;
348 typedef Kokkos::View<double***[N+1],ExecSpace> t_3DView;
349 typedef Kokkos::View<double**[N+1],ExecSpace> t_2DView;
351 t_4DView_d wgb(
"",ncells,num_basis,num_points,ndim);
352 t_3DView_d wbs(
"",ncells,num_basis,num_points);
353 t_3DView flux(
"",ncells,num_points,ndim);
354 t_2DView src(
"",ncells,num_points);
355 t_2DView residual(
"",ncells,num_basis);
// Same shape as t_3DView but with const elements and the RandomAccess memory
// trait; flux_const is assigned from flux and is what the kernel receives.
358 typedef Kokkos::View<const double***[N+1],ExecSpace,Kokkos::MemoryTraits<Kokkos::RandomAccess> > t_3DView_const;
359 t_3DView_const flux_const = flux;
// One call ahead of the trial loop.
// NOTE(review): presumably an untimed warm-up run -- the timer setup lines
// are not visible here, so confirm.
362 run_analytic_team<N>(flux_const, wgb, src, wbs, residual);
// Trial loop: ntrial repetitions of the kernel.
367 for (
int i=0;
i<ntrial; ++
i)
368 run_analytic_team<N>(flux_const, wgb, src, wbs, residual);
// Elapsed seconds divided by the number of trials and by the number of cells.
// NOTE(review): `timer` is not declared in the visible lines; `check` is not
// used in the visible lines.
370 double time = timer.seconds() / ntrial / ncells;
// Explicit-instantiation helpers for the timing drivers.
// INST_FUNC_FAD_N_DEV instantiates the two Fad drivers (time_fad_flat and
// time_fad_scratch) for one Fad type, derivative count N and device DEV.
379 #define INST_FUNC_FAD_N_DEV(FAD,N,DEV) \
380 template double time_fad_flat< FAD, N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check); \
381 template double time_fad_scratch< FAD, N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check);
// INST_FUNC_N_DEV instantiates the Fad drivers for SFadType, SLFadType and
// DFadType, plus the three analytic drivers, for one N and device DEV.
// NOTE(review): the three Fad type names are not defined in the visible
// lines; presumably they come from a project header.
383 #define INST_FUNC_N_DEV(N,DEV) \
384 INST_FUNC_FAD_N_DEV(SFadType,N,DEV) \
385 INST_FUNC_FAD_N_DEV(SLFadType,N,DEV) \
386 INST_FUNC_FAD_N_DEV(DFadType,N,DEV) \
387 template double time_analytic_flat< N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check); \
388 template double time_analytic_const< N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check); \
389 template double time_analytic_team< N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check);
// INST_FUNC_DEV fixes N to fad_dim for a given device.
// NOTE(review): fad_dim is not defined in the visible lines.
391 #define INST_FUNC_DEV(DEV) \
392 INST_FUNC_N_DEV( fad_dim, DEV )
// One guard per Kokkos backend. The guarded lines and the matching #endif
// lines are not present in this excerpt; presumably each invokes
// INST_FUNC_DEV for that backend's execution space -- confirm.
394 #ifdef KOKKOS_ENABLE_SERIAL
398 #ifdef KOKKOS_ENABLE_OPENMP
402 #ifdef KOKKOS_ENABLE_THREADS
406 #ifdef KOKKOS_ENABLE_CUDA
void run_fad_scratch(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
double time_analytic_flat(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
double time_analytic_const(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
std::enable_if< !Kokkos::is_view_fad< View2 >::value, bool >::type check(const View1 &v_gold, const View2 &v, const double tol)
double time_fad_flat(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
double time_fad_scratch(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
void run_analytic_flat(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
void run_analytic_team(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
void init_array(const V1 &v1, const V2 &v2, const V3 &v3, const V4 &v4, const V5 &v5)
void init_fad(const V1 &v1, const V2 &v2, const V3 &v3, const V4 &v4, const V5 &v5)
#define INST_FUNC_DEV(DEV)
void check_residual(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
void run_fad_flat(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
double time_analytic_team(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)