34 #include "impl/Kokkos_Timer.hpp"
36 template<
typename FluxView,
typename WgbView,
typename SrcView,
37 typename WbsView,
typename ResidualView>
39 const SrcView& src,
const WbsView& wbs,
40 const ResidualView& residual)
42 typedef typename ResidualView::execution_space execution_space;
43 typedef typename ResidualView::non_const_value_type scalar_type;
45 const size_t num_cells = wgb.extent(0);
46 const int num_basis = wgb.extent(1);
47 const int num_points = wgb.extent(2);
48 const int num_dim = wgb.extent(3);
50 Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>( 0,num_cells ),
51 KOKKOS_LAMBDA (
const size_t cell)
53 scalar_type value, value2;
54 for (
int basis=0; basis<num_basis; ++basis) {
57 for (
int qp=0; qp<num_points; ++qp) {
58 for (
int dim=0; dim<num_dim; ++dim)
59 value += flux(cell,qp,dim)*wgb(cell,basis,qp,dim);
60 value2 += src(cell,qp)*wbs(cell,basis,qp);
62 residual(cell,basis) = value+value2;
67 template<
typename FluxView,
typename WgbView,
typename SrcView,
68 typename WbsView,
typename ResidualView>
70 const SrcView& src,
const WbsView& wbs,
71 const ResidualView& residual)
73 typedef typename ResidualView::execution_space execution_space;
74 typedef typename ResidualView::non_const_value_type scalar_type;
75 typedef Kokkos::TeamPolicy<execution_space> policy_type;
76 typedef typename policy_type::member_type team_member;
77 typedef Kokkos::View<scalar_type*, typename execution_space::scratch_memory_space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > tmp_scratch_type;
79 const size_t num_cells = wgb.extent(0);
80 const int num_basis = wgb.extent(1);
81 const int num_points = wgb.extent(2);
82 const int num_dim = wgb.extent(3);
84 const int vector_size = 1;
86 const int fad_size = Kokkos::dimension_scalar(residual);
87 const size_t range = (num_cells+team_size-1)/team_size;
88 const size_t bytes = 2*tmp_scratch_type::shmem_size(team_size,fad_size);
89 policy_type policy(range,team_size,vector_size);
91 Kokkos::parallel_for(policy.set_scratch_size(0,Kokkos::PerTeam(bytes)),
92 KOKKOS_LAMBDA (
const team_member& team)
94 const int team_rank = team.team_rank();
95 tmp_scratch_type value(team.team_scratch(0), team_size, fad_size);
96 tmp_scratch_type value2(team.team_scratch(0), team_size, fad_size);
97 const size_t cell = team.league_rank()*team_size + team_rank;
98 if (cell < num_cells) {
99 for (
int basis=0; basis<num_basis; ++basis) {
100 value(team_rank) = 0.0;
101 value2(team_rank) = 0.0;
102 for (
int qp=0; qp<num_points; ++qp) {
103 for (
int dim=0; dim<num_dim; ++dim)
104 value(team_rank) += flux(cell,qp,dim)*wgb(cell,basis,qp,dim);
105 value2(team_rank) += src(cell,qp)*wbs(cell,basis,qp);
107 residual(cell,basis) = value(team_rank)+value2(team_rank);
113 template<
int N,
typename FluxView,
typename WgbView,
typename SrcView,
114 typename WbsView,
typename ResidualView>
116 const SrcView& src,
const WbsView& wbs,
117 const ResidualView& residual)
119 typedef typename ResidualView::execution_space execution_space;
120 typedef typename ResidualView::non_const_value_type scalar_type;
122 const size_t num_cells = wgb.extent(0);
123 const int num_basis = wgb.extent(1);
124 const int num_points = wgb.extent(2);
125 const int num_dim = wgb.extent(3);
127 Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>( 0,num_cells ),
128 KOKKOS_LAMBDA (
const size_t cell)
130 scalar_type value[
N+1],value2[
N+1];
131 for (
int basis=0; basis<num_basis; ++basis) {
132 for (
int k=0; k<
N+1; ++k) {
136 for (
int qp=0; qp<num_points; ++qp) {
137 for (
int dim=0; dim<num_dim; ++dim) {
138 const scalar_type flux_val = flux(cell,qp,dim,N);
139 const scalar_type wgb_val = wgb(cell,basis,qp,dim);
140 value[
N] += flux_val*wgb_val;
141 for(
int k=0; k<
N; k++)
142 value[k] += flux(cell,qp,dim,k)*wgb_val;
144 const scalar_type src_val = src(cell,qp,N);
145 const scalar_type wbs_val = wbs(cell,basis,qp);
146 value2[
N] += src_val*wbs_val;
147 for(
int k=0; k<
N; k++)
148 value2[k] += src(cell,qp,k)*wbs_val;
150 for(
int k=0; k<=
N; k++)
151 residual(cell,basis,k) = value[k]+value2[k];
156 template<
int N,
typename FluxView,
typename WgbView,
typename SrcView,
157 typename WbsView,
typename ResidualView>
159 const SrcView& src,
const WbsView& wbs,
160 const ResidualView& residual)
162 typedef typename ResidualView::execution_space execution_space;
163 typedef typename ResidualView::non_const_value_type scalar_type;
164 typedef Kokkos::TeamPolicy<execution_space> policy_type;
165 typedef typename policy_type::member_type team_member;
166 typedef Kokkos::View<scalar_type[N+1], typename execution_space::scratch_memory_space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > tmp_scratch_type;
168 const size_t num_cells = wgb.extent(0);
169 const int num_basis = wgb.extent(1);
170 int num_points = wgb.extent(2);
171 int num_dim = wgb.extent(3);
173 const size_t bytes = 2*tmp_scratch_type::shmem_size();
174 policy_type policy(num_cells,num_basis,32);
175 Kokkos::parallel_for(policy.set_scratch_size(0,Kokkos::PerThread(bytes)),
176 KOKKOS_LAMBDA (
const team_member& team)
178 tmp_scratch_type value(team.thread_scratch(0));
179 tmp_scratch_type value2(team.thread_scratch(0));
180 const size_t cell = team.league_rank();
181 Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,num_basis),
182 [&] (
const int& basis)
184 Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,
N+1),
190 for (
int qp=0; qp<num_points; ++qp) {
191 for (
int dim=0; dim<num_dim; ++dim) {
192 const scalar_type flux_val = flux(cell,qp,dim,
N);
193 const scalar_type wgb_val = wgb(cell,basis,qp,dim);
194 Kokkos::single(Kokkos::PerThread(team), [&] () {
195 value[
N] += flux_val*wgb_val;
197 Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,
N),
200 value[k] += flux(cell,qp,dim,k)*wgb_val;
203 const scalar_type src_val = src(cell,qp,
N);
204 const scalar_type wbs_val = wbs(cell,basis,qp);
205 Kokkos::single(Kokkos::PerThread(team), [&] () {
206 value2[
N] += src_val*wbs_val;
208 Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,
N),
211 value2[k] += src(cell,qp,k)*wbs_val;
214 Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,
N+1),
217 residual(cell,basis,k) = value[k]+value2[k];
223 template <
typename FadType,
int N,
typename ExecSpace>
225 int ntrial,
bool check)
227 typedef Kokkos::View<double****,ExecSpace> t_4DView_d;
228 typedef Kokkos::View<double***,ExecSpace> t_3DView_d;
229 typedef Kokkos::View<FadType***,ExecSpace> t_3DView;
230 typedef Kokkos::View<FadType**,ExecSpace> t_2DView;
232 t_4DView_d wgb(
"",ncells,num_basis,num_points,ndim);
233 t_3DView_d wbs(
"",ncells,num_basis,num_points);
234 t_3DView flux(
"",ncells,num_points,ndim,
N+1);
235 t_2DView src(
"",ncells,num_points,
N+1);
236 t_2DView residual(
"",ncells,num_basis,
N+1);
237 init_fad(wgb, wbs, flux, src, residual);
244 Kokkos::Impl::Timer timer;
245 for (
int i=0; i<ntrial; ++i)
248 double time = timer.seconds() / ntrial / ncells;
257 template <
typename FadType,
int N,
typename ExecSpace>
259 int ntrial,
bool check)
261 typedef Kokkos::View<double****,ExecSpace> t_4DView_d;
262 typedef Kokkos::View<double***,ExecSpace> t_3DView_d;
263 typedef Kokkos::View<FadType***,ExecSpace> t_3DView;
264 typedef Kokkos::View<FadType**,ExecSpace> t_2DView;
266 t_4DView_d wgb(
"",ncells,num_basis,num_points,ndim);
267 t_3DView_d wbs(
"",ncells,num_basis,num_points);
268 t_3DView flux(
"",ncells,num_points,ndim,
N+1);
269 t_2DView src(
"",ncells,num_points,
N+1);
270 t_2DView residual(
"",ncells,num_basis,
N+1);
271 init_fad(wgb, wbs, flux, src, residual);
278 Kokkos::Impl::Timer timer;
279 for (
int i=0; i<ntrial; ++i)
282 double time = timer.seconds() / ntrial / ncells;
291 template <
int N,
typename ExecSpace>
293 int ntrial,
bool check)
295 typedef Kokkos::View<double****,ExecSpace> t_4DView_d;
296 typedef Kokkos::View<double***,ExecSpace> t_3DView_d;
297 typedef Kokkos::View<double***[N+1],ExecSpace> t_3DView;
298 typedef Kokkos::View<double**[N+1],ExecSpace> t_2DView;
300 t_4DView_d wgb(
"",ncells,num_basis,num_points,ndim);
301 t_3DView_d wbs(
"",ncells,num_basis,num_points);
302 t_3DView flux(
"",ncells,num_points,ndim);
303 t_2DView src(
"",ncells,num_points);
304 t_2DView residual(
"",ncells,num_basis);
308 run_analytic_flat<N>(flux, wgb, src, wbs, residual);
312 Kokkos::Impl::Timer timer;
313 for (
int i=0; i<ntrial; ++i)
314 run_analytic_flat<N>(flux, wgb, src, wbs, residual);
316 double time = timer.seconds() / ntrial / ncells;
325 template <
int N,
typename ExecSpace>
327 int ntrial,
bool check)
329 typedef Kokkos::View<double****,ExecSpace> t_4DView_d;
330 typedef Kokkos::View<double***,ExecSpace> t_3DView_d;
331 typedef Kokkos::View<double***[N+1],ExecSpace> t_3DView;
332 typedef Kokkos::View<double**[N+1],ExecSpace> t_2DView;
334 t_4DView_d wgb(
"",ncells,num_basis,num_points,ndim);
335 t_3DView_d wbs(
"",ncells,num_basis,num_points);
336 t_3DView flux(
"",ncells,num_points,ndim);
337 t_2DView src(
"",ncells,num_points);
338 t_2DView residual(
"",ncells,num_basis);
341 typedef Kokkos::View<const double***[N+1],ExecSpace,Kokkos::MemoryTraits<Kokkos::RandomAccess> > t_3DView_const;
342 t_3DView_const flux_const = flux;
345 run_analytic_flat<N>(flux_const, wgb, src, wbs, residual);
349 Kokkos::Impl::Timer timer;
350 for (
int i=0; i<ntrial; ++i)
351 run_analytic_flat<N>(flux_const, wgb, src, wbs, residual);
353 double time = timer.seconds() / ntrial / ncells;
362 template <
int N,
typename ExecSpace>
364 int ntrial,
bool check)
366 typedef Kokkos::View<double****,ExecSpace> t_4DView_d;
367 typedef Kokkos::View<double***,ExecSpace> t_3DView_d;
368 typedef Kokkos::View<double***[N+1],ExecSpace> t_3DView;
369 typedef Kokkos::View<double**[N+1],ExecSpace> t_2DView;
371 t_4DView_d wgb(
"",ncells,num_basis,num_points,ndim);
372 t_3DView_d wbs(
"",ncells,num_basis,num_points);
373 t_3DView flux(
"",ncells,num_points,ndim);
374 t_2DView src(
"",ncells,num_points);
375 t_2DView residual(
"",ncells,num_basis);
378 typedef Kokkos::View<const double***[N+1],ExecSpace,Kokkos::MemoryTraits<Kokkos::RandomAccess> > t_3DView_const;
379 t_3DView_const flux_const = flux;
382 run_analytic_team<N>(flux_const, wgb, src, wbs, residual);
386 Kokkos::Impl::Timer timer;
387 for (
int i=0; i<ntrial; ++i)
388 run_analytic_team<N>(flux_const, wgb, src, wbs, residual);
390 double time = timer.seconds() / ntrial / ncells;
399 #define INST_FUNC_FAD_N_DEV(FAD,N,DEV) \
400 template double time_fad_flat< FAD, N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check); \
401 template double time_fad_scratch< FAD, N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check);
403 #define INST_FUNC_N_DEV(N,DEV) \
404 INST_FUNC_FAD_N_DEV(SFadType,N,DEV) \
405 INST_FUNC_FAD_N_DEV(SLFadType,N,DEV) \
406 INST_FUNC_FAD_N_DEV(DFadType,N,DEV) \
407 template double time_analytic_flat< N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check); \
408 template double time_analytic_const< N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check); \
409 template double time_analytic_team< N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check);
411 #define INST_FUNC_DEV(DEV) \
412 INST_FUNC_N_DEV( fad_dim, DEV )
414 #ifdef KOKKOS_ENABLE_SERIAL
418 #ifdef KOKKOS_ENABLE_OPENMP
422 #ifdef KOKKOS_ENABLE_THREADS
426 #ifdef KOKKOS_ENABLE_CUDA
void run_fad_scratch(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
double time_analytic_flat(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
double time_analytic_const(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
std::enable_if< !Kokkos::is_view_fad< View2 >::value, bool >::type check(const View1 &v_gold, const View2 &v, const double tol)
double time_fad_flat(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
double time_fad_scratch(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
void run_analytic_flat(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
void run_analytic_team(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
void init_array(const V1 &v1, const V2 &v2, const V3 &v3, const V4 &v4, const V5 &v5)
void init_fad(const V1 &v1, const V2 &v2, const V3 &v3, const V4 &v4, const V5 &v5)
#define INST_FUNC_DEV(DEV)
void check_residual(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
void run_fad_flat(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
double time_analytic_team(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)