Sacado Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
const_basis/advection.cpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Sacado Package
4 //
5 // Copyright 2006 NTESS and the Sacado contributors.
6 // SPDX-License-Identifier: LGPL-2.1-or-later
7 // *****************************************************************************
8 // @HEADER
9 
10 #include "Sacado.hpp"
11 #include "advection.hpp"
12 #include "common.hpp"
13 
14 #include "Kokkos_Timer.hpp"
15 
16 template<typename FluxView, typename WgbView, typename SrcView,
17  typename WbsView, typename ResidualView>
18 void run_fad_flat(const FluxView& flux, const WgbView& wgb,
19  const SrcView& src, const WbsView& wbs,
20  const ResidualView& residual)
21 {
22  typedef typename ResidualView::execution_space execution_space;
23  typedef typename ResidualView::non_const_value_type scalar_type;
24 
25  const size_t num_cells = wgb.extent(0);
26  const int num_basis = wgb.extent(1);
27  const int num_points = wgb.extent(2);
28  const int num_dim = wgb.extent(3);
29 
30  Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>( 0,num_cells ),
31  KOKKOS_LAMBDA (const size_t cell)
32  {
33  scalar_type value, value2;
34  for (int basis=0; basis<num_basis; ++basis) {
35  value = 0.0;
36  value2 = 0.0;
37  for (int qp=0; qp<num_points; ++qp) {
38  for (int dim=0; dim<num_dim; ++dim)
39  value += flux(cell,qp,dim)*wgb(cell,basis,qp,dim);
40  value2 += src(cell,qp)*wbs(cell,basis,qp);
41  }
42  residual(cell,basis) = value+value2;
43  }
44  });
45 }
46 
47 template<typename FluxView, typename WgbView, typename SrcView,
48  typename WbsView, typename ResidualView>
49 void run_fad_scratch(const FluxView& flux, const WgbView& wgb,
50  const SrcView& src, const WbsView& wbs,
51  const ResidualView& residual)
52 {
53  typedef typename ResidualView::execution_space execution_space;
54  typedef typename ResidualView::non_const_value_type scalar_type;
55  typedef Kokkos::TeamPolicy<execution_space> policy_type;
56  typedef typename policy_type::member_type team_member;
57  typedef Kokkos::View<scalar_type*, typename execution_space::scratch_memory_space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > tmp_scratch_type;
58 
59  const size_t num_cells = wgb.extent(0);
60  const int num_basis = wgb.extent(1);
61  const int num_points = wgb.extent(2);
62  const int num_dim = wgb.extent(3);
63 
64  const int vector_size = 1;
65  const int team_size = is_cuda_space<execution_space>::value ? 32 : 1;
66  const int fad_size = Kokkos::dimension_scalar(residual);
67  const size_t range = (num_cells+team_size-1)/team_size;
68  const size_t bytes = 2*tmp_scratch_type::shmem_size(team_size,fad_size);
69  policy_type policy(range,team_size,vector_size);
70 
71  Kokkos::parallel_for(policy.set_scratch_size(0,Kokkos::PerTeam(bytes)),
72  KOKKOS_LAMBDA (const team_member& team)
73  {
74  const int team_rank = team.team_rank();
75  tmp_scratch_type value(team.team_scratch(0), team_size, fad_size);
76  tmp_scratch_type value2(team.team_scratch(0), team_size, fad_size);
77  const size_t cell = team.league_rank()*team_size + team_rank;
78  if (cell < num_cells) {
79  for (int basis=0; basis<num_basis; ++basis) {
80  value(team_rank) = 0.0;
81  value2(team_rank) = 0.0;
82  for (int qp=0; qp<num_points; ++qp) {
83  for (int dim=0; dim<num_dim; ++dim)
84  value(team_rank) += flux(cell,qp,dim)*wgb(cell,basis,qp,dim);
85  value2(team_rank) += src(cell,qp)*wbs(cell,basis,qp);
86  }
87  residual(cell,basis) = value(team_rank)+value2(team_rank);
88  }
89  }
90  });
91 }
92 
93 template<int N, typename FluxView, typename WgbView, typename SrcView,
94  typename WbsView, typename ResidualView>
95 void run_analytic_flat(const FluxView& flux, const WgbView& wgb,
96  const SrcView& src, const WbsView& wbs,
97  const ResidualView& residual)
98 {
99  typedef typename ResidualView::execution_space execution_space;
100  typedef typename ResidualView::non_const_value_type scalar_type;
101 
102  const size_t num_cells = wgb.extent(0);
103  const int num_basis = wgb.extent(1);
104  const int num_points = wgb.extent(2);
105  const int num_dim = wgb.extent(3);
106 
107  Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>( 0,num_cells ),
108  KOKKOS_LAMBDA (const size_t cell)
109  {
110  scalar_type value[N+1],value2[N+1];
111  for (int basis=0; basis<num_basis; ++basis) {
112  for (int k=0; k<N+1; ++k) {
113  value[k] = 0.0;
114  value2[k] = 0.0;
115  }
116  for (int qp=0; qp<num_points; ++qp) {
117  for (int dim=0; dim<num_dim; ++dim) {
118  const scalar_type flux_val = flux(cell,qp,dim,N);
119  const scalar_type wgb_val = wgb(cell,basis,qp,dim);
120  value[N] += flux_val*wgb_val;
121  for(int k=0; k<N; k++)
122  value[k] += flux(cell,qp,dim,k)*wgb_val;
123  }
124  const scalar_type src_val = src(cell,qp,N);
125  const scalar_type wbs_val = wbs(cell,basis,qp);
126  value2[N] += src_val*wbs_val;
127  for(int k=0; k<N; k++)
128  value2[k] += src(cell,qp,k)*wbs_val;
129  }
130  for(int k=0; k<=N; k++)
131  residual(cell,basis,k) = value[k]+value2[k];
132  }
133  });
134 }
135 
136 template<int N, typename FluxView, typename WgbView, typename SrcView,
137  typename WbsView, typename ResidualView>
138 void run_analytic_team(const FluxView& flux, const WgbView& wgb,
139  const SrcView& src, const WbsView& wbs,
140  const ResidualView& residual)
141 {
142  typedef typename ResidualView::execution_space execution_space;
143  typedef typename ResidualView::non_const_value_type scalar_type;
144  typedef Kokkos::TeamPolicy<execution_space> policy_type;
145  typedef typename policy_type::member_type team_member;
146  typedef Kokkos::View<scalar_type[N+1], typename execution_space::scratch_memory_space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > tmp_scratch_type;
147 
148  const size_t num_cells = wgb.extent(0);
149  const int num_basis = wgb.extent(1);
150  /*const*/ int num_points = wgb.extent(2);
151  /*const*/ int num_dim = wgb.extent(3);
152 
153  const size_t bytes = 2*tmp_scratch_type::shmem_size();
154  policy_type policy(num_cells,num_basis,32);
155  Kokkos::parallel_for(policy.set_scratch_size(0,Kokkos::PerThread(bytes)),
156  KOKKOS_LAMBDA (const team_member& team)
157  {
158  tmp_scratch_type value(team.thread_scratch(0));
159  tmp_scratch_type value2(team.thread_scratch(0));
160  const size_t cell = team.league_rank();
161  Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,num_basis),
162  [&] (const int& basis)
163  {
164  Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,N+1),
165  [&] (const int& k)
166  {
167  value(k) = 0;
168  value2(k) = 0;
169  });
170  for (int qp=0; qp<num_points; ++qp) {
171  for (int dim=0; dim<num_dim; ++dim) {
172  const scalar_type flux_val = flux(cell,qp,dim,N);
173  const scalar_type wgb_val = wgb(cell,basis,qp,dim);
174  Kokkos::single(Kokkos::PerThread(team), [&] () {
175  value[N] += flux_val*wgb_val;
176  });
177  Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,N),
178  [&] (const int& k)
179  {
180  value[k] += flux(cell,qp,dim,k)*wgb_val;
181  });
182  }
183  const scalar_type src_val = src(cell,qp,N);
184  const scalar_type wbs_val = wbs(cell,basis,qp);
185  Kokkos::single(Kokkos::PerThread(team), [&] () {
186  value2[N] += src_val*wbs_val;
187  });
188  Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,N),
189  [&] (const int& k)
190  {
191  value2[k] += src(cell,qp,k)*wbs_val;
192  });
193  }
194  Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,N+1),
195  [&] (const int& k)
196  {
197  residual(cell,basis,k) = value[k]+value2[k];
198  });
199  });
200  });
201 }
202 
203 template <typename FadType, int N, typename ExecSpace>
204 double time_fad_flat(int ncells, int num_basis, int num_points, int ndim,
205  int ntrial, bool check)
206 {
207  typedef Kokkos::View<double****,ExecSpace> t_4DView_d;
208  typedef Kokkos::View<double***,ExecSpace> t_3DView_d;
209  typedef Kokkos::View<FadType***,ExecSpace> t_3DView;
210  typedef Kokkos::View<FadType**,ExecSpace> t_2DView;
211 
212  t_4DView_d wgb("",ncells,num_basis,num_points,ndim);
213  t_3DView_d wbs("",ncells,num_basis,num_points);
214  t_3DView flux("",ncells,num_points,ndim,N+1);
215  t_2DView src("",ncells,num_points,N+1);
216  t_2DView residual("",ncells,num_basis,N+1);
217  init_fad(wgb, wbs, flux, src, residual);
218 
219  // Run once to warm up, complete any UVM transfers
220  run_fad_flat(flux, wgb, src, wbs, residual);
221 
222  // Time execution
223  Kokkos::fence();
224  Kokkos::Timer timer;
225  for (int i=0; i<ntrial; ++i)
226  run_fad_flat(flux, wgb, src, wbs, residual);
227  Kokkos::fence();
228  double time = timer.seconds() / ntrial / ncells;
229 
230  // Check result
231  if (check)
232  check_residual(flux, wgb, src, wbs, residual);
233 
234  return time;
235 }
236 
237 template <typename FadType, int N, typename ExecSpace>
238 double time_fad_scratch(int ncells, int num_basis, int num_points, int ndim,
239  int ntrial, bool check)
240 {
241  typedef Kokkos::View<double****,ExecSpace> t_4DView_d;
242  typedef Kokkos::View<double***,ExecSpace> t_3DView_d;
243  typedef Kokkos::View<FadType***,ExecSpace> t_3DView;
244  typedef Kokkos::View<FadType**,ExecSpace> t_2DView;
245 
246  t_4DView_d wgb("",ncells,num_basis,num_points,ndim);
247  t_3DView_d wbs("",ncells,num_basis,num_points);
248  t_3DView flux("",ncells,num_points,ndim,N+1);
249  t_2DView src("",ncells,num_points,N+1);
250  t_2DView residual("",ncells,num_basis,N+1);
251  init_fad(wgb, wbs, flux, src, residual);
252 
253  // Run once to warm up, complete any UVM transfers
254  run_fad_scratch(flux, wgb, src, wbs, residual);
255 
256  // Time execution
257  Kokkos::fence();
258  Kokkos::Timer timer;
259  for (int i=0; i<ntrial; ++i)
260  run_fad_scratch(flux, wgb, src, wbs, residual);
261  Kokkos::fence();
262  double time = timer.seconds() / ntrial / ncells;
263 
264  // Check result
265  if (check)
266  check_residual(flux, wgb, src, wbs, residual);
267 
268  return time;
269 }
270 
271 template <int N, typename ExecSpace>
272 double time_analytic_flat(int ncells, int num_basis, int num_points, int ndim,
273  int ntrial, bool check)
274 {
275  typedef Kokkos::View<double****,ExecSpace> t_4DView_d;
276  typedef Kokkos::View<double***,ExecSpace> t_3DView_d;
277  typedef Kokkos::View<double***[N+1],ExecSpace> t_3DView;
278  typedef Kokkos::View<double**[N+1],ExecSpace> t_2DView;
279 
280  t_4DView_d wgb("",ncells,num_basis,num_points,ndim);
281  t_3DView_d wbs("",ncells,num_basis,num_points);
282  t_3DView flux("",ncells,num_points,ndim);
283  t_2DView src("",ncells,num_points);
284  t_2DView residual("",ncells,num_basis);
285  init_array(wgb, wbs, flux, src, residual);
286 
287  // Run once to warm up, complete any UVM transfers
288  run_analytic_flat<N>(flux, wgb, src, wbs, residual);
289 
290  // Time execution
291  Kokkos::fence();
292  Kokkos::Timer timer;
293  for (int i=0; i<ntrial; ++i)
294  run_analytic_flat<N>(flux, wgb, src, wbs, residual);
295  Kokkos::fence();
296  double time = timer.seconds() / ntrial / ncells;
297 
298  // Check result
299  if (check)
300  check_residual(flux, wgb, src, wbs, residual);
301 
302  return time;
303 }
304 
305 template <int N, typename ExecSpace>
306 double time_analytic_const(int ncells, int num_basis, int num_points, int ndim,
307  int ntrial, bool check)
308 {
309  typedef Kokkos::View<double****,ExecSpace> t_4DView_d;
310  typedef Kokkos::View<double***,ExecSpace> t_3DView_d;
311  typedef Kokkos::View<double***[N+1],ExecSpace> t_3DView;
312  typedef Kokkos::View<double**[N+1],ExecSpace> t_2DView;
313 
314  t_4DView_d wgb("",ncells,num_basis,num_points,ndim);
315  t_3DView_d wbs("",ncells,num_basis,num_points);
316  t_3DView flux("",ncells,num_points,ndim);
317  t_2DView src("",ncells,num_points);
318  t_2DView residual("",ncells,num_basis);
319  init_array(wgb, wbs, flux, src, residual);
320 
321  typedef Kokkos::View<const double***[N+1],ExecSpace,Kokkos::MemoryTraits<Kokkos::RandomAccess> > t_3DView_const;
322  t_3DView_const flux_const = flux;
323 
324  // Run once to warm up, complete any UVM transfers
325  run_analytic_flat<N>(flux_const, wgb, src, wbs, residual);
326 
327  // Time execution
328  Kokkos::fence();
329  Kokkos::Timer timer;
330  for (int i=0; i<ntrial; ++i)
331  run_analytic_flat<N>(flux_const, wgb, src, wbs, residual);
332  Kokkos::fence();
333  double time = timer.seconds() / ntrial / ncells;
334 
335  // Check result
336  if (check)
337  check_residual(flux, wgb, src, wbs, residual);
338 
339  return time;
340 }
341 
342 template <int N, typename ExecSpace>
343 double time_analytic_team(int ncells, int num_basis, int num_points, int ndim,
344  int ntrial, bool check)
345 {
346  typedef Kokkos::View<double****,ExecSpace> t_4DView_d;
347  typedef Kokkos::View<double***,ExecSpace> t_3DView_d;
348  typedef Kokkos::View<double***[N+1],ExecSpace> t_3DView;
349  typedef Kokkos::View<double**[N+1],ExecSpace> t_2DView;
350 
351  t_4DView_d wgb("",ncells,num_basis,num_points,ndim);
352  t_3DView_d wbs("",ncells,num_basis,num_points);
353  t_3DView flux("",ncells,num_points,ndim);
354  t_2DView src("",ncells,num_points);
355  t_2DView residual("",ncells,num_basis);
356  init_array(wgb, wbs, flux, src, residual);
357 
358  typedef Kokkos::View<const double***[N+1],ExecSpace,Kokkos::MemoryTraits<Kokkos::RandomAccess> > t_3DView_const;
359  t_3DView_const flux_const = flux;
360 
361  // Run once to warm up, complete any UVM transfers
362  run_analytic_team<N>(flux_const, wgb, src, wbs, residual);
363 
364  // Time execution
365  Kokkos::fence();
366  Kokkos::Timer timer;
367  for (int i=0; i<ntrial; ++i)
368  run_analytic_team<N>(flux_const, wgb, src, wbs, residual);
369  Kokkos::fence();
370  double time = timer.seconds() / ntrial / ncells;
371 
372  // Check result
373  if (check)
374  check_residual(flux, wgb, src, wbs, residual);
375 
376  return time;
377 }
378 
379 #define INST_FUNC_FAD_N_DEV(FAD,N,DEV) \
380  template double time_fad_flat< FAD, N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check); \
381  template double time_fad_scratch< FAD, N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check);
382 
383 #define INST_FUNC_N_DEV(N,DEV) \
384  INST_FUNC_FAD_N_DEV(SFadType,N,DEV) \
385  INST_FUNC_FAD_N_DEV(SLFadType,N,DEV) \
386  INST_FUNC_FAD_N_DEV(DFadType,N,DEV) \
387  template double time_analytic_flat< N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check); \
388  template double time_analytic_const< N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check); \
389  template double time_analytic_team< N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check);
390 
391 #define INST_FUNC_DEV(DEV) \
392  INST_FUNC_N_DEV( fad_dim, DEV )
393 
394 #ifdef KOKKOS_ENABLE_SERIAL
395 INST_FUNC_DEV(Kokkos::Serial)
396 #endif
397 
398 #ifdef KOKKOS_ENABLE_OPENMP
399 INST_FUNC_DEV(Kokkos::OpenMP)
400 #endif
401 
402 #ifdef KOKKOS_ENABLE_THREADS
403 INST_FUNC_DEV(Kokkos::Threads)
404 #endif
405 
406 #ifdef KOKKOS_ENABLE_CUDA
407 INST_FUNC_DEV(Kokkos::Cuda)
408 #endif
void run_fad_scratch(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
Definition: advection.cpp:49
double time_analytic_flat(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
Definition: advection.cpp:272
double time_analytic_const(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
Definition: advection.cpp:305
std::enable_if< !Kokkos::is_view_fad< View2 >::value, bool >::type check(const View1 &v_gold, const View2 &v, const double tol)
double time_fad_flat(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
Definition: advection.cpp:206
double time_fad_scratch(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
Definition: advection.cpp:239
void run_analytic_flat(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
Definition: advection.cpp:95
void run_analytic_team(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
Definition: advection.cpp:139
void init_array(const V1 &v1, const V2 &v2, const V3 &v3, const V4 &v4, const V5 &v5)
void init_fad(const V1 &v1, const V2 &v2, const V3 &v3, const V4 &v4, const V5 &v5)
#define INST_FUNC_DEV(DEV)
const int N
void check_residual(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
int value
void run_fad_flat(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
Definition: advection.cpp:18
double time_analytic_team(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
Definition: advection.cpp:341