Sacado Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
advection.cpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Sacado Package
4 //
5 // Copyright 2006 NTESS and the Sacado contributors.
6 // SPDX-License-Identifier: LGPL-2.1-or-later
7 // *****************************************************************************
8 // @HEADER
9 
10 #include "Sacado.hpp"
11 #include "advection.hpp"
12 #include "common.hpp"
13 
14 #include "Kokkos_Timer.hpp"
15 
16 template<typename FluxView, typename WgbView, typename SrcView,
17  typename WbsView, typename ResidualView>
18 void run_fad_flat(const FluxView& flux, const WgbView& wgb,
19  const SrcView& src, const WbsView& wbs,
20  const ResidualView& residual)
21 {
22  typedef typename ResidualView::execution_space execution_space;
23  typedef typename ResidualView::non_const_value_type scalar_type;
24 
25  const size_t num_cells = wgb.extent(0);
26  const int num_basis = wgb.extent(1);
27  const int num_points = wgb.extent(2);
28  const int num_dim = wgb.extent(3);
29 
30  Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>( 0,num_cells ),
31  KOKKOS_LAMBDA (const size_t cell)
32  {
33  scalar_type value, value2;
34  for (int basis=0; basis<num_basis; ++basis) {
35  value = 0.0;
36  value2 = 0.0;
37  for (int qp=0; qp<num_points; ++qp) {
38  for (int dim=0; dim<num_dim; ++dim)
39  value += flux(cell,qp,dim)*wgb(cell,basis,qp,dim);
40  value2 += src(cell,qp)*wbs(cell,basis,qp);
41  }
42  residual(cell,basis) = value+value2;
43  }
44  });
45 }
46 
47 template<typename FluxView, typename WgbView, typename SrcView,
48  typename WbsView, typename ResidualView>
49 void run_fad_scratch(const FluxView& flux, const WgbView& wgb,
50  const SrcView& src, const WbsView& wbs,
51  const ResidualView& residual)
52 {
53  typedef typename ResidualView::execution_space execution_space;
54  typedef typename ResidualView::non_const_value_type scalar_type;
55  typedef Kokkos::TeamPolicy<execution_space> policy_type;
56  typedef typename policy_type::member_type team_member;
57  typedef Kokkos::View<scalar_type*, typename execution_space::scratch_memory_space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > tmp_scratch_type;
58 
59  const size_t num_cells = wgb.extent(0);
60  const int num_basis = wgb.extent(1);
61  const int num_points = wgb.extent(2);
62  const int num_dim = wgb.extent(3);
63 
64  const int vector_size = 1;
65  const int team_size = is_cuda_space<execution_space>::value ? 32 : 1;
66  const int fad_size = Kokkos::dimension_scalar(residual);
67  const size_t range = (num_cells+team_size-1)/team_size;
68  const size_t bytes = 2*tmp_scratch_type::shmem_size(team_size,fad_size);
69  policy_type policy(range,team_size,vector_size);
70 
71  Kokkos::parallel_for(policy.set_scratch_size(0,Kokkos::PerTeam(bytes)),
72  KOKKOS_LAMBDA (const team_member& team)
73  {
74  const int team_rank = team.team_rank();
75  tmp_scratch_type value(team.team_scratch(0), team_size, fad_size);
76  tmp_scratch_type value2(team.team_scratch(0), team_size, fad_size);
77  const size_t cell = team.league_rank()*team_size + team_rank;
78  if (cell < num_cells) {
79  for (int basis=0; basis<num_basis; ++basis) {
80  value(team_rank) = 0.0;
81  value2(team_rank) = 0.0;
82  for (int qp=0; qp<num_points; ++qp) {
83  for (int dim=0; dim<num_dim; ++dim)
84  value(team_rank) += flux(cell,qp,dim)*wgb(cell,basis,qp,dim);
85  value2(team_rank) += src(cell,qp)*wbs(cell,basis,qp);
86  }
87  residual(cell,basis) = value(team_rank)+value2(team_rank);
88  }
89  }
90  });
91 }
92 
93 template<int N, typename FluxView, typename WgbView, typename SrcView,
94  typename WbsView, typename ResidualView>
95 void run_analytic_flat(const FluxView& flux, const WgbView& wgb,
96  const SrcView& src, const WbsView& wbs,
97  const ResidualView& residual)
98 {
99  typedef typename ResidualView::execution_space execution_space;
100  typedef typename ResidualView::non_const_value_type scalar_type;
101 
102  const size_t num_cells = wgb.extent(0);
103  const int num_basis = wgb.extent(1);
104  const int num_points = wgb.extent(2);
105  const int num_dim = wgb.extent(3);
106 
107  Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>( 0,num_cells ),
108  KOKKOS_LAMBDA (const size_t cell)
109  {
110  scalar_type value[N+1],value2[N+1];
111  for (int basis=0; basis<num_basis; ++basis) {
112  for (int k=0; k<N+1; ++k) {
113  value[k] = 0.0;
114  value2[k] = 0.0;
115  }
116  for (int qp=0; qp<num_points; ++qp) {
117  for (int dim=0; dim<num_dim; ++dim) {
118  const scalar_type flux_val = flux(cell,qp,dim,N);
119  const scalar_type wgb_val = wgb(cell,basis,qp,dim,N);
120  value[N] += flux_val*wgb_val;
121  for(int k=0; k<N; k++)
122  value[k] +=
123  flux_val*wgb(cell,basis,qp,dim,k)+flux(cell,qp,dim,k)*wgb_val;
124  }
125  const scalar_type src_val = src(cell,qp,N);
126  const scalar_type wbs_val = wbs(cell,basis,qp,N);
127  value2[N] += src_val*wbs_val;
128  for(int k=0; k<N; k++)
129  value2[k] += src_val*wbs(cell,basis,qp,k)+src(cell,qp,k)*wbs_val;
130  }
131  for(int k=0; k<=N; k++)
132  residual(cell,basis,k) = value[k]+value2[k];
133  }
134  });
135 }
136 
137 template<int N, typename FluxView, typename WgbView, typename SrcView,
138  typename WbsView, typename ResidualView>
139 void run_analytic_team(const FluxView& flux, const WgbView& wgb,
140  const SrcView& src, const WbsView& wbs,
141  const ResidualView& residual)
142 {
143  typedef typename ResidualView::execution_space execution_space;
144  typedef typename ResidualView::non_const_value_type scalar_type;
145  typedef Kokkos::TeamPolicy<execution_space> policy_type;
146  typedef typename policy_type::member_type team_member;
147  typedef Kokkos::View<scalar_type[N+1], typename execution_space::scratch_memory_space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > tmp_scratch_type;
148 
149  const size_t num_cells = wgb.extent(0);
150  const int num_basis = wgb.extent(1);
151  /*const*/ int num_points = wgb.extent(2);
152  /*const*/ int num_dim = wgb.extent(3);
153 
154  const size_t bytes = 2*tmp_scratch_type::shmem_size();
155  policy_type policy(num_cells,num_basis,32);
156  Kokkos::parallel_for(policy.set_scratch_size(0,Kokkos::PerThread(bytes)),
157  KOKKOS_LAMBDA (const team_member& team)
158  {
159  tmp_scratch_type value(team.thread_scratch(0));
160  tmp_scratch_type value2(team.thread_scratch(0));
161  const size_t cell = team.league_rank();
162  Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,num_basis),
163  [&] (const int& basis)
164  {
165  Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,N+1),
166  [&] (const int& k)
167  {
168  value(k) = 0;
169  value2(k) = 0;
170  });
171  for (int qp=0; qp<num_points; ++qp) {
172  for (int dim=0; dim<num_dim; ++dim) {
173  const scalar_type flux_val = flux(cell,qp,dim,N);
174  const scalar_type wgb_val = wgb(cell,basis,qp,dim,N);
175  Kokkos::single(Kokkos::PerThread(team), [&] () {
176  value[N] += flux_val*wgb_val;
177  });
178  Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,N),
179  [&] (const int& k)
180  {
181  value[k] +=
182  flux_val*wgb(cell,basis,qp,dim,k)+flux(cell,qp,dim,k)*wgb_val;
183  });
184  }
185  const scalar_type src_val = src(cell,qp,N);
186  const scalar_type wbs_val = wbs(cell,basis,qp,N);
187  Kokkos::single(Kokkos::PerThread(team), [&] () {
188  value2[N] += src_val*wbs_val;
189  });
190  Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,N),
191  [&] (const int& k)
192  {
193  value2[k] += src_val*wbs(cell,basis,qp,k)+src(cell,qp,k)*wbs_val;
194  });
195  }
196  Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,N+1),
197  [&] (const int& k)
198  {
199  residual(cell,basis,k) = value[k]+value2[k];
200  });
201  });
202  });
203 }
204 
205 template <typename FadType, int N, typename ExecSpace>
206 double time_fad_flat(int ncells, int num_basis, int num_points, int ndim,
207  int ntrial, bool check)
208 {
209  typedef Kokkos::View<FadType****,ExecSpace> t_4DView;
210  typedef Kokkos::View<FadType***,ExecSpace> t_3DView;
211  typedef Kokkos::View<FadType**,ExecSpace> t_2DView;
212 
213  t_4DView wgb("",ncells,num_basis,num_points,ndim,N+1);
214  t_3DView flux("",ncells,num_points,ndim,N+1);
215  t_3DView wbs("",ncells,num_basis,num_points,N+1);
216  t_2DView src("",ncells,num_points,N+1);
217  t_2DView residual("",ncells,num_basis,N+1);
218  init_fad(wgb, wbs, flux, src, residual);
219 
220  // Run once to warm up, complete any UVM transfers
221  run_fad_flat(flux, wgb, src, wbs, residual);
222 
223  // Time execution
224  Kokkos::fence();
225  Kokkos::Timer timer;
226  for (int i=0; i<ntrial; ++i)
227  run_fad_flat(flux, wgb, src, wbs, residual);
228  Kokkos::fence();
229  double time = timer.seconds() / ntrial / ncells;
230 
231  // Check result
232  if (check)
233  check_residual(flux, wgb, src, wbs, residual);
234 
235  return time;
236 }
237 
238 template <typename FadType, int N, typename ExecSpace>
239 double time_fad_scratch(int ncells, int num_basis, int num_points, int ndim,
240  int ntrial, bool check)
241 {
242  typedef Kokkos::View<FadType****,ExecSpace> t_4DView;
243  typedef Kokkos::View<FadType***,ExecSpace> t_3DView;
244  typedef Kokkos::View<FadType**,ExecSpace> t_2DView;
245 
246  t_4DView wgb("",ncells,num_basis,num_points,ndim,N+1);
247  t_3DView flux("",ncells,num_points,ndim,N+1);
248  t_3DView wbs("",ncells,num_basis,num_points,N+1);
249  t_2DView src("",ncells,num_points,N+1);
250  t_2DView residual("",ncells,num_basis,N+1);
251  init_fad(wgb, wbs, flux, src, residual);
252 
253  // Run once to warm up, complete any UVM transfers
254  run_fad_scratch(flux, wgb, src, wbs, residual);
255 
256  // Time execution
257  Kokkos::fence();
258  Kokkos::Timer timer;
259  for (int i=0; i<ntrial; ++i)
260  run_fad_scratch(flux, wgb, src, wbs, residual);
261  Kokkos::fence();
262  double time = timer.seconds() / ntrial / ncells;
263 
264  // Check result
265  if (check)
266  check_residual(flux, wgb, src, wbs, residual);
267 
268  return time;
269 }
270 
271 template <int N, typename ExecSpace>
272 double time_analytic_flat(int ncells, int num_basis, int num_points, int ndim,
273  int ntrial, bool check)
274 {
275  typedef Kokkos::View<double****[N+1],ExecSpace> t_4DView;
276  typedef Kokkos::View<double***[N+1],ExecSpace> t_3DView;
277  typedef Kokkos::View<double**[N+1],ExecSpace> t_2DView;
278 
279  t_4DView wgb("",ncells,num_basis,num_points,ndim);
280  t_3DView flux("",ncells,num_points,ndim);
281  t_3DView wbs("",ncells,num_basis,num_points);
282  t_2DView src("",ncells,num_points);
283  t_2DView residual("",ncells,num_basis);
284  init_array(wgb, wbs, flux, src, residual);
285 
286  // Run once to warm up, complete any UVM transfers
287  run_analytic_flat<N>(flux, wgb, src, wbs, residual);
288 
289  // Time execution
290  Kokkos::fence();
291  Kokkos::Timer timer;
292  for (int i=0; i<ntrial; ++i)
293  run_analytic_flat<N>(flux, wgb, src, wbs, residual);
294  Kokkos::fence();
295  double time = timer.seconds() / ntrial / ncells;
296 
297  // Check result
298  if (check)
299  check_residual(flux, wgb, src, wbs, residual);
300 
301  return time;
302 }
303 
304 template <int N, typename ExecSpace>
305 double time_analytic_const(int ncells, int num_basis, int num_points, int ndim,
306  int ntrial, bool check)
307 {
308  typedef Kokkos::View<double****[N+1],ExecSpace> t_4DView;
309  typedef Kokkos::View<double***[N+1],ExecSpace> t_3DView;
310  typedef Kokkos::View<double**[N+1],ExecSpace> t_2DView;
311  typedef Kokkos::View<const double***[N+1],ExecSpace,Kokkos::MemoryTraits<Kokkos::RandomAccess> > t_3DView_const;
312 
313  t_4DView wgb("",ncells,num_basis,num_points,ndim);
314  t_3DView flux("",ncells,num_points,ndim);
315  t_3DView wbs("",ncells,num_basis,num_points);
316  t_2DView src("",ncells,num_points);
317  t_2DView residual("",ncells,num_basis);
318  init_array(wgb, wbs, flux, src, residual);
319 
320  t_3DView_const flux_const = flux;
321 
322  // Run once to warm up, complete any UVM transfers
323  run_analytic_flat<N>(flux_const, wgb, src, wbs, residual);
324 
325  // Time execution
326  Kokkos::fence();
327  Kokkos::Timer timer;
328  for (int i=0; i<ntrial; ++i)
329  run_analytic_flat<N>(flux_const, wgb, src, wbs, residual);
330  Kokkos::fence();
331  double time = timer.seconds() / ntrial / ncells;
332 
333  // Check result
334  if (check)
335  check_residual(flux, wgb, src, wbs, residual);
336 
337  return time;
338 }
339 
340 template <int N, typename ExecSpace>
341 double time_analytic_team(int ncells, int num_basis, int num_points, int ndim,
342  int ntrial, bool check)
343 {
344  typedef Kokkos::View<double****[N+1],ExecSpace> t_4DView;
345  typedef Kokkos::View<double***[N+1],ExecSpace> t_3DView;
346  typedef Kokkos::View<double**[N+1],ExecSpace> t_2DView;
347  typedef Kokkos::View<const double***[N+1],ExecSpace,Kokkos::MemoryTraits<Kokkos::RandomAccess> > t_3DView_const;
348 
349  t_4DView wgb("",ncells,num_basis,num_points,ndim);
350  t_3DView flux("",ncells,num_points,ndim);
351  t_3DView wbs("",ncells,num_basis,num_points);
352  t_2DView src("",ncells,num_points);
353  t_2DView residual("",ncells,num_basis);
354  init_array(wgb, wbs, flux, src, residual);
355 
356  t_3DView_const flux_const = flux;
357 
358  // Run once to warm up, complete any UVM transfers
359  run_analytic_team<N>(flux_const, wgb, src, wbs, residual);
360 
361  // Time execution
362  Kokkos::fence();
363  Kokkos::Timer timer;
364  for (int i=0; i<ntrial; ++i)
365  run_analytic_team<N>(flux_const, wgb, src, wbs, residual);
366  Kokkos::fence();
367  double time = timer.seconds() / ntrial / ncells;
368 
369  // Check result
370  if (check)
371  check_residual(flux, wgb, src, wbs, residual);
372 
373  return time;
374 }
375 
376 #define INST_FUNC_FAD_N_DEV(FAD,N,DEV) \
377  template double time_fad_flat< FAD, N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check); \
378  template double time_fad_scratch< FAD, N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check);
379 
380 #define INST_FUNC_N_DEV(N,DEV) \
381  INST_FUNC_FAD_N_DEV(SFadType,N,DEV) \
382  INST_FUNC_FAD_N_DEV(SLFadType,N,DEV) \
383  INST_FUNC_FAD_N_DEV(DFadType,N,DEV) \
384  template double time_analytic_flat< N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check); \
385  template double time_analytic_const< N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check); \
386  template double time_analytic_team< N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check);
387 
388 #define INST_FUNC_DEV(DEV) \
389  INST_FUNC_N_DEV( fad_dim, DEV )
390 
391 #ifdef KOKKOS_ENABLE_SERIAL
392 INST_FUNC_DEV(Kokkos::Serial)
393 #endif
394 
395 #ifdef KOKKOS_ENABLE_OPENMP
396 INST_FUNC_DEV(Kokkos::OpenMP)
397 #endif
398 
399 #ifdef KOKKOS_ENABLE_THREADS
400 INST_FUNC_DEV(Kokkos::Threads)
401 #endif
402 
403 #ifdef KOKKOS_ENABLE_CUDA
404 INST_FUNC_DEV(Kokkos::Cuda)
405 #endif
void run_fad_scratch(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
Definition: advection.cpp:49
double time_analytic_flat(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
Definition: advection.cpp:272
double time_analytic_const(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
Definition: advection.cpp:305
std::enable_if< !Kokkos::is_view_fad< View2 >::value, bool >::type check(const View1 &v_gold, const View2 &v, const double tol)
double time_fad_flat(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
Definition: advection.cpp:206
double time_fad_scratch(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
Definition: advection.cpp:239
void run_analytic_flat(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
Definition: advection.cpp:95
void run_analytic_team(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
Definition: advection.cpp:139
void init_array(const V1 &v1, const V2 &v2, const V3 &v3, const V4 &v4, const V5 &v5)
void init_fad(const V1 &v1, const V2 &v2, const V3 &v3, const V4 &v4, const V5 &v5)
const int N
void check_residual(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
int value
#define INST_FUNC_DEV(DEV)
Definition: advection.cpp:388
void run_fad_flat(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
Definition: advection.cpp:18
double time_analytic_team(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
Definition: advection.cpp:341