Sacado Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
advection.cpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Sacado Package
5 // Copyright (2006) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // This library is free software; you can redistribute it and/or modify
11 // it under the terms of the GNU Lesser General Public License as
12 // published by the Free Software Foundation; either version 2.1 of the
13 // License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful, but
16 // WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
23 // USA
24 // Questions? Contact David M. Gay (dmgay@sandia.gov) or Eric T. Phipps
25 // (etphipp@sandia.gov).
26 //
27 // ***********************************************************************
28 // @HEADER
29 
30 #include "Sacado.hpp"
31 #include "advection.hpp"
32 #include "common.hpp"
33 
34 #include "impl/Kokkos_Timer.hpp"
35 
36 template<typename FluxView, typename WgbView, typename SrcView,
37  typename WbsView, typename ResidualView>
38 void run_fad_flat(const FluxView& flux, const WgbView& wgb,
39  const SrcView& src, const WbsView& wbs,
40  const ResidualView& residual)
41 {
42  typedef typename ResidualView::execution_space execution_space;
43  typedef typename ResidualView::non_const_value_type scalar_type;
44 
45  const size_t num_cells = wgb.extent(0);
46  const int num_basis = wgb.extent(1);
47  const int num_points = wgb.extent(2);
48  const int num_dim = wgb.extent(3);
49 
50  Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>( 0,num_cells ),
51  KOKKOS_LAMBDA (const size_t cell)
52  {
53  scalar_type value, value2;
54  for (int basis=0; basis<num_basis; ++basis) {
55  value = 0.0;
56  value2 = 0.0;
57  for (int qp=0; qp<num_points; ++qp) {
58  for (int dim=0; dim<num_dim; ++dim)
59  value += flux(cell,qp,dim)*wgb(cell,basis,qp,dim);
60  value2 += src(cell,qp)*wbs(cell,basis,qp);
61  }
62  residual(cell,basis) = value+value2;
63  }
64  });
65 }
66 
67 template<typename FluxView, typename WgbView, typename SrcView,
68  typename WbsView, typename ResidualView>
69 void run_fad_scratch(const FluxView& flux, const WgbView& wgb,
70  const SrcView& src, const WbsView& wbs,
71  const ResidualView& residual)
72 {
73  typedef typename ResidualView::execution_space execution_space;
74  typedef typename ResidualView::non_const_value_type scalar_type;
75  typedef Kokkos::TeamPolicy<execution_space> policy_type;
76  typedef typename policy_type::member_type team_member;
77  typedef Kokkos::View<scalar_type*, typename execution_space::scratch_memory_space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > tmp_scratch_type;
78 
79  const size_t num_cells = wgb.extent(0);
80  const int num_basis = wgb.extent(1);
81  const int num_points = wgb.extent(2);
82  const int num_dim = wgb.extent(3);
83 
84  const int vector_size = 1;
85  const int team_size = is_cuda_space<execution_space>::value ? 32 : 1;
86  const int fad_size = Kokkos::dimension_scalar(residual);
87  const size_t range = (num_cells+team_size-1)/team_size;
88  const size_t bytes = 2*tmp_scratch_type::shmem_size(team_size,fad_size);
89  policy_type policy(range,team_size,vector_size);
90 
91  Kokkos::parallel_for(policy.set_scratch_size(0,Kokkos::PerTeam(bytes)),
92  KOKKOS_LAMBDA (const team_member& team)
93  {
94  const int team_rank = team.team_rank();
95  tmp_scratch_type value(team.team_scratch(0), team_size, fad_size);
96  tmp_scratch_type value2(team.team_scratch(0), team_size, fad_size);
97  const size_t cell = team.league_rank()*team_size + team_rank;
98  if (cell < num_cells) {
99  for (int basis=0; basis<num_basis; ++basis) {
100  value(team_rank) = 0.0;
101  value2(team_rank) = 0.0;
102  for (int qp=0; qp<num_points; ++qp) {
103  for (int dim=0; dim<num_dim; ++dim)
104  value(team_rank) += flux(cell,qp,dim)*wgb(cell,basis,qp,dim);
105  value2(team_rank) += src(cell,qp)*wbs(cell,basis,qp);
106  }
107  residual(cell,basis) = value(team_rank)+value2(team_rank);
108  }
109  }
110  });
111 }
112 
113 template<int N, typename FluxView, typename WgbView, typename SrcView,
114  typename WbsView, typename ResidualView>
115 void run_analytic_flat(const FluxView& flux, const WgbView& wgb,
116  const SrcView& src, const WbsView& wbs,
117  const ResidualView& residual)
118 {
119  typedef typename ResidualView::execution_space execution_space;
120  typedef typename ResidualView::non_const_value_type scalar_type;
121 
122  const size_t num_cells = wgb.extent(0);
123  const int num_basis = wgb.extent(1);
124  const int num_points = wgb.extent(2);
125  const int num_dim = wgb.extent(3);
126 
127  Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>( 0,num_cells ),
128  KOKKOS_LAMBDA (const size_t cell)
129  {
130  scalar_type value[N+1],value2[N+1];
131  for (int basis=0; basis<num_basis; ++basis) {
132  for (int k=0; k<N+1; ++k) {
133  value[k] = 0.0;
134  value2[k] = 0.0;
135  }
136  for (int qp=0; qp<num_points; ++qp) {
137  for (int dim=0; dim<num_dim; ++dim) {
138  const scalar_type flux_val = flux(cell,qp,dim,N);
139  const scalar_type wgb_val = wgb(cell,basis,qp,dim,N);
140  value[N] += flux_val*wgb_val;
141  for(int k=0; k<N; k++)
142  value[k] +=
143  flux_val*wgb(cell,basis,qp,dim,k)+flux(cell,qp,dim,k)*wgb_val;
144  }
145  const scalar_type src_val = src(cell,qp,N);
146  const scalar_type wbs_val = wbs(cell,basis,qp,N);
147  value2[N] += src_val*wbs_val;
148  for(int k=0; k<N; k++)
149  value2[k] += src_val*wbs(cell,basis,qp,k)+src(cell,qp,k)*wbs_val;
150  }
151  for(int k=0; k<=N; k++)
152  residual(cell,basis,k) = value[k]+value2[k];
153  }
154  });
155 }
156 
157 template<int N, typename FluxView, typename WgbView, typename SrcView,
158  typename WbsView, typename ResidualView>
159 void run_analytic_team(const FluxView& flux, const WgbView& wgb,
160  const SrcView& src, const WbsView& wbs,
161  const ResidualView& residual)
162 {
163  typedef typename ResidualView::execution_space execution_space;
164  typedef typename ResidualView::non_const_value_type scalar_type;
165  typedef Kokkos::TeamPolicy<execution_space> policy_type;
166  typedef typename policy_type::member_type team_member;
167  typedef Kokkos::View<scalar_type[N+1], typename execution_space::scratch_memory_space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > tmp_scratch_type;
168 
169  const size_t num_cells = wgb.extent(0);
170  const int num_basis = wgb.extent(1);
171  /*const*/ int num_points = wgb.extent(2);
172  /*const*/ int num_dim = wgb.extent(3);
173 
174  const size_t bytes = 2*tmp_scratch_type::shmem_size();
175  policy_type policy(num_cells,num_basis,32);
176  Kokkos::parallel_for(policy.set_scratch_size(0,Kokkos::PerThread(bytes)),
177  KOKKOS_LAMBDA (const team_member& team)
178  {
179  tmp_scratch_type value(team.thread_scratch(0));
180  tmp_scratch_type value2(team.thread_scratch(0));
181  const size_t cell = team.league_rank();
182  Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,num_basis),
183  [&] (const int& basis)
184  {
185  Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,N+1),
186  [&] (const int& k)
187  {
188  value(k) = 0;
189  value2(k) = 0;
190  });
191  for (int qp=0; qp<num_points; ++qp) {
192  for (int dim=0; dim<num_dim; ++dim) {
193  const scalar_type flux_val = flux(cell,qp,dim,N);
194  const scalar_type wgb_val = wgb(cell,basis,qp,dim,N);
195  Kokkos::single(Kokkos::PerThread(team), [&] () {
196  value[N] += flux_val*wgb_val;
197  });
198  Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,N),
199  [&] (const int& k)
200  {
201  value[k] +=
202  flux_val*wgb(cell,basis,qp,dim,k)+flux(cell,qp,dim,k)*wgb_val;
203  });
204  }
205  const scalar_type src_val = src(cell,qp,N);
206  const scalar_type wbs_val = wbs(cell,basis,qp,N);
207  Kokkos::single(Kokkos::PerThread(team), [&] () {
208  value2[N] += src_val*wbs_val;
209  });
210  Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,N),
211  [&] (const int& k)
212  {
213  value2[k] += src_val*wbs(cell,basis,qp,k)+src(cell,qp,k)*wbs_val;
214  });
215  }
216  Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,N+1),
217  [&] (const int& k)
218  {
219  residual(cell,basis,k) = value[k]+value2[k];
220  });
221  });
222  });
223 }
224 
225 template <typename FadType, int N, typename ExecSpace>
226 double time_fad_flat(int ncells, int num_basis, int num_points, int ndim,
227  int ntrial, bool check)
228 {
229  typedef Kokkos::View<FadType****,ExecSpace> t_4DView;
230  typedef Kokkos::View<FadType***,ExecSpace> t_3DView;
231  typedef Kokkos::View<FadType**,ExecSpace> t_2DView;
232 
233  t_4DView wgb("",ncells,num_basis,num_points,ndim,N+1);
234  t_3DView flux("",ncells,num_points,ndim,N+1);
235  t_3DView wbs("",ncells,num_basis,num_points,N+1);
236  t_2DView src("",ncells,num_points,N+1);
237  t_2DView residual("",ncells,num_basis,N+1);
238  init_fad(wgb, wbs, flux, src, residual);
239 
240  // Run once to warm up, complete any UVM transfers
241  run_fad_flat(flux, wgb, src, wbs, residual);
242 
243  // Time execution
244  Kokkos::fence();
245  Kokkos::Impl::Timer timer;
246  for (int i=0; i<ntrial; ++i)
247  run_fad_flat(flux, wgb, src, wbs, residual);
248  Kokkos::fence();
249  double time = timer.seconds() / ntrial / ncells;
250 
251  // Check result
252  if (check)
253  check_residual(flux, wgb, src, wbs, residual);
254 
255  return time;
256 }
257 
258 template <typename FadType, int N, typename ExecSpace>
259 double time_fad_scratch(int ncells, int num_basis, int num_points, int ndim,
260  int ntrial, bool check)
261 {
262  typedef Kokkos::View<FadType****,ExecSpace> t_4DView;
263  typedef Kokkos::View<FadType***,ExecSpace> t_3DView;
264  typedef Kokkos::View<FadType**,ExecSpace> t_2DView;
265 
266  t_4DView wgb("",ncells,num_basis,num_points,ndim,N+1);
267  t_3DView flux("",ncells,num_points,ndim,N+1);
268  t_3DView wbs("",ncells,num_basis,num_points,N+1);
269  t_2DView src("",ncells,num_points,N+1);
270  t_2DView residual("",ncells,num_basis,N+1);
271  init_fad(wgb, wbs, flux, src, residual);
272 
273  // Run once to warm up, complete any UVM transfers
274  run_fad_scratch(flux, wgb, src, wbs, residual);
275 
276  // Time execution
277  Kokkos::fence();
278  Kokkos::Impl::Timer timer;
279  for (int i=0; i<ntrial; ++i)
280  run_fad_scratch(flux, wgb, src, wbs, residual);
281  Kokkos::fence();
282  double time = timer.seconds() / ntrial / ncells;
283 
284  // Check result
285  if (check)
286  check_residual(flux, wgb, src, wbs, residual);
287 
288  return time;
289 }
290 
291 template <int N, typename ExecSpace>
292 double time_analytic_flat(int ncells, int num_basis, int num_points, int ndim,
293  int ntrial, bool check)
294 {
295  typedef Kokkos::View<double****[N+1],ExecSpace> t_4DView;
296  typedef Kokkos::View<double***[N+1],ExecSpace> t_3DView;
297  typedef Kokkos::View<double**[N+1],ExecSpace> t_2DView;
298 
299  t_4DView wgb("",ncells,num_basis,num_points,ndim);
300  t_3DView flux("",ncells,num_points,ndim);
301  t_3DView wbs("",ncells,num_basis,num_points);
302  t_2DView src("",ncells,num_points);
303  t_2DView residual("",ncells,num_basis);
304  init_array(wgb, wbs, flux, src, residual);
305 
306  // Run once to warm up, complete any UVM transfers
307  run_analytic_flat<N>(flux, wgb, src, wbs, residual);
308 
309  // Time execution
310  Kokkos::fence();
311  Kokkos::Impl::Timer timer;
312  for (int i=0; i<ntrial; ++i)
313  run_analytic_flat<N>(flux, wgb, src, wbs, residual);
314  Kokkos::fence();
315  double time = timer.seconds() / ntrial / ncells;
316 
317  // Check result
318  if (check)
319  check_residual(flux, wgb, src, wbs, residual);
320 
321  return time;
322 }
323 
324 template <int N, typename ExecSpace>
325 double time_analytic_const(int ncells, int num_basis, int num_points, int ndim,
326  int ntrial, bool check)
327 {
328  typedef Kokkos::View<double****[N+1],ExecSpace> t_4DView;
329  typedef Kokkos::View<double***[N+1],ExecSpace> t_3DView;
330  typedef Kokkos::View<double**[N+1],ExecSpace> t_2DView;
331  typedef Kokkos::View<const double***[N+1],ExecSpace,Kokkos::MemoryTraits<Kokkos::RandomAccess> > t_3DView_const;
332 
333  t_4DView wgb("",ncells,num_basis,num_points,ndim);
334  t_3DView flux("",ncells,num_points,ndim);
335  t_3DView wbs("",ncells,num_basis,num_points);
336  t_2DView src("",ncells,num_points);
337  t_2DView residual("",ncells,num_basis);
338  init_array(wgb, wbs, flux, src, residual);
339 
340  t_3DView_const flux_const = flux;
341 
342  // Run once to warm up, complete any UVM transfers
343  run_analytic_flat<N>(flux_const, wgb, src, wbs, residual);
344 
345  // Time execution
346  Kokkos::fence();
347  Kokkos::Impl::Timer timer;
348  for (int i=0; i<ntrial; ++i)
349  run_analytic_flat<N>(flux_const, wgb, src, wbs, residual);
350  Kokkos::fence();
351  double time = timer.seconds() / ntrial / ncells;
352 
353  // Check result
354  if (check)
355  check_residual(flux, wgb, src, wbs, residual);
356 
357  return time;
358 }
359 
360 template <int N, typename ExecSpace>
361 double time_analytic_team(int ncells, int num_basis, int num_points, int ndim,
362  int ntrial, bool check)
363 {
364  typedef Kokkos::View<double****[N+1],ExecSpace> t_4DView;
365  typedef Kokkos::View<double***[N+1],ExecSpace> t_3DView;
366  typedef Kokkos::View<double**[N+1],ExecSpace> t_2DView;
367  typedef Kokkos::View<const double***[N+1],ExecSpace,Kokkos::MemoryTraits<Kokkos::RandomAccess> > t_3DView_const;
368 
369  t_4DView wgb("",ncells,num_basis,num_points,ndim);
370  t_3DView flux("",ncells,num_points,ndim);
371  t_3DView wbs("",ncells,num_basis,num_points);
372  t_2DView src("",ncells,num_points);
373  t_2DView residual("",ncells,num_basis);
374  init_array(wgb, wbs, flux, src, residual);
375 
376  t_3DView_const flux_const = flux;
377 
378  // Run once to warm up, complete any UVM transfers
379  run_analytic_team<N>(flux_const, wgb, src, wbs, residual);
380 
381  // Time execution
382  Kokkos::fence();
383  Kokkos::Impl::Timer timer;
384  for (int i=0; i<ntrial; ++i)
385  run_analytic_team<N>(flux_const, wgb, src, wbs, residual);
386  Kokkos::fence();
387  double time = timer.seconds() / ntrial / ncells;
388 
389  // Check result
390  if (check)
391  check_residual(flux, wgb, src, wbs, residual);
392 
393  return time;
394 }
395 
396 #define INST_FUNC_FAD_N_DEV(FAD,N,DEV) \
397  template double time_fad_flat< FAD, N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check); \
398  template double time_fad_scratch< FAD, N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check);
399 
400 #define INST_FUNC_N_DEV(N,DEV) \
401  INST_FUNC_FAD_N_DEV(SFadType,N,DEV) \
402  INST_FUNC_FAD_N_DEV(SLFadType,N,DEV) \
403  INST_FUNC_FAD_N_DEV(DFadType,N,DEV) \
404  template double time_analytic_flat< N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check); \
405  template double time_analytic_const< N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check); \
406  template double time_analytic_team< N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check);
407 
408 #define INST_FUNC_DEV(DEV) \
409  INST_FUNC_N_DEV( fad_dim, DEV )
410 
411 #ifdef KOKKOS_ENABLE_SERIAL
412 INST_FUNC_DEV(Kokkos::Serial)
413 #endif
414 
415 #ifdef KOKKOS_ENABLE_OPENMP
416 INST_FUNC_DEV(Kokkos::OpenMP)
417 #endif
418 
419 #ifdef KOKKOS_ENABLE_THREADS
420 INST_FUNC_DEV(Kokkos::Threads)
421 #endif
422 
423 #ifdef KOKKOS_ENABLE_CUDA
424 INST_FUNC_DEV(Kokkos::Cuda)
425 #endif
void run_fad_scratch(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
Definition: advection.cpp:69
double time_analytic_flat(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
Definition: advection.cpp:292
double time_analytic_const(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
Definition: advection.cpp:325
std::enable_if< !Kokkos::is_view_fad< View2 >::value, bool >::type check(const View1 &v_gold, const View2 &v, const double tol)
double time_fad_flat(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
Definition: advection.cpp:226
double time_fad_scratch(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
Definition: advection.cpp:259
void run_analytic_flat(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
Definition: advection.cpp:115
void run_analytic_team(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
Definition: advection.cpp:159
void init_array(const V1 &v1, const V2 &v2, const V3 &v3, const V4 &v4, const V5 &v5)
void init_fad(const V1 &v1, const V2 &v2, const V3 &v3, const V4 &v4, const V5 &v5)
const int N
void check_residual(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
#define INST_FUNC_DEV(DEV)
Definition: advection.cpp:408
void run_fad_flat(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
Definition: advection.cpp:38
double time_analytic_team(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
Definition: advection.cpp:361