Sacado Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
const_basis/advection.cpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Sacado Package
5 // Copyright (2006) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // This library is free software; you can redistribute it and/or modify
11 // it under the terms of the GNU Lesser General Public License as
12 // published by the Free Software Foundation; either version 2.1 of the
13 // License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful, but
16 // WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
23 // USA
24 // Questions? Contact David M. Gay (dmgay@sandia.gov) or Eric T. Phipps
25 // (etphipp@sandia.gov).
26 //
27 // ***********************************************************************
28 // @HEADER
29 
30 #include "Sacado.hpp"
31 #include "advection.hpp"
32 #include "common.hpp"
33 
34 #include "impl/Kokkos_Timer.hpp"
35 
36 template<typename FluxView, typename WgbView, typename SrcView,
37  typename WbsView, typename ResidualView>
38 void run_fad_flat(const FluxView& flux, const WgbView& wgb,
39  const SrcView& src, const WbsView& wbs,
40  const ResidualView& residual)
41 {
42  typedef typename ResidualView::execution_space execution_space;
43  typedef typename ResidualView::non_const_value_type scalar_type;
44 
45  const size_t num_cells = wgb.extent(0);
46  const int num_basis = wgb.extent(1);
47  const int num_points = wgb.extent(2);
48  const int num_dim = wgb.extent(3);
49 
50  Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>( 0,num_cells ),
51  KOKKOS_LAMBDA (const size_t cell)
52  {
53  scalar_type value, value2;
54  for (int basis=0; basis<num_basis; ++basis) {
55  value = 0.0;
56  value2 = 0.0;
57  for (int qp=0; qp<num_points; ++qp) {
58  for (int dim=0; dim<num_dim; ++dim)
59  value += flux(cell,qp,dim)*wgb(cell,basis,qp,dim);
60  value2 += src(cell,qp)*wbs(cell,basis,qp);
61  }
62  residual(cell,basis) = value+value2;
63  }
64  });
65 }
66 
67 template<typename FluxView, typename WgbView, typename SrcView,
68  typename WbsView, typename ResidualView>
69 void run_fad_scratch(const FluxView& flux, const WgbView& wgb,
70  const SrcView& src, const WbsView& wbs,
71  const ResidualView& residual)
72 {
73  typedef typename ResidualView::execution_space execution_space;
74  typedef typename ResidualView::non_const_value_type scalar_type;
75  typedef Kokkos::TeamPolicy<execution_space> policy_type;
76  typedef typename policy_type::member_type team_member;
77  typedef Kokkos::View<scalar_type*, typename execution_space::scratch_memory_space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > tmp_scratch_type;
78 
79  const size_t num_cells = wgb.extent(0);
80  const int num_basis = wgb.extent(1);
81  const int num_points = wgb.extent(2);
82  const int num_dim = wgb.extent(3);
83 
84  const int vector_size = 1;
85  const int team_size = is_cuda_space<execution_space>::value ? 32 : 1;
86  const int fad_size = Kokkos::dimension_scalar(residual);
87  const size_t range = (num_cells+team_size-1)/team_size;
88  const size_t bytes = 2*tmp_scratch_type::shmem_size(team_size,fad_size);
89  policy_type policy(range,team_size,vector_size);
90 
91  Kokkos::parallel_for(policy.set_scratch_size(0,Kokkos::PerTeam(bytes)),
92  KOKKOS_LAMBDA (const team_member& team)
93  {
94  const int team_rank = team.team_rank();
95  tmp_scratch_type value(team.team_scratch(0), team_size, fad_size);
96  tmp_scratch_type value2(team.team_scratch(0), team_size, fad_size);
97  const size_t cell = team.league_rank()*team_size + team_rank;
98  if (cell < num_cells) {
99  for (int basis=0; basis<num_basis; ++basis) {
100  value(team_rank) = 0.0;
101  value2(team_rank) = 0.0;
102  for (int qp=0; qp<num_points; ++qp) {
103  for (int dim=0; dim<num_dim; ++dim)
104  value(team_rank) += flux(cell,qp,dim)*wgb(cell,basis,qp,dim);
105  value2(team_rank) += src(cell,qp)*wbs(cell,basis,qp);
106  }
107  residual(cell,basis) = value(team_rank)+value2(team_rank);
108  }
109  }
110  });
111 }
112 
113 template<int N, typename FluxView, typename WgbView, typename SrcView,
114  typename WbsView, typename ResidualView>
115 void run_analytic_flat(const FluxView& flux, const WgbView& wgb,
116  const SrcView& src, const WbsView& wbs,
117  const ResidualView& residual)
118 {
119  typedef typename ResidualView::execution_space execution_space;
120  typedef typename ResidualView::non_const_value_type scalar_type;
121 
122  const size_t num_cells = wgb.extent(0);
123  const int num_basis = wgb.extent(1);
124  const int num_points = wgb.extent(2);
125  const int num_dim = wgb.extent(3);
126 
127  Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>( 0,num_cells ),
128  KOKKOS_LAMBDA (const size_t cell)
129  {
130  scalar_type value[N+1],value2[N+1];
131  for (int basis=0; basis<num_basis; ++basis) {
132  for (int k=0; k<N+1; ++k) {
133  value[k] = 0.0;
134  value2[k] = 0.0;
135  }
136  for (int qp=0; qp<num_points; ++qp) {
137  for (int dim=0; dim<num_dim; ++dim) {
138  const scalar_type flux_val = flux(cell,qp,dim,N);
139  const scalar_type wgb_val = wgb(cell,basis,qp,dim);
140  value[N] += flux_val*wgb_val;
141  for(int k=0; k<N; k++)
142  value[k] += flux(cell,qp,dim,k)*wgb_val;
143  }
144  const scalar_type src_val = src(cell,qp,N);
145  const scalar_type wbs_val = wbs(cell,basis,qp);
146  value2[N] += src_val*wbs_val;
147  for(int k=0; k<N; k++)
148  value2[k] += src(cell,qp,k)*wbs_val;
149  }
150  for(int k=0; k<=N; k++)
151  residual(cell,basis,k) = value[k]+value2[k];
152  }
153  });
154 }
155 
156 template<int N, typename FluxView, typename WgbView, typename SrcView,
157  typename WbsView, typename ResidualView>
158 void run_analytic_team(const FluxView& flux, const WgbView& wgb,
159  const SrcView& src, const WbsView& wbs,
160  const ResidualView& residual)
161 {
162  typedef typename ResidualView::execution_space execution_space;
163  typedef typename ResidualView::non_const_value_type scalar_type;
164  typedef Kokkos::TeamPolicy<execution_space> policy_type;
165  typedef typename policy_type::member_type team_member;
166  typedef Kokkos::View<scalar_type[N+1], typename execution_space::scratch_memory_space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > tmp_scratch_type;
167 
168  const size_t num_cells = wgb.extent(0);
169  const int num_basis = wgb.extent(1);
170  /*const*/ int num_points = wgb.extent(2);
171  /*const*/ int num_dim = wgb.extent(3);
172 
173  const size_t bytes = 2*tmp_scratch_type::shmem_size();
174  policy_type policy(num_cells,num_basis,32);
175  Kokkos::parallel_for(policy.set_scratch_size(0,Kokkos::PerThread(bytes)),
176  KOKKOS_LAMBDA (const team_member& team)
177  {
178  tmp_scratch_type value(team.thread_scratch(0));
179  tmp_scratch_type value2(team.thread_scratch(0));
180  const size_t cell = team.league_rank();
181  Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,num_basis),
182  [&] (const int& basis)
183  {
184  Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,N+1),
185  [&] (const int& k)
186  {
187  value(k) = 0;
188  value2(k) = 0;
189  });
190  for (int qp=0; qp<num_points; ++qp) {
191  for (int dim=0; dim<num_dim; ++dim) {
192  const scalar_type flux_val = flux(cell,qp,dim,N);
193  const scalar_type wgb_val = wgb(cell,basis,qp,dim);
194  Kokkos::single(Kokkos::PerThread(team), [&] () {
195  value[N] += flux_val*wgb_val;
196  });
197  Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,N),
198  [&] (const int& k)
199  {
200  value[k] += flux(cell,qp,dim,k)*wgb_val;
201  });
202  }
203  const scalar_type src_val = src(cell,qp,N);
204  const scalar_type wbs_val = wbs(cell,basis,qp);
205  Kokkos::single(Kokkos::PerThread(team), [&] () {
206  value2[N] += src_val*wbs_val;
207  });
208  Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,N),
209  [&] (const int& k)
210  {
211  value2[k] += src(cell,qp,k)*wbs_val;
212  });
213  }
214  Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,N+1),
215  [&] (const int& k)
216  {
217  residual(cell,basis,k) = value[k]+value2[k];
218  });
219  });
220  });
221 }
222 
223 template <typename FadType, int N, typename ExecSpace>
224 double time_fad_flat(int ncells, int num_basis, int num_points, int ndim,
225  int ntrial, bool check)
226 {
227  typedef Kokkos::View<double****,ExecSpace> t_4DView_d;
228  typedef Kokkos::View<double***,ExecSpace> t_3DView_d;
229  typedef Kokkos::View<FadType***,ExecSpace> t_3DView;
230  typedef Kokkos::View<FadType**,ExecSpace> t_2DView;
231 
232  t_4DView_d wgb("",ncells,num_basis,num_points,ndim);
233  t_3DView_d wbs("",ncells,num_basis,num_points);
234  t_3DView flux("",ncells,num_points,ndim,N+1);
235  t_2DView src("",ncells,num_points,N+1);
236  t_2DView residual("",ncells,num_basis,N+1);
237  init_fad(wgb, wbs, flux, src, residual);
238 
239  // Run once to warm up, complete any UVM transfers
240  run_fad_flat(flux, wgb, src, wbs, residual);
241 
242  // Time execution
243  Kokkos::fence();
244  Kokkos::Impl::Timer timer;
245  for (int i=0; i<ntrial; ++i)
246  run_fad_flat(flux, wgb, src, wbs, residual);
247  Kokkos::fence();
248  double time = timer.seconds() / ntrial / ncells;
249 
250  // Check result
251  if (check)
252  check_residual(flux, wgb, src, wbs, residual);
253 
254  return time;
255 }
256 
257 template <typename FadType, int N, typename ExecSpace>
258 double time_fad_scratch(int ncells, int num_basis, int num_points, int ndim,
259  int ntrial, bool check)
260 {
261  typedef Kokkos::View<double****,ExecSpace> t_4DView_d;
262  typedef Kokkos::View<double***,ExecSpace> t_3DView_d;
263  typedef Kokkos::View<FadType***,ExecSpace> t_3DView;
264  typedef Kokkos::View<FadType**,ExecSpace> t_2DView;
265 
266  t_4DView_d wgb("",ncells,num_basis,num_points,ndim);
267  t_3DView_d wbs("",ncells,num_basis,num_points);
268  t_3DView flux("",ncells,num_points,ndim,N+1);
269  t_2DView src("",ncells,num_points,N+1);
270  t_2DView residual("",ncells,num_basis,N+1);
271  init_fad(wgb, wbs, flux, src, residual);
272 
273  // Run once to warm up, complete any UVM transfers
274  run_fad_scratch(flux, wgb, src, wbs, residual);
275 
276  // Time execution
277  Kokkos::fence();
278  Kokkos::Impl::Timer timer;
279  for (int i=0; i<ntrial; ++i)
280  run_fad_scratch(flux, wgb, src, wbs, residual);
281  Kokkos::fence();
282  double time = timer.seconds() / ntrial / ncells;
283 
284  // Check result
285  if (check)
286  check_residual(flux, wgb, src, wbs, residual);
287 
288  return time;
289 }
290 
291 template <int N, typename ExecSpace>
292 double time_analytic_flat(int ncells, int num_basis, int num_points, int ndim,
293  int ntrial, bool check)
294 {
295  typedef Kokkos::View<double****,ExecSpace> t_4DView_d;
296  typedef Kokkos::View<double***,ExecSpace> t_3DView_d;
297  typedef Kokkos::View<double***[N+1],ExecSpace> t_3DView;
298  typedef Kokkos::View<double**[N+1],ExecSpace> t_2DView;
299 
300  t_4DView_d wgb("",ncells,num_basis,num_points,ndim);
301  t_3DView_d wbs("",ncells,num_basis,num_points);
302  t_3DView flux("",ncells,num_points,ndim);
303  t_2DView src("",ncells,num_points);
304  t_2DView residual("",ncells,num_basis);
305  init_array(wgb, wbs, flux, src, residual);
306 
307  // Run once to warm up, complete any UVM transfers
308  run_analytic_flat<N>(flux, wgb, src, wbs, residual);
309 
310  // Time execution
311  Kokkos::fence();
312  Kokkos::Impl::Timer timer;
313  for (int i=0; i<ntrial; ++i)
314  run_analytic_flat<N>(flux, wgb, src, wbs, residual);
315  Kokkos::fence();
316  double time = timer.seconds() / ntrial / ncells;
317 
318  // Check result
319  if (check)
320  check_residual(flux, wgb, src, wbs, residual);
321 
322  return time;
323 }
324 
325 template <int N, typename ExecSpace>
326 double time_analytic_const(int ncells, int num_basis, int num_points, int ndim,
327  int ntrial, bool check)
328 {
329  typedef Kokkos::View<double****,ExecSpace> t_4DView_d;
330  typedef Kokkos::View<double***,ExecSpace> t_3DView_d;
331  typedef Kokkos::View<double***[N+1],ExecSpace> t_3DView;
332  typedef Kokkos::View<double**[N+1],ExecSpace> t_2DView;
333 
334  t_4DView_d wgb("",ncells,num_basis,num_points,ndim);
335  t_3DView_d wbs("",ncells,num_basis,num_points);
336  t_3DView flux("",ncells,num_points,ndim);
337  t_2DView src("",ncells,num_points);
338  t_2DView residual("",ncells,num_basis);
339  init_array(wgb, wbs, flux, src, residual);
340 
341  typedef Kokkos::View<const double***[N+1],ExecSpace,Kokkos::MemoryTraits<Kokkos::RandomAccess> > t_3DView_const;
342  t_3DView_const flux_const = flux;
343 
344  // Run once to warm up, complete any UVM transfers
345  run_analytic_flat<N>(flux_const, wgb, src, wbs, residual);
346 
347  // Time execution
348  Kokkos::fence();
349  Kokkos::Impl::Timer timer;
350  for (int i=0; i<ntrial; ++i)
351  run_analytic_flat<N>(flux_const, wgb, src, wbs, residual);
352  Kokkos::fence();
353  double time = timer.seconds() / ntrial / ncells;
354 
355  // Check result
356  if (check)
357  check_residual(flux, wgb, src, wbs, residual);
358 
359  return time;
360 }
361 
362 template <int N, typename ExecSpace>
363 double time_analytic_team(int ncells, int num_basis, int num_points, int ndim,
364  int ntrial, bool check)
365 {
366  typedef Kokkos::View<double****,ExecSpace> t_4DView_d;
367  typedef Kokkos::View<double***,ExecSpace> t_3DView_d;
368  typedef Kokkos::View<double***[N+1],ExecSpace> t_3DView;
369  typedef Kokkos::View<double**[N+1],ExecSpace> t_2DView;
370 
371  t_4DView_d wgb("",ncells,num_basis,num_points,ndim);
372  t_3DView_d wbs("",ncells,num_basis,num_points);
373  t_3DView flux("",ncells,num_points,ndim);
374  t_2DView src("",ncells,num_points);
375  t_2DView residual("",ncells,num_basis);
376  init_array(wgb, wbs, flux, src, residual);
377 
378  typedef Kokkos::View<const double***[N+1],ExecSpace,Kokkos::MemoryTraits<Kokkos::RandomAccess> > t_3DView_const;
379  t_3DView_const flux_const = flux;
380 
381  // Run once to warm up, complete any UVM transfers
382  run_analytic_team<N>(flux_const, wgb, src, wbs, residual);
383 
384  // Time execution
385  Kokkos::fence();
386  Kokkos::Impl::Timer timer;
387  for (int i=0; i<ntrial; ++i)
388  run_analytic_team<N>(flux_const, wgb, src, wbs, residual);
389  Kokkos::fence();
390  double time = timer.seconds() / ntrial / ncells;
391 
392  // Check result
393  if (check)
394  check_residual(flux, wgb, src, wbs, residual);
395 
396  return time;
397 }
398 
399 #define INST_FUNC_FAD_N_DEV(FAD,N,DEV) \
400  template double time_fad_flat< FAD, N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check); \
401  template double time_fad_scratch< FAD, N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check);
402 
403 #define INST_FUNC_N_DEV(N,DEV) \
404  INST_FUNC_FAD_N_DEV(SFadType,N,DEV) \
405  INST_FUNC_FAD_N_DEV(SLFadType,N,DEV) \
406  INST_FUNC_FAD_N_DEV(DFadType,N,DEV) \
407  template double time_analytic_flat< N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check); \
408  template double time_analytic_const< N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check); \
409  template double time_analytic_team< N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check);
410 
411 #define INST_FUNC_DEV(DEV) \
412  INST_FUNC_N_DEV( fad_dim, DEV )
413 
414 #ifdef KOKKOS_ENABLE_SERIAL
415 INST_FUNC_DEV(Kokkos::Serial)
416 #endif
417 
418 #ifdef KOKKOS_ENABLE_OPENMP
419 INST_FUNC_DEV(Kokkos::OpenMP)
420 #endif
421 
422 #ifdef KOKKOS_ENABLE_THREADS
423 INST_FUNC_DEV(Kokkos::Threads)
424 #endif
425 
426 #ifdef KOKKOS_ENABLE_CUDA
427 INST_FUNC_DEV(Kokkos::Cuda)
428 #endif
void run_fad_scratch(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
Definition: advection.cpp:69
double time_analytic_flat(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
Definition: advection.cpp:292
double time_analytic_const(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
Definition: advection.cpp:325
std::enable_if< !Kokkos::is_view_fad< View2 >::value, bool >::type check(const View1 &v_gold, const View2 &v, const double tol)
double time_fad_flat(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
Definition: advection.cpp:226
double time_fad_scratch(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
Definition: advection.cpp:259
void run_analytic_flat(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
Definition: advection.cpp:115
void run_analytic_team(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
Definition: advection.cpp:159
void init_array(const V1 &v1, const V2 &v2, const V3 &v3, const V4 &v4, const V5 &v5)
void init_fad(const V1 &v1, const V2 &v2, const V3 &v3, const V4 &v4, const V5 &v5)
#define INST_FUNC_DEV(DEV)
const int N
void check_residual(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
void run_fad_flat(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
Definition: advection.cpp:38
double time_analytic_team(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
Definition: advection.cpp:361