Sacado Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
advection_hierarchical.cpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Sacado Package
4 //
5 // Copyright 2006 NTESS and the Sacado contributors.
6 // SPDX-License-Identifier: LGPL-2.1-or-later
7 // *****************************************************************************
8 // @HEADER
9 
10 #define SACADO_VIEW_CUDA_HIERARCHICAL 1
11 #define SACADO_ALIGN_SFAD 1
12 
13 #include "Sacado.hpp"
15 #include "common.hpp"
16 
17 #include "Kokkos_Timer.hpp"
18 
19 template<typename FluxView, typename WgbView, typename SrcView,
20  typename WbsView, typename ResidualView>
21 void run_fad_hierarchical_flat(const FluxView& flux, const WgbView& wgb,
22  const SrcView& src, const WbsView& wbs,
23  const ResidualView& residual)
24 {
25  typedef typename ResidualView::execution_space execution_space;
26  typedef typename Kokkos::ThreadLocalScalarType<ResidualView>::type local_scalar_type;
27  typedef Kokkos::TeamPolicy<execution_space> policy_type;
28  typedef typename policy_type::member_type team_member;
29 
30  const size_t num_cells = wgb.extent(0);
31  const int num_basis = wgb.extent(1);
32  const int num_points = wgb.extent(2);
33  const int num_dim = wgb.extent(3);
34 
35  const bool is_cuda = is_cuda_space<execution_space>::value;
36  const int vector_size = is_cuda ? 32 : 1;
37  const int team_size = is_cuda ? 256/vector_size : 1;
38  const size_t range = (num_cells+team_size-1)/team_size;
39 
40  policy_type policy(range,team_size,vector_size);
41  Kokkos::parallel_for(policy, KOKKOS_LAMBDA (const team_member& team)
42  {
43  const size_t cell = team.league_rank()*team_size + team.team_rank();
44  local_scalar_type value, value2;
45  for (int basis=0; basis<num_basis; ++basis) {
46  value = 0.0;
47  value2 = 0.0;
48  for (int qp=0; qp<num_points; ++qp) {
49  for (int dim=0; dim<num_dim; ++dim)
50  value += flux(cell,qp,dim)*wgb(cell,basis,qp,dim);
51  value2 += src(cell,qp)*wbs(cell,basis,qp);
52  }
53  residual(cell,basis) = value+value2;
54  }
55  });
56 }
57 
58 template<typename FluxView, typename WgbView, typename SrcView,
59  typename WbsView, typename ResidualView>
60 void run_fad_hierarchical_team(const FluxView& flux, const WgbView& wgb,
61  const SrcView& src, const WbsView& wbs,
62  const ResidualView& residual)
63 {
64  typedef typename ResidualView::execution_space execution_space;
65  typedef typename Kokkos::ThreadLocalScalarType<ResidualView>::type local_scalar_type;
66  typedef Kokkos::TeamPolicy<execution_space> policy_type;
67  typedef typename policy_type::member_type team_member;
68 
69  const size_t num_cells = wgb.extent(0);
70  const int num_basis = wgb.extent(1);
71  const int num_points = wgb.extent(2);
72  const int num_dim = wgb.extent(3);
73 
74  const bool is_cuda = is_cuda_space<execution_space>::value;
75  const int vector_size = is_cuda ? 32 : 1;
76  const int team_size = is_cuda ? 256/vector_size : 1;
77 
78  policy_type policy(num_cells,team_size,vector_size);
79  Kokkos::parallel_for(policy, KOKKOS_LAMBDA (const team_member& team)
80  {
81  const int team_rank = team.team_rank();
82  const size_t cell = team.league_rank();
83  local_scalar_type value, value2;
84  for (int basis=team_rank; basis<num_basis; basis+=team_size) {
85  value = 0.0;
86  value2 = 0.0;
87  for (int qp=0; qp<num_points; ++qp) {
88  for (int dim=0; dim<num_dim; ++dim)
89  value += flux(cell,qp,dim)*wgb(cell,basis,qp,dim);
90  value2 += src(cell,qp)*wbs(cell,basis,qp);
91  }
92  residual(cell,basis) = value+value2;
93  }
94  });
95 }
96 
97 template <typename FadType, int N, typename ExecSpace>
98 double time_fad_hierarchical_flat(int ncells, int num_basis, int num_points,
99  int ndim, int ntrial, bool check)
100 {
101  static const int FadStride = is_cuda_space<ExecSpace>::value ? 32 : 1;
102 #if defined(SACADO_ALIGN_SFAD)
103  static const int Nalign = ((N+FadStride-1)/FadStride)*FadStride;
104  typedef typename FadType::template apply_N<Nalign>::type AlignedFadType;
105 #else
106  typedef FadType AlignedFadType;
107 #endif
108 
109  typedef typename ExecSpace::array_layout DefaultLayout;
111  typedef Kokkos::View<AlignedFadType****,ContLayout,ExecSpace> t_4DView;
112  typedef Kokkos::View<AlignedFadType***,ContLayout,ExecSpace> t_3DView;
113  typedef Kokkos::View<AlignedFadType**,ContLayout,ExecSpace> t_2DView;
114 
115  t_4DView wgb("",ncells,num_basis,num_points,ndim,N+1);
116  t_3DView flux("",ncells,num_points,ndim,N+1);
117  t_3DView wbs("",ncells,num_basis,num_points,N+1);
118  t_2DView src("",ncells,num_points,N+1);
119  t_2DView residual("",ncells,num_basis,N+1);
120  init_fad(wgb, wbs, flux, src, residual);
121 
122  // Run once to warm up, complete any UVM transfers
123  run_fad_hierarchical_flat(flux, wgb, src, wbs, residual);
124 
125  // Time execution
126  Kokkos::fence();
127  Kokkos::Timer timer;
128  for (int i=0; i<ntrial; ++i)
129  run_fad_hierarchical_flat(flux, wgb, src, wbs, residual);
130  Kokkos::fence();
131  double time = timer.seconds() / ntrial / ncells;
132 
133  // Check result
134  if (check)
135  check_residual(flux, wgb, src, wbs, residual);
136 
137  return time;
138 }
139 
140 template <typename FadType, int N, typename ExecSpace>
141 double time_fad_hierarchical_team(int ncells, int num_basis, int num_points,
142  int ndim, int ntrial, bool check)
143 {
144  static const int FadStride = is_cuda_space<ExecSpace>::value ? 32 : 1;
145 #if defined(SACADO_ALIGN_SFAD)
146  static const int Nalign = ((N+FadStride-1)/FadStride)*FadStride;
147  typedef typename FadType::template apply_N<Nalign>::type AlignedFadType;
148 #else
149  typedef FadType AlignedFadType;
150 #endif
151 
152  typedef typename ExecSpace::array_layout DefaultLayout;
154  typedef Kokkos::View<AlignedFadType****,ContLayout,ExecSpace> t_4DView;
155  typedef Kokkos::View<AlignedFadType***,ContLayout,ExecSpace> t_3DView;
156  typedef Kokkos::View<AlignedFadType**,ContLayout,ExecSpace> t_2DView;
157 
158  t_4DView wgb("",ncells,num_basis,num_points,ndim,N+1);
159  t_3DView flux("",ncells,num_points,ndim,N+1);
160  t_3DView wbs("",ncells,num_basis,num_points,N+1);
161  t_2DView src("",ncells,num_points,N+1);
162  t_2DView residual("",ncells,num_basis,N+1);
163  init_fad(wgb, wbs, flux, src, residual);
164 
165  // Run once to warm up, complete any UVM transfers
166  run_fad_hierarchical_team(flux, wgb, src, wbs, residual);
167 
168  // Time execution
169  Kokkos::fence();
170  Kokkos::Timer timer;
171  for (int i=0; i<ntrial; ++i)
172  run_fad_hierarchical_team(flux, wgb, src, wbs, residual);
173  Kokkos::fence();
174  double time = timer.seconds() / ntrial / ncells;
175 
176  // Check result
177  if (check)
178  check_residual(flux, wgb, src, wbs, residual);
179 
180  return time;
181 }
182 
183 #define INST_FUNC_FAD_N_DEV(FAD,N,DEV) \
184  template double time_fad_hierarchical_flat< FAD, N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check); \
185  template double time_fad_hierarchical_team< FAD, N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check);
186 
187 #define INST_FUNC_DEV(DEV) \
188  INST_FUNC_FAD_N_DEV( SFadType, fad_dim, DEV ) \
189  INST_FUNC_FAD_N_DEV( SLFadType, fad_dim, DEV )
190 
191 #ifdef KOKKOS_ENABLE_SERIAL
192 INST_FUNC_DEV(Kokkos::Serial)
193 #endif
194 
195 #ifdef KOKKOS_ENABLE_OPENMP
196 INST_FUNC_DEV(Kokkos::OpenMP)
197 #endif
198 
199 #ifdef KOKKOS_ENABLE_THREADS
200 INST_FUNC_DEV(Kokkos::Threads)
201 #endif
202 
203 #ifdef KOKKOS_ENABLE_CUDA
204 INST_FUNC_DEV(Kokkos::Cuda)
205 #endif
void run_fad_hierarchical_flat(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
std::enable_if< !Kokkos::is_view_fad< View2 >::value, bool >::type check(const View1 &v_gold, const View2 &v, const double tol)
double time_fad_hierarchical_team(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
void init_fad(const V1 &v1, const V2 &v2, const V3 &v3, const V4 &v4, const V5 &v5)
const int N
void check_residual(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
int value
#define INST_FUNC_DEV(DEV)
double time_fad_hierarchical_flat(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
void run_fad_hierarchical_team(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)