Sacado Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
const_basis/advection_hierarchical.cpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Sacado Package
4 //
5 // Copyright 2006 NTESS and the Sacado contributors.
6 // SPDX-License-Identifier: LGPL-2.1-or-later
7 // *****************************************************************************
8 // @HEADER
9 
10 #define SACADO_VIEW_CUDA_HIERARCHICAL 1
11 #define SACADO_ALIGN_SFAD 1
12 
13 #include "Sacado.hpp"
15 #include "common.hpp"
16 
17 #include "Kokkos_Timer.hpp"
18 
19 template<typename FluxView, typename WgbView, typename SrcView,
20  typename WbsView, typename ResidualView>
21 void run_fad_hierarchical_flat(const FluxView& flux, const WgbView& wgb,
22  const SrcView& src, const WbsView& wbs,
23  const ResidualView& residual)
24 {
25  typedef typename ResidualView::execution_space execution_space;
26  typedef typename Kokkos::ThreadLocalScalarType<ResidualView>::type local_scalar_type;
27  typedef Kokkos::TeamPolicy<execution_space> policy_type;
28  typedef typename policy_type::member_type team_member;
29 
30  const size_t num_cells = wgb.extent(0);
31  const int num_basis = wgb.extent(1);
32  const int num_points = wgb.extent(2);
33  const int num_dim = wgb.extent(3);
34 
35  const bool is_cuda = is_cuda_space<execution_space>::value;
36  const int vector_size = is_cuda ? 32 : 1;
37  const int team_size = is_cuda ? 256/vector_size : 1;
38  const size_t range = (num_cells+team_size-1)/team_size;
39 
40  policy_type policy(range,team_size,vector_size);
41  Kokkos::parallel_for(policy, KOKKOS_LAMBDA (const team_member& team)
42  {
43  const size_t cell = team.league_rank()*team_size + team.team_rank();
44  local_scalar_type value, value2;
45  for (int basis=0; basis<num_basis; ++basis) {
46  value = 0.0;
47  value2 = 0.0;
48  for (int qp=0; qp<num_points; ++qp) {
49  for (int dim=0; dim<num_dim; ++dim)
50  value += flux(cell,qp,dim)*wgb(cell,basis,qp,dim);
51  value2 += src(cell,qp)*wbs(cell,basis,qp);
52  }
53  residual(cell,basis) = value+value2;
54  }
55  });
56 }
57 
58 template<typename FluxView, typename WgbView, typename SrcView,
59  typename WbsView, typename ResidualView>
60 void run_fad_hierarchical_team(const FluxView& flux, const WgbView& wgb,
61  const SrcView& src, const WbsView& wbs,
62  const ResidualView& residual)
63 {
64  typedef typename ResidualView::execution_space execution_space;
65  typedef typename Kokkos::ThreadLocalScalarType<ResidualView>::type local_scalar_type;
66  typedef Kokkos::TeamPolicy<execution_space> policy_type;
67  typedef typename policy_type::member_type team_member;
68 
69  const size_t num_cells = wgb.extent(0);
70  const int num_basis = wgb.extent(1);
71  const int num_points = wgb.extent(2);
72  const int num_dim = wgb.extent(3);
73 
74  const bool is_cuda = is_cuda_space<execution_space>::value;
75  const int vector_size = is_cuda ? 32 : 1;
76  const int team_size = is_cuda ? 256/vector_size : 1;
77 
78  policy_type policy(num_cells,team_size,vector_size);
79  Kokkos::parallel_for(policy, KOKKOS_LAMBDA (const team_member& team)
80  {
81  const int team_rank = team.team_rank();
82  const size_t cell = team.league_rank();
83  local_scalar_type value, value2;
84  for (int basis=team_rank; basis<num_basis; basis+=team_size) {
85  value = 0.0;
86  value2 = 0.0;
87  for (int qp=0; qp<num_points; ++qp) {
88  for (int dim=0; dim<num_dim; ++dim)
89  value += flux(cell,qp,dim)*wgb(cell,basis,qp,dim);
90  value2 += src(cell,qp)*wbs(cell,basis,qp);
91  }
92  residual(cell,basis) = value+value2;
93  }
94  });
95 }
96 
97 template <typename FadType, int N, typename ExecSpace>
98 double time_fad_hierarchical_flat(int ncells, int num_basis, int num_points,
99  int ndim, int ntrial, bool check)
100 {
101  static const int FadStride = is_cuda_space<ExecSpace>::value ? 32 : 1;
102 #if defined(SACADO_ALIGN_SFAD)
103  static const int Nalign = ((N+FadStride-1)/FadStride)*FadStride;
104  typedef typename FadType::template apply_N<Nalign>::type AlignedFadType;
105 #else
106  typedef FadType AlignedFadType;
107 #endif
108 
109  typedef typename ExecSpace::array_layout DefaultLayout;
111  typedef Kokkos::View<double****,ExecSpace> t_4DView_d;
112  typedef Kokkos::View<double***,ExecSpace> t_3DView_d;
113  typedef Kokkos::View<AlignedFadType***,ContLayout,ExecSpace> t_3DView;
114  typedef Kokkos::View<AlignedFadType**,ContLayout,ExecSpace> t_2DView;
115 
116  t_4DView_d wgb("",ncells,num_basis,num_points,ndim);
117  t_3DView_d wbs("",ncells,num_basis,num_points);
118  t_3DView flux("",ncells,num_points,ndim,N+1);
119  t_2DView src("",ncells,num_points,N+1);
120  t_2DView residual("",ncells,num_basis,N+1);
121  init_fad(wgb, wbs, flux, src, residual);
122 
123  // Run once to warm up, complete any UVM transfers
124  run_fad_hierarchical_flat(flux, wgb, src, wbs, residual);
125 
126  // Time execution
127  Kokkos::fence();
128  Kokkos::Timer timer;
129  for (int i=0; i<ntrial; ++i)
130  run_fad_hierarchical_flat(flux, wgb, src, wbs, residual);
131  Kokkos::fence();
132  double time = timer.seconds() / ntrial / ncells;
133 
134  // Check result
135  if (check)
136  check_residual(flux, wgb, src, wbs, residual);
137 
138  return time;
139 }
140 
141 template <typename FadType, int N, typename ExecSpace>
142 double time_fad_hierarchical_team(int ncells, int num_basis, int num_points,
143  int ndim, int ntrial, bool check)
144 {
145  static const int FadStride = is_cuda_space<ExecSpace>::value ? 32 : 1;
146 #if defined(SACADO_ALIGN_SFAD)
147  static const int Nalign = ((N+FadStride-1)/FadStride)*FadStride;
148  typedef typename FadType::template apply_N<Nalign>::type AlignedFadType;
149 #else
150  typedef FadType AlignedFadType;
151 #endif
152 
153  typedef typename ExecSpace::array_layout DefaultLayout;
155  typedef Kokkos::View<double****,ExecSpace> t_4DView_d;
156  typedef Kokkos::View<double***,ExecSpace> t_3DView_d;
157  typedef Kokkos::View<AlignedFadType***,ContLayout,ExecSpace> t_3DView;
158  typedef Kokkos::View<AlignedFadType**,ContLayout,ExecSpace> t_2DView;
159 
160  t_4DView_d wgb("",ncells,num_basis,num_points,ndim);
161  t_3DView_d wbs("",ncells,num_basis,num_points);
162  t_3DView flux("",ncells,num_points,ndim,N+1);
163  t_2DView src("",ncells,num_points,N+1);
164  t_2DView residual("",ncells,num_basis,N+1);
165  init_fad(wgb, wbs, flux, src, residual);
166 
167  // Run once to warm up, complete any UVM transfers
168  run_fad_hierarchical_team(flux, wgb, src, wbs, residual);
169 
170  // Time execution
171  Kokkos::fence();
172  Kokkos::Timer timer;
173  for (int i=0; i<ntrial; ++i)
174  run_fad_hierarchical_team(flux, wgb, src, wbs, residual);
175  Kokkos::fence();
176  double time = timer.seconds() / ntrial / ncells;
177 
178  // Check result
179  if (check)
180  check_residual(flux, wgb, src, wbs, residual);
181 
182  return time;
183 }
184 
// Explicit instantiations.
//
// INST_FUNC_FAD_N_DEV instantiates both timing drivers (flat and team) for
// one combination of Fad type, derivative dimension and execution space.
// The signature of both drivers is
//   (int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check).
#define INST_FUNC_FAD_N_DEV(FAD,N,DEV) \
  template double time_fad_hierarchical_flat< FAD, N, DEV >(int, int, int, int, int, bool); \
  template double time_fad_hierarchical_team< FAD, N, DEV >(int, int, int, int, int, bool);

// INST_FUNC_DEV instantiates the drivers for SFadType and SLFadType, both
// with derivative dimension fad_dim, on the given execution space.
#define INST_FUNC_DEV(DEV) \
  INST_FUNC_FAD_N_DEV( SFadType, fad_dim, DEV ) \
  INST_FUNC_FAD_N_DEV( SLFadType, fad_dim, DEV )

// Instantiate for every Kokkos backend enabled in this build.
#if defined(KOKKOS_ENABLE_SERIAL)
INST_FUNC_DEV(Kokkos::Serial)
#endif

#if defined(KOKKOS_ENABLE_OPENMP)
INST_FUNC_DEV(Kokkos::OpenMP)
#endif

#if defined(KOKKOS_ENABLE_THREADS)
INST_FUNC_DEV(Kokkos::Threads)
#endif

#if defined(KOKKOS_ENABLE_CUDA)
INST_FUNC_DEV(Kokkos::Cuda)
#endif
void run_fad_hierarchical_flat(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
std::enable_if< !Kokkos::is_view_fad< View2 >::value, bool >::type check(const View1 &v_gold, const View2 &v, const double tol)
#define INST_FUNC_DEV(DEV)
double time_fad_hierarchical_team(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
void init_fad(const V1 &v1, const V2 &v2, const V3 &v3, const V4 &v4, const V5 &v5)
const int N
void check_residual(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
int value
double time_fad_hierarchical_flat(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
void run_fad_hierarchical_team(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)