Sacado Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
advection_hierarchical.cpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Sacado Package
5 // Copyright (2006) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // This library is free software; you can redistribute it and/or modify
11 // it under the terms of the GNU Lesser General Public License as
12 // published by the Free Software Foundation; either version 2.1 of the
13 // License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful, but
16 // WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
23 // USA
24 // Questions? Contact David M. Gay (dmgay@sandia.gov) or Eric T. Phipps
25 // (etphipp@sandia.gov).
26 //
27 // ***********************************************************************
28 // @HEADER
29 
30 #define SACADO_VIEW_CUDA_HIERARCHICAL 1
31 #define SACADO_ALIGN_SFAD 1
32 
33 #include "Sacado.hpp"
35 #include "common.hpp"
36 
37 #include "impl/Kokkos_Timer.hpp"
38 
39 template<typename FluxView, typename WgbView, typename SrcView,
40  typename WbsView, typename ResidualView>
41 void run_fad_hierarchical_flat(const FluxView& flux, const WgbView& wgb,
42  const SrcView& src, const WbsView& wbs,
43  const ResidualView& residual)
44 {
45  typedef typename ResidualView::execution_space execution_space;
46  typedef typename Kokkos::ThreadLocalScalarType<ResidualView>::type local_scalar_type;
47  typedef Kokkos::TeamPolicy<execution_space> policy_type;
48  typedef typename policy_type::member_type team_member;
49 
50  const size_t num_cells = wgb.extent(0);
51  const int num_basis = wgb.extent(1);
52  const int num_points = wgb.extent(2);
53  const int num_dim = wgb.extent(3);
54 
55  const bool is_cuda = is_cuda_space<execution_space>::value;
56  const int vector_size = is_cuda ? 32 : 1;
57  const int team_size = is_cuda ? 256/vector_size : 1;
58  const size_t range = (num_cells+team_size-1)/team_size;
59 
60  policy_type policy(range,team_size,vector_size);
61  Kokkos::parallel_for(policy, KOKKOS_LAMBDA (const team_member& team)
62  {
63  const size_t cell = team.league_rank()*team_size + team.team_rank();
64  local_scalar_type value, value2;
65  for (int basis=0; basis<num_basis; ++basis) {
66  value = 0.0;
67  value2 = 0.0;
68  for (int qp=0; qp<num_points; ++qp) {
69  for (int dim=0; dim<num_dim; ++dim)
70  value += flux(cell,qp,dim)*wgb(cell,basis,qp,dim);
71  value2 += src(cell,qp)*wbs(cell,basis,qp);
72  }
73  residual(cell,basis) = value+value2;
74  }
75  });
76 }
77 
78 template<typename FluxView, typename WgbView, typename SrcView,
79  typename WbsView, typename ResidualView>
80 void run_fad_hierarchical_team(const FluxView& flux, const WgbView& wgb,
81  const SrcView& src, const WbsView& wbs,
82  const ResidualView& residual)
83 {
84  typedef typename ResidualView::execution_space execution_space;
85  typedef typename Kokkos::ThreadLocalScalarType<ResidualView>::type local_scalar_type;
86  typedef Kokkos::TeamPolicy<execution_space> policy_type;
87  typedef typename policy_type::member_type team_member;
88 
89  const size_t num_cells = wgb.extent(0);
90  const int num_basis = wgb.extent(1);
91  const int num_points = wgb.extent(2);
92  const int num_dim = wgb.extent(3);
93 
94  const bool is_cuda = is_cuda_space<execution_space>::value;
95  const int vector_size = is_cuda ? 32 : 1;
96  const int team_size = is_cuda ? 256/vector_size : 1;
97 
98  policy_type policy(num_cells,team_size,vector_size);
99  Kokkos::parallel_for(policy, KOKKOS_LAMBDA (const team_member& team)
100  {
101  const int team_rank = team.team_rank();
102  const size_t cell = team.league_rank();
103  local_scalar_type value, value2;
104  for (int basis=team_rank; basis<num_basis; basis+=team_size) {
105  value = 0.0;
106  value2 = 0.0;
107  for (int qp=0; qp<num_points; ++qp) {
108  for (int dim=0; dim<num_dim; ++dim)
109  value += flux(cell,qp,dim)*wgb(cell,basis,qp,dim);
110  value2 += src(cell,qp)*wbs(cell,basis,qp);
111  }
112  residual(cell,basis) = value+value2;
113  }
114  });
115 }
116 
117 template <typename FadType, int N, typename ExecSpace>
118 double time_fad_hierarchical_flat(int ncells, int num_basis, int num_points,
119  int ndim, int ntrial, bool check)
120 {
121  static const int FadStride = is_cuda_space<ExecSpace>::value ? 32 : 1;
122 #if defined(SACADO_ALIGN_SFAD)
123  static const int Nalign = ((N+FadStride-1)/FadStride)*FadStride;
124  typedef typename FadType::template apply_N<Nalign>::type AlignedFadType;
125 #else
126  typedef FadType AlignedFadType;
127 #endif
128 
129  typedef typename ExecSpace::array_layout DefaultLayout;
131  typedef Kokkos::View<AlignedFadType****,ContLayout,ExecSpace> t_4DView;
132  typedef Kokkos::View<AlignedFadType***,ContLayout,ExecSpace> t_3DView;
133  typedef Kokkos::View<AlignedFadType**,ContLayout,ExecSpace> t_2DView;
134 
135  t_4DView wgb("",ncells,num_basis,num_points,ndim,N+1);
136  t_3DView flux("",ncells,num_points,ndim,N+1);
137  t_3DView wbs("",ncells,num_basis,num_points,N+1);
138  t_2DView src("",ncells,num_points,N+1);
139  t_2DView residual("",ncells,num_basis,N+1);
140  init_fad(wgb, wbs, flux, src, residual);
141 
142  // Run once to warm up, complete any UVM transfers
143  run_fad_hierarchical_flat(flux, wgb, src, wbs, residual);
144 
145  // Time execution
146  Kokkos::fence();
147  Kokkos::Impl::Timer timer;
148  for (int i=0; i<ntrial; ++i)
149  run_fad_hierarchical_flat(flux, wgb, src, wbs, residual);
150  Kokkos::fence();
151  double time = timer.seconds() / ntrial / ncells;
152 
153  // Check result
154  if (check)
155  check_residual(flux, wgb, src, wbs, residual);
156 
157  return time;
158 }
159 
160 template <typename FadType, int N, typename ExecSpace>
161 double time_fad_hierarchical_team(int ncells, int num_basis, int num_points,
162  int ndim, int ntrial, bool check)
163 {
164  static const int FadStride = is_cuda_space<ExecSpace>::value ? 32 : 1;
165 #if defined(SACADO_ALIGN_SFAD)
166  static const int Nalign = ((N+FadStride-1)/FadStride)*FadStride;
167  typedef typename FadType::template apply_N<Nalign>::type AlignedFadType;
168 #else
169  typedef FadType AlignedFadType;
170 #endif
171 
172  typedef typename ExecSpace::array_layout DefaultLayout;
174  typedef Kokkos::View<AlignedFadType****,ContLayout,ExecSpace> t_4DView;
175  typedef Kokkos::View<AlignedFadType***,ContLayout,ExecSpace> t_3DView;
176  typedef Kokkos::View<AlignedFadType**,ContLayout,ExecSpace> t_2DView;
177 
178  t_4DView wgb("",ncells,num_basis,num_points,ndim,N+1);
179  t_3DView flux("",ncells,num_points,ndim,N+1);
180  t_3DView wbs("",ncells,num_basis,num_points,N+1);
181  t_2DView src("",ncells,num_points,N+1);
182  t_2DView residual("",ncells,num_basis,N+1);
183  init_fad(wgb, wbs, flux, src, residual);
184 
185  // Run once to warm up, complete any UVM transfers
186  run_fad_hierarchical_team(flux, wgb, src, wbs, residual);
187 
188  // Time execution
189  Kokkos::fence();
190  Kokkos::Impl::Timer timer;
191  for (int i=0; i<ntrial; ++i)
192  run_fad_hierarchical_team(flux, wgb, src, wbs, residual);
193  Kokkos::fence();
194  double time = timer.seconds() / ntrial / ncells;
195 
196  // Check result
197  if (check)
198  check_residual(flux, wgb, src, wbs, residual);
199 
200  return time;
201 }
202 
203 #define INST_FUNC_FAD_N_DEV(FAD,N,DEV) \
204  template double time_fad_hierarchical_flat< FAD, N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check); \
205  template double time_fad_hierarchical_team< FAD, N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check);
206 
207 #define INST_FUNC_DEV(DEV) \
208  INST_FUNC_FAD_N_DEV( SFadType, fad_dim, DEV ) \
209  INST_FUNC_FAD_N_DEV( SLFadType, fad_dim, DEV )
210 
211 #ifdef KOKKOS_ENABLE_SERIAL
212 INST_FUNC_DEV(Kokkos::Serial)
213 #endif
214 
215 #ifdef KOKKOS_ENABLE_OPENMP
216 INST_FUNC_DEV(Kokkos::OpenMP)
217 #endif
218 
219 #ifdef KOKKOS_ENABLE_THREADS
220 INST_FUNC_DEV(Kokkos::Threads)
221 #endif
222 
223 #ifdef KOKKOS_ENABLE_CUDA
224 INST_FUNC_DEV(Kokkos::Cuda)
225 #endif
void run_fad_hierarchical_flat(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
std::enable_if< !Kokkos::is_view_fad< View2 >::value, bool >::type check(const View1 &v_gold, const View2 &v, const double tol)
double time_fad_hierarchical_team(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
void init_fad(const V1 &v1, const V2 &v2, const V3 &v3, const V4 &v4, const V5 &v5)
const int N
void check_residual(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
#define INST_FUNC_DEV(DEV)
double time_fad_hierarchical_flat(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
void run_fad_hierarchical_team(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)