Sacado Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Fad_KokkosAtomicTests.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Sacado Package
4 //
5 // Copyright 2006 NTESS and the Sacado contributors.
6 // SPDX-License-Identifier: LGPL-2.1-or-later
7 // *****************************************************************************
8 // @HEADER
9 
11 
12 #include "Sacado.hpp"
13 
// Type trait: primary template. Reports `false` for every type that is not
// matched by a more specialized version of is_dfad.
//
// `value` is declared constexpr so that it can be odr-used (for example bound
// to a const reference) without a separate out-of-class definition.
template <typename T>
struct is_dfad {
  static constexpr bool value = false;
};
18 
19 template <typename T>
20 struct is_dfad< Sacado::Fad::Exp::DFad<T> > {
21  static const bool value = true;
22 };
23 
24 template <typename FadType1, typename FadType2>
25 bool checkFads(const FadType1& x, const FadType2& x2,
26  Teuchos::FancyOStream& out, double tol = 1.0e-15)
27 {
28  bool success = true;
29 
30  // Check sizes match
31  TEUCHOS_TEST_EQUALITY(x.size(), x2.size(), out, success);
32 
33  // Check values match
34  TEUCHOS_TEST_FLOATING_EQUALITY(x.val(), x2.val(), tol, out, success);
35 
36  // Check derivatives match
37  for (int i=0; i<x.size(); ++i)
38  TEUCHOS_TEST_FLOATING_EQUALITY(x.dx(i), x2.dx(i), tol, out, success);
39 
40  return success;
41 }
42 
// Build a deterministic Fad test value for position (row, col).
//
// The value is (100 + num_rows/(row+1)) + (10 + num_cols/(col+1)); derivative
// component k is that value plus (1 + fad_size/(k+1)).
//
// num_rows, num_cols : extents used to scale the row and column terms.
// fad_size           : number of derivative components of the result.
// row, col           : zero-based position the value is generated for.
template <typename fadtype, typename ordinal>
inline
fadtype generate_fad( const ordinal num_rows,
                      const ordinal num_cols,
                      const ordinal fad_size,
                      const ordinal row,
                      const ordinal col )
{
  using scalar = typename fadtype::value_type;

  const scalar row_term = 100.0 + scalar(num_rows) / scalar(row+1);
  const scalar col_term = 10.0 + scalar(num_cols) / scalar(col+1);
  const scalar base = row_term + col_term;

  fadtype result(fad_size, scalar(0.0));
  result.val() = base;
  for (ordinal k=0; k<fad_size; ++k) {
    const scalar deriv_term = 1.0 + scalar(fad_size) / scalar(k+1);
    result.fastAccessDx(k) = base + deriv_term;
  }
  return result;
}
63 
// Derivative dimension used by the tests; may be overridden by defining
// GLOBAL_FAD_SIZE before this header is included.
#if !defined(GLOBAL_FAD_SIZE)
#define GLOBAL_FAD_SIZE 5
#endif

// Problem extents shared by the tests in this file.
constexpr int global_num_rows = 11;
constexpr int global_num_cols = 7;
70 
// Tag selecting addition.
//   init()  : starting value for an accumulation (0).
//   apply() : returns lhs + rhs.
struct AddTag {
  static double init()
  {
    return 0.0;
  }

  template <typename Lhs, typename Rhs>
  static auto apply(const Lhs& lhs, const Rhs& rhs) -> decltype(lhs+rhs)
  {
    return lhs+rhs;
  }
};
// Tag selecting subtraction.
//   init()  : starting value for an accumulation (0).
//   apply() : returns lhs - rhs.
struct SubTag {
  static double init()
  {
    return 0.0;
  }

  template <typename Lhs, typename Rhs>
  static auto apply(const Lhs& lhs, const Rhs& rhs) -> decltype(lhs-rhs)
  {
    return lhs-rhs;
  }
};
// Tag selecting multiplication.
//   init()  : starting value for an accumulation (1).
//   apply() : returns lhs * rhs.
struct MulTag {
  static double init()
  {
    return 1.0;
  }

  template <typename Lhs, typename Rhs>
  static auto apply(const Lhs& lhs, const Rhs& rhs) -> decltype(lhs*rhs)
  {
    return lhs*rhs;
  }
};
// Tag selecting division.
//   init()  : starting value for an accumulation (1).
//   apply() : returns lhs / rhs.
struct DivTag {
  static double init()
  {
    return 1.0;
  }

  template <typename Lhs, typename Rhs>
  static auto apply(const Lhs& lhs, const Rhs& rhs) -> decltype(lhs/rhs)
  {
    return lhs/rhs;
  }
};
// Tag selecting the maximum of two operands.
//   init()  : starting value for an accumulation (1).
//   apply() : returns max(lhs, rhs); max is called unqualified, so the
//             overload is picked from the argument types.
struct MaxTag {
  static double init()
  {
    return 1.0;
  }

  template <typename Lhs, typename Rhs>
  static auto apply(const Lhs& lhs, const Rhs& rhs) -> decltype(max(lhs,rhs))
  {
    return max(lhs,rhs);
  }
};
// Tag selecting the minimum of two operands.
//   init()  : starting value for an accumulation (1).
//   apply() : returns min(lhs, rhs); min is called unqualified, so the
//             overload is picked from the argument types.
struct MinTag {
  static double init()
  {
    return 1.0;
  }

  template <typename Lhs, typename Rhs>
  static auto apply(const Lhs& lhs, const Rhs& rhs) -> decltype(min(lhs,rhs))
  {
    return min(lhs,rhs);
  }
};
119 
120 // Kernel to test the Kokkos atomic operations (add, sub, mul, div, max, min), selected by tag
121 template <typename ViewType, typename ScalarViewType, bool OperFetch>
122 struct AtomicKernel {
123  typedef typename ViewType::execution_space execution_space;
124  typedef typename ViewType::size_type size_type;
125  typedef typename Kokkos::TeamPolicy< execution_space>::member_type team_handle;
126  typedef typename Kokkos::ThreadLocalScalarType<ViewType>::type local_scalar_type;
127  static const size_type stride = Kokkos::ViewScalarStride<ViewType>::stride;
128 
129  const ViewType m_v;
130  const ScalarViewType m_s;
131 
132  AtomicKernel(const ViewType& v, const ScalarViewType& s) :
133  m_v(v), m_s(s) {};
134 
135  KOKKOS_INLINE_FUNCTION
136  void operator() (AddTag tag, const size_type i) const {
137  local_scalar_type x = m_v(i);
138  if (OperFetch)
139  Kokkos::atomic_add_fetch(&(m_s()), x);
140  else
141  Kokkos::atomic_fetch_add(&(m_s()), x);
142  }
143 
144  KOKKOS_INLINE_FUNCTION
145  void operator() (SubTag tag, const size_type i) const {
146  local_scalar_type x = m_v(i);
147  if (OperFetch)
148  Kokkos::atomic_sub_fetch(&(m_s()), x);
149  else
150  Kokkos::atomic_fetch_sub(&(m_s()), x);
151  }
152 
153  KOKKOS_INLINE_FUNCTION
154  void operator() (MulTag tag, const size_type i) const {
155  local_scalar_type x = m_v(i);
156  if (OperFetch)
157  Kokkos::atomic_mul_fetch(&(m_s()), x);
158  else
159  Kokkos::atomic_fetch_mul(&(m_s()), x);
160  }
161 
162  KOKKOS_INLINE_FUNCTION
163  void operator() (DivTag tag, const size_type i) const {
164  local_scalar_type x = m_v(i);
165  if (OperFetch)
166  Kokkos::atomic_div_fetch(&(m_s()), x);
167  else
168  Kokkos::atomic_fetch_div(&(m_s()), x);
169  }
170 
171  KOKKOS_INLINE_FUNCTION
172  void operator() (MaxTag tag, const size_type i) const {
173  local_scalar_type x = m_v(i);
174  if (OperFetch)
175  Kokkos::atomic_max_fetch(&(m_s()), x);
176  else
177  Kokkos::atomic_fetch_max(&(m_s()), x);
178  }
179 
180  KOKKOS_INLINE_FUNCTION
181  void operator() (MinTag tag, const size_type i) const {
182  local_scalar_type x = m_v(i);
183  if (OperFetch)
184  Kokkos::atomic_min_fetch(&(m_s()), x);
185  else
186  Kokkos::atomic_fetch_min(&(m_s()), x);
187  }
188 
189  template <typename Tag>
190  KOKKOS_INLINE_FUNCTION
191  void operator()( Tag tag, const team_handle& team ) const
192  {
193  const size_type i = team.league_rank()*team.team_size() + team.team_rank();
194  if (i < m_v.extent(0))
195  (*this)(tag, i);
196  }
197 
198  // Kernel launch
199  template <typename Tag>
200  static void apply(Tag tag, const ViewType& v, const ScalarViewType& s) {
201  const size_type nrow = v.extent(0);
202 
203 #if defined (KOKKOS_ENABLE_CUDA) && defined (SACADO_VIEW_CUDA_HIERARCHICAL)
204  const bool use_team =
207  ( stride > 1 );
208 #elif defined (KOKKOS_ENABLE_CUDA) && defined (SACADO_VIEW_CUDA_HIERARCHICAL_DFAD)
209  const bool use_team =
213 #elif defined (KOKKOS_ENABLE_HIP) && defined (SACADO_VIEW_CUDA_HIERARCHICAL)
214  const bool use_team =
217  ( stride > 1 );
218 #elif defined (KOKKOS_ENABLE_HIP) && defined (SACADO_VIEW_CUDA_HIERARCHICAL_DFAD)
219  const bool use_team =
222  is_dfad<typename ViewType::non_const_value_type>::value;
223 #else
224  const bool use_team = false;
225 #endif
226 
227  if (use_team) {
228  const size_type team_size = 256 / stride;
229  Kokkos::TeamPolicy<execution_space, Tag> policy(
230  (nrow+team_size-1)/team_size, team_size, stride );
231  Kokkos::parallel_for( policy, AtomicKernel(v,s) );
232  }
233  else {
234  Kokkos::RangePolicy<execution_space, Tag> policy( 0, nrow );
235  Kokkos::parallel_for( policy, AtomicKernel(v,s) );
236  }
237  }
238 };
239 
240 template <typename FadType, typename Layout, typename Device, bool OperFetch,
241  typename TagType>
242 bool testAtomic(const TagType& tag, Teuchos::FancyOStream& out)
243 {
244  typedef Kokkos::View<FadType*,Layout,Device> ViewType;
245  typedef Kokkos::View<FadType,Layout,Device> ScalarViewType;
246  typedef typename ViewType::size_type size_type;
247  typedef typename ViewType::HostMirror host_view_type;
248  typedef typename ScalarViewType::HostMirror host_scalar_view_type;
249 
250  const size_type num_rows = global_num_rows;
251  const size_type fad_size = global_fad_size;
252 
253  // Create and fill view
254  ViewType v;
255  ScalarViewType s0;
256 #if defined (SACADO_DISABLE_FAD_VIEW_SPEC)
257  v = ViewType ("view", num_rows);
258  s0 = ScalarViewType ("");
259 #else
260  v = ViewType ("view", num_rows, fad_size+1);
261  s0 = ScalarViewType ("", fad_size+1);
262 #endif
263  host_view_type h_v = Kokkos::create_mirror_view(v);
264  for (size_type i=0; i<num_rows; ++i)
265  h_v(i) =
266  generate_fad<FadType>(num_rows, size_type(1), fad_size, i, size_type(0));
267  Kokkos::deep_copy(v, h_v);
268 
269  Kokkos::deep_copy(s0, tag.init());
270 
271  // Create scalar view
272  ScalarViewType s;
273 #if defined (SACADO_DISABLE_FAD_VIEW_SPEC)
274  s = ScalarViewType ("scalar view");
275 #else
276  s = ScalarViewType ("scalar view", fad_size+1);
277 #endif
278  Kokkos::deep_copy( s, tag.init() );
279 
280  // Call atomic_add kernel, which adds up entries in v
282 
283  // Copy to host
284  host_scalar_view_type hs = Kokkos::create_mirror_view(s);
285  Kokkos::deep_copy(hs, s);
286 
287  // Compute correct result
288  auto b = Kokkos::create_mirror_view(s0);
289  Kokkos::deep_copy(b, s0);
290 
291  for (size_type i=0; i<num_rows; ++i)
292  b() = tag.apply(b(), h_v(i));
293 
294  // Check
295  bool success = checkFads(b(), hs(), out);
296 
297  return success;
298 }
299 
300 // Test atomic_oper_fetch form
301 
303  Kokkos_View_Fad, AtomicAddFetch, FadType, Layout, Device )
304 {
305  success = testAtomic<FadType, Layout, Device, true>(AddTag(), out);
306 }
307 
309  Kokkos_View_Fad, AtomicSubFetch, FadType, Layout, Device )
310 {
311  success = testAtomic<FadType, Layout, Device, true>(SubTag(), out);
312 }
313 
315  Kokkos_View_Fad, AtomicMulFetch, FadType, Layout, Device )
316 {
317  success = testAtomic<FadType, Layout, Device, true>(MulTag(), out);
318 }
319 
321  Kokkos_View_Fad, AtomicDivFetch, FadType, Layout, Device )
322 {
323  success = testAtomic<FadType, Layout, Device, true>(DivTag(), out);
324 }
325 
327  Kokkos_View_Fad, AtomicMaxFetch, FadType, Layout, Device )
328 {
329  success = testAtomic<FadType, Layout, Device, true>(MaxTag(), out);
330 }
331 
333  Kokkos_View_Fad, AtomicMinFetch, FadType, Layout, Device )
334 {
335  success = testAtomic<FadType, Layout, Device, true>(MinTag(), out);
336 }
337 
338 // Test atomic_fetch_oper form
339 
341  Kokkos_View_Fad, AtomicFetchAdd, FadType, Layout, Device )
342 {
343  success = testAtomic<FadType, Layout, Device, false>(AddTag(), out);
344 }
345 
347  Kokkos_View_Fad, AtomicFetchSub, FadType, Layout, Device )
348 {
349  success = testAtomic<FadType, Layout, Device, false>(SubTag(), out);
350 }
351 
353  Kokkos_View_Fad, AtomicFetchMul, FadType, Layout, Device )
354 {
355  success = testAtomic<FadType, Layout, Device, false>(MulTag(), out);
356 }
357 
359  Kokkos_View_Fad, AtomicFetchDiv, FadType, Layout, Device )
360 {
361  success = testAtomic<FadType, Layout, Device, false>(DivTag(), out);
362 }
363 
365  Kokkos_View_Fad, AtomicFetchMax, FadType, Layout, Device )
366 {
367  success = testAtomic<FadType, Layout, Device, false>(MaxTag(), out);
368 }
369 
371  Kokkos_View_Fad, AtomicFetchMin, FadType, Layout, Device )
372 {
373  success = testAtomic<FadType, Layout, Device, false>(MinTag(), out);
374 }
375 
// Instantiate every atomic unit test (the six *Fetch forms followed by the six
// Fetch* forms) for one Fad type / layout / device combination.
#define VIEW_FAD_TESTS_FLD( FAD, LAYOUT, DEVICE ) \
  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicAddFetch, FAD, LAYOUT, DEVICE ) \
  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicSubFetch, FAD, LAYOUT, DEVICE ) \
  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicMulFetch, FAD, LAYOUT, DEVICE ) \
  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicDivFetch, FAD, LAYOUT, DEVICE ) \
  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicMaxFetch, FAD, LAYOUT, DEVICE ) \
  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicMinFetch, FAD, LAYOUT, DEVICE ) \
  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicFetchAdd, FAD, LAYOUT, DEVICE ) \
  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicFetchSub, FAD, LAYOUT, DEVICE ) \
  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicFetchMul, FAD, LAYOUT, DEVICE ) \
  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicFetchDiv, FAD, LAYOUT, DEVICE ) \
  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicFetchMax, FAD, LAYOUT, DEVICE ) \
  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicFetchMin, FAD, LAYOUT, DEVICE )
389 
390 using Kokkos::LayoutLeft;
391 using Kokkos::LayoutRight;
394 
// Instantiate the atomic tests for one Fad type / device pair over the four
// layouts: LayoutLeft, LayoutRight, LeftContiguous and RightContiguous.
#define VIEW_FAD_TESTS_FD( FAD, DEVICE ) \
  VIEW_FAD_TESTS_FLD( FAD, LayoutLeft, DEVICE ) \
  VIEW_FAD_TESTS_FLD( FAD, LayoutRight, DEVICE ) \
  VIEW_FAD_TESTS_FLD( FAD, LeftContiguous, DEVICE ) \
  VIEW_FAD_TESTS_FLD( FAD, RightContiguous, DEVICE )
400 
401 // Full set of atomics only implemented for new design
402 #if SACADO_ENABLE_NEW_DESIGN
406 
// Instantiate the atomic tests on one device for SFadType and SLFadType, and
// additionally for DFadType when SACADO_TEST_DFAD is enabled.
#if SACADO_TEST_DFAD
#define VIEW_FAD_TESTS_D( DEVICE ) \
  VIEW_FAD_TESTS_FD( SFadType, DEVICE ) \
  VIEW_FAD_TESTS_FD( SLFadType, DEVICE ) \
  VIEW_FAD_TESTS_FD( DFadType, DEVICE )
#else
#define VIEW_FAD_TESTS_D( DEVICE ) \
  VIEW_FAD_TESTS_FD( SFadType, DEVICE ) \
  VIEW_FAD_TESTS_FD( SLFadType, DEVICE )
#endif
417 
418 #else
419 
// Fallback: expands to nothing, so no atomic tests are instantiated.
#define VIEW_FAD_TESTS_D( DEVICE ) /* */
421 
422 #endif
static double init()
KOKKOS_INLINE_FUNCTION void operator()(Tag tag, const team_handle &team) const
static auto apply(const T1 &a, const T2 &b) -> decltype(a/b)
Kokkos::LayoutContiguous< Kokkos::LayoutRight > RightContiguous
Kokkos::LayoutContiguous< Kokkos::LayoutLeft > LeftContiguous
#define TEUCHOS_TEST_FLOATING_EQUALITY(v1, v2, tol, out, success)
Kokkos::ThreadLocalScalarType< ViewType >::type local_scalar_type
static const bool value
const int global_fad_size
Sacado::Fad::DFad< double > FadType
static double init()
ViewType::size_type size_type
bool checkFads(const FadType1 &x, const FadType2 &x2, Teuchos::FancyOStream &out, double tol=1.0e-15)
const int global_num_rows
AtomicKernel(const ViewType &v, const ScalarViewType &s)
scalar generate_fad(const size_t n0, const size_t n1, const size_t n2, const size_t n3, const int fad_size, const size_t i0, const size_t i1, const size_t i2, const size_t i3, const int i_fad)
Sacado::Fad::SFad< double, fad_dim > SFadType
static double init()
static const size_type stride
static auto apply(const T1 &a, const T2 &b) -> decltype(a-b)
TEUCHOS_UNIT_TEST_TEMPLATE_3_DECL(Kokkos_View_FadFad, DeepCopy, FadFadType, Layout, Device)
const int global_num_cols
#define T2(r, f)
Definition: Sacado_rad.hpp:558
GeneralFad< DynamicStorage< T > > DFad
SimpleFad< ValueT > min(const SimpleFad< ValueT > &a, const SimpleFad< ValueT > &b)
Forward-mode AD class templated on the storage for the derivative array.
static auto apply(const T1 &a, const T2 &b) -> decltype(a+b)
static double init()
static double init()
#define T1(r, f)
Definition: Sacado_rad.hpp:583
Sacado::Fad::SLFad< double, fad_dim > SLFadType
Sacado::Fad::DFad< double > DFadType
static double init()
static auto apply(const T1 &a, const T2 &b) -> decltype(min(a, b))
static void apply(Tag tag, const ViewType &v, const ScalarViewType &s)
#define GLOBAL_FAD_SIZE
int value
KOKKOS_INLINE_FUNCTION void operator()(AddTag tag, const size_type i) const
#define TEUCHOS_TEST_EQUALITY(v1, v2, out, success)
Kokkos::TeamPolicy< execution_space >::member_type team_handle
const ScalarViewType m_s
const double tol
SimpleFad< ValueT > max(const SimpleFad< ValueT > &a, const SimpleFad< ValueT > &b)
ViewType::execution_space execution_space
bool testAtomic(const TagType &tag, Teuchos::FancyOStream &out)
static auto apply(const T1 &a, const T2 &b) -> decltype(a *b)
static auto apply(const T1 &a, const T2 &b) -> decltype(max(a, b))