Kokkos Core Kernels Package  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Kokkos_BitManipulation.hpp
1 //@HEADER
2 // ************************************************************************
3 //
4 // Kokkos v. 4.0
5 // Copyright (2022) National Technology & Engineering
6 // Solutions of Sandia, LLC (NTESS).
7 //
8 // Under the terms of Contract DE-NA0003525 with NTESS,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 // See https://kokkos.org/LICENSE for license information.
13 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 //
15 //@HEADER
16 
17 #ifndef KOKKOS_BIT_MANIPULATION_HPP
18 #define KOKKOS_BIT_MANIPULATION_HPP
19 
20 #include <Kokkos_Macros.hpp>
21 #include <Kokkos_NumericTraits.hpp>
22 #include <climits> // CHAR_BIT
23 #include <cstring> //memcpy
24 #include <type_traits>
25 
26 namespace Kokkos::Impl {
27 
28 template <class T>
29 KOKKOS_FUNCTION constexpr T byteswap_fallback(T x) {
30  if constexpr (sizeof(T) > 1) {
31  using U = std::make_unsigned_t<T>;
32 
33  size_t shift = CHAR_BIT * (sizeof(T) - 1);
34 
35  U lo_mask = static_cast<unsigned char>(~0);
36  U hi_mask = lo_mask << shift;
37 
38  U val = x;
39 
40  for (size_t i = 0; i < sizeof(T) / 2; ++i) {
41  U lo_val = val & lo_mask;
42  U hi_val = val & hi_mask;
43 
44  val = (val & ~lo_mask) | (hi_val >> shift);
45  val = (val & ~hi_mask) | (lo_val << shift);
46 
47  lo_mask <<= CHAR_BIT;
48  hi_mask >>= CHAR_BIT;
49 
50  shift -= 2 * CHAR_BIT;
51  }
52  return val;
53  }
54  // sizeof(T) == 1
55  return x;
56 }
57 
58 template <class T>
59 KOKKOS_FUNCTION constexpr int countl_zero_fallback(T x) {
60  // From Hacker's Delight (2nd edition) section 5-3
61  unsigned int y = 0;
62  using ::Kokkos::Experimental::digits_v;
63  int n = digits_v<T>;
64  int c = digits_v<T> / 2;
65  do {
66  y = x >> c;
67  if (y != 0) {
68  n -= c;
69  x = y;
70  }
71  c >>= 1;
72  } while (c != 0);
73  return n - static_cast<int>(x);
74 }
75 
76 template <class T>
77 KOKKOS_FUNCTION constexpr int countr_zero_fallback(T x) {
78  using ::Kokkos::Experimental::digits_v;
79  return digits_v<T> - countl_zero_fallback(static_cast<T>(
80  static_cast<T>(~x) & static_cast<T>(x - 1)));
81 }
82 
83 template <class T>
84 KOKKOS_FUNCTION constexpr int popcount_fallback(T x) {
85  int c = 0;
86  for (; x != 0; x &= x - 1) {
87  ++c;
88  }
89  return c;
90 }
91 
92 template <class T>
93 inline constexpr bool is_standard_unsigned_integer_type_v =
94  std::is_same_v<T, unsigned char> || std::is_same_v<T, unsigned short> ||
95  std::is_same_v<T, unsigned int> || std::is_same_v<T, unsigned long> ||
96  std::is_same_v<T, unsigned long long>;
97 
98 } // namespace Kokkos::Impl
99 
100 namespace Kokkos {
101 
102 //<editor-fold desc="[bit.cast], bit_cast">
103 #if defined(KOKKOS_ENABLE_SYCL) && defined(__INTEL_LLVM_COMPILER) && \
104  __INTEL_LLVM_COMPILER < 20240000
105 using sycl::detail::bit_cast;
106 #else
107 template <class To, class From>
108 KOKKOS_FUNCTION std::enable_if_t<sizeof(To) == sizeof(From) &&
109  std::is_trivially_copyable_v<To> &&
110  std::is_trivially_copyable_v<From>,
111  To>
112 bit_cast(From const& from) noexcept {
113 #if defined(KOKKOS_ENABLE_SYCL) && defined(__INTEL_LLVM_COMPILER) && \
114  __INTEL_LLVM_COMPILER >= 20240000
115  return sycl::bit_cast<To>(from);
116 #else
117  To to;
118  memcpy(static_cast<void*>(&to), static_cast<const void*>(&from), sizeof(To));
119  return to;
120 #endif
121 }
122 #endif
123 //</editor-fold>
124 
125 //<editor-fold desc="[bit.byteswap], byteswap">
126 template <class T>
127 KOKKOS_FUNCTION constexpr std::enable_if_t<std::is_integral_v<T>, T> byteswap(
128  T value) noexcept {
129  return Impl::byteswap_fallback(value);
130 }
131 //</editor-fold>
132 
133 //<editor-fold desc="[bit.count], counting">
134 template <class T>
135 KOKKOS_FUNCTION constexpr std::enable_if_t<
136  Impl::is_standard_unsigned_integer_type_v<T>, int>
137 countl_zero(T x) noexcept {
138  using ::Kokkos::Experimental::digits_v;
139  if (x == 0) return digits_v<T>;
140  // TODO use compiler intrinsics when available
141  return Impl::countl_zero_fallback(x);
142 }
143 
144 template <class T>
145 KOKKOS_FUNCTION constexpr std::enable_if_t<
146  Impl::is_standard_unsigned_integer_type_v<T>, int>
147 countl_one(T x) noexcept {
148  using ::Kokkos::Experimental::digits_v;
149  using ::Kokkos::Experimental::finite_max_v;
150  if (x == finite_max_v<T>) return digits_v<T>;
151  return countl_zero(static_cast<T>(~x));
152 }
153 
154 template <class T>
155 KOKKOS_FUNCTION constexpr std::enable_if_t<
156  Impl::is_standard_unsigned_integer_type_v<T>, int>
157 countr_zero(T x) noexcept {
158  using ::Kokkos::Experimental::digits_v;
159  if (x == 0) return digits_v<T>;
160  // TODO use compiler intrinsics when available
161  return Impl::countr_zero_fallback(x);
162 }
163 
164 template <class T>
165 KOKKOS_FUNCTION constexpr std::enable_if_t<
166  Impl::is_standard_unsigned_integer_type_v<T>, int>
167 countr_one(T x) noexcept {
168  using ::Kokkos::Experimental::digits_v;
169  using ::Kokkos::Experimental::finite_max_v;
170  if (x == finite_max_v<T>) return digits_v<T>;
171  return countr_zero(static_cast<T>(~x));
172 }
173 
174 template <class T>
175 KOKKOS_FUNCTION constexpr std::enable_if_t<
176  Impl::is_standard_unsigned_integer_type_v<T>, int>
177 popcount(T x) noexcept {
178  if (x == 0) return 0;
179  // TODO use compiler intrinsics when available
180  return Impl::popcount_fallback(x);
181 }
182 //</editor-fold>
183 
184 //<editor-fold desc="[bit.pow.two], integral powers of 2">
185 template <class T>
186 KOKKOS_FUNCTION constexpr std::enable_if_t<
187  Impl::is_standard_unsigned_integer_type_v<T>, bool>
188 has_single_bit(T x) noexcept {
189  return x != 0 && (((x & (x - 1)) == 0));
190 }
191 
192 template <class T>
193 KOKKOS_FUNCTION constexpr std::enable_if_t<
194  Impl::is_standard_unsigned_integer_type_v<T>, T>
195 bit_ceil(T x) noexcept {
196  if (x <= 1) return 1;
197  using ::Kokkos::Experimental::digits_v;
198  return T{1} << (digits_v<T> - countl_zero(static_cast<T>(x - 1)));
199 }
200 
201 template <class T>
202 KOKKOS_FUNCTION constexpr std::enable_if_t<
203  Impl::is_standard_unsigned_integer_type_v<T>, T>
204 bit_floor(T x) noexcept {
205  if (x == 0) return 0;
206  using ::Kokkos::Experimental::digits_v;
207  return T{1} << (digits_v<T> - 1 - countl_zero(x));
208 }
209 
210 template <class T>
211 KOKKOS_FUNCTION constexpr std::enable_if_t<
212  Impl::is_standard_unsigned_integer_type_v<T>, T>
213 bit_width(T x) noexcept {
214  if (x == 0) return 0;
215  using ::Kokkos::Experimental::digits_v;
216  return digits_v<T> - countl_zero(x);
217 }
218 //</editor-fold>
219 
220 //<editor-fold desc="[bit.rotate], rotating">
221 template <class T>
222 [[nodiscard]] KOKKOS_FUNCTION constexpr std::enable_if_t<
223  Impl::is_standard_unsigned_integer_type_v<T>, T>
224 rotl(T x, int s) noexcept {
225  using Experimental::digits_v;
226  constexpr auto dig = digits_v<T>;
227  int const rem = s % dig;
228  if (rem == 0) return x;
229  if (rem > 0) return (x << rem) | (x >> ((dig - rem) % dig));
230  return (x >> -rem) | (x << ((dig + rem) % dig)); // rotr(x, -rem)
231 }
232 
233 template <class T>
234 [[nodiscard]] KOKKOS_FUNCTION constexpr std::enable_if_t<
235  Impl::is_standard_unsigned_integer_type_v<T>, T>
236 rotr(T x, int s) noexcept {
237  using Experimental::digits_v;
238  constexpr auto dig = digits_v<T>;
239  int const rem = s % dig;
240  if (rem == 0) return x;
241  if (rem > 0) return (x >> rem) | (x << ((dig - rem) % dig));
242  return (x << -rem) | (x >> ((dig + rem) % dig)); // rotl(x, -rem)
243 }
244 //</editor-fold>
245 
246 } // namespace Kokkos
247 
248 namespace Kokkos::Impl {
249 
250 #if defined(KOKKOS_COMPILER_CLANG) || defined(KOKKOS_COMPILER_INTEL_LLVM) || \
251  defined(KOKKOS_COMPILER_GNU)
252 #define KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS
253 #endif
254 
255 template <class T>
256 KOKKOS_IMPL_DEVICE_FUNCTION T byteswap_builtin_device(T x) noexcept {
257  return byteswap_fallback(x);
258 }
259 
260 template <class T>
261 KOKKOS_IMPL_HOST_FUNCTION T byteswap_builtin_host(T x) noexcept {
262 #ifdef KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS
263  if constexpr (sizeof(T) == 1) {
264  return x;
265  } else if constexpr (sizeof(T) == 2) {
266  return __builtin_bswap16(x);
267  } else if constexpr (sizeof(T) == 4) {
268  return __builtin_bswap32(x);
269  } else if constexpr (sizeof(T) == 8) {
270  return __builtin_bswap64(x);
271  } else if constexpr (sizeof(T) == 16) {
272 #if defined(__has_builtin)
273 #if __has_builtin(__builtin_bswap128)
274  return __builtin_bswap128(x);
275 #endif
276 #endif
277  return (__builtin_bswap64(x >> 64) |
278  (static_cast<T>(__builtin_bswap64(x)) << 64));
279  }
280 #endif
281 
282  return byteswap_fallback(x);
283 }
284 
285 template <class T>
286 KOKKOS_IMPL_DEVICE_FUNCTION
287  std::enable_if_t<is_standard_unsigned_integer_type_v<T>, int>
288  countl_zero_builtin_device(T x) noexcept {
289 #if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP)
290  if constexpr (sizeof(T) == sizeof(long long int))
291  return __clzll(reinterpret_cast<long long int&>(x));
292  if constexpr (sizeof(T) == sizeof(int))
293  return __clz(reinterpret_cast<int&>(x));
294  using ::Kokkos::Experimental::digits_v;
295  constexpr int shift = digits_v<unsigned int> - digits_v<T>;
296  return __clz(x) - shift;
297 #elif defined(KOKKOS_ENABLE_SYCL)
298  return sycl::clz(x);
299 #else
300  return countl_zero_fallback(x);
301 #endif
302 }
303 
304 template <class T>
305 KOKKOS_IMPL_HOST_FUNCTION
306  std::enable_if_t<is_standard_unsigned_integer_type_v<T>, int>
307  countl_zero_builtin_host(T x) noexcept {
308  using ::Kokkos::Experimental::digits_v;
309  if (x == 0) return digits_v<T>;
310 #ifdef KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS
311  if constexpr (std::is_same_v<T, unsigned long long>) {
312  return __builtin_clzll(x);
313  } else if constexpr (std::is_same_v<T, unsigned long>) {
314  return __builtin_clzl(x);
315  } else if constexpr (std::is_same_v<T, unsigned int>) {
316  return __builtin_clz(x);
317  } else {
318  constexpr int shift = digits_v<unsigned int> - digits_v<T>;
319  return __builtin_clz(x) - shift;
320  }
321 #else
322  return countl_zero_fallback(x);
323 #endif
324 }
325 
326 template <class T>
327 KOKKOS_IMPL_DEVICE_FUNCTION
328  std::enable_if_t<is_standard_unsigned_integer_type_v<T>, int>
329  countr_zero_builtin_device(T x) noexcept {
330  using ::Kokkos::Experimental::digits_v;
331  if (x == 0) return digits_v<T>;
332 #if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP)
333  if constexpr (sizeof(T) == sizeof(long long int))
334  return __ffsll(reinterpret_cast<long long int&>(x)) - 1;
335  return __ffs(reinterpret_cast<int&>(x)) - 1;
336 #elif defined(KOKKOS_ENABLE_SYCL)
337  return sycl::ctz(x);
338 #else
339  return countr_zero_fallback(x);
340 #endif
341 }
342 
343 template <class T>
344 KOKKOS_IMPL_HOST_FUNCTION
345  std::enable_if_t<is_standard_unsigned_integer_type_v<T>, int>
346  countr_zero_builtin_host(T x) noexcept {
347  using ::Kokkos::Experimental::digits_v;
348  if (x == 0) return digits_v<T>;
349 #ifdef KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS
350  if constexpr (std::is_same_v<T, unsigned long long>) {
351  return __builtin_ctzll(x);
352  } else if constexpr (std::is_same_v<T, unsigned long>) {
353  return __builtin_ctzl(x);
354  } else {
355  return __builtin_ctz(x);
356  }
357 #else
358  return countr_zero_fallback(x);
359 #endif
360 }
361 
362 template <class T>
363 KOKKOS_IMPL_DEVICE_FUNCTION
364  std::enable_if_t<is_standard_unsigned_integer_type_v<T>, int>
365  popcount_builtin_device(T x) noexcept {
366 #if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP)
367  if constexpr (sizeof(T) == sizeof(long long int)) return __popcll(x);
368  return __popc(x);
369 #elif defined(KOKKOS_ENABLE_SYCL)
370  return sycl::popcount(x);
371 #else
372  return popcount_fallback(x);
373 #endif
374 }
375 
376 template <class T>
377 KOKKOS_IMPL_HOST_FUNCTION
378  std::enable_if_t<is_standard_unsigned_integer_type_v<T>, int>
379  popcount_builtin_host(T x) noexcept {
380 #ifdef KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS
381  if constexpr (std::is_same_v<T, unsigned long long>) {
382  return __builtin_popcountll(x);
383  } else if constexpr (std::is_same_v<T, unsigned long>) {
384  return __builtin_popcountl(x);
385  } else {
386  return __builtin_popcount(x);
387  }
388 #else
389  return popcount_fallback(x);
390 #endif
391 }
392 
393 #undef KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS
394 
395 } // namespace Kokkos::Impl
396 
397 namespace Kokkos::Experimental {
398 
399 template <class To, class From>
400 KOKKOS_FUNCTION std::enable_if_t<sizeof(To) == sizeof(From) &&
401  std::is_trivially_copyable_v<To> &&
402  std::is_trivially_copyable_v<From>,
403  To>
404 bit_cast_builtin(From const& from) noexcept {
405  // qualify the call to avoid ADL
406  return Kokkos::bit_cast<To>(from); // no benefit to call the _builtin variant
407 }
408 
409 template <class T>
410 KOKKOS_FUNCTION std::enable_if_t<std::is_integral_v<T>, T> byteswap_builtin(
411  T x) noexcept {
412  KOKKOS_IF_ON_DEVICE((return ::Kokkos::Impl::byteswap_builtin_device(x);))
413  KOKKOS_IF_ON_HOST((return ::Kokkos::Impl::byteswap_builtin_host(x);))
414 }
415 
416 template <class T>
417 KOKKOS_FUNCTION std::enable_if_t<
418  ::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, int>
419 countl_zero_builtin(T x) noexcept {
420  KOKKOS_IF_ON_DEVICE((return ::Kokkos::Impl::countl_zero_builtin_device(x);))
421  KOKKOS_IF_ON_HOST((return ::Kokkos::Impl::countl_zero_builtin_host(x);))
422 }
423 
424 template <class T>
425 KOKKOS_FUNCTION std::enable_if_t<
426  ::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, int>
427 countl_one_builtin(T x) noexcept {
428  if (x == finite_max_v<T>) return digits_v<T>;
429  return countl_zero_builtin(static_cast<T>(~x));
430 }
431 
432 template <class T>
433 KOKKOS_FUNCTION std::enable_if_t<
434  ::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, int>
435 countr_zero_builtin(T x) noexcept {
436  KOKKOS_IF_ON_DEVICE((return ::Kokkos::Impl::countr_zero_builtin_device(x);))
437  KOKKOS_IF_ON_HOST((return ::Kokkos::Impl::countr_zero_builtin_host(x);))
438 }
439 
440 template <class T>
441 KOKKOS_FUNCTION std::enable_if_t<
442  ::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, int>
443 countr_one_builtin(T x) noexcept {
444  if (x == finite_max_v<T>) return digits_v<T>;
445  return countr_zero_builtin(static_cast<T>(~x));
446 }
447 
448 template <class T>
449 KOKKOS_FUNCTION std::enable_if_t<
450  ::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, int>
451 popcount_builtin(T x) noexcept {
452  KOKKOS_IF_ON_DEVICE((return ::Kokkos::Impl::popcount_builtin_device(x);))
453  KOKKOS_IF_ON_HOST((return ::Kokkos::Impl::popcount_builtin_host(x);))
454 }
455 
456 template <class T>
457 KOKKOS_FUNCTION std::enable_if_t<
458  ::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, bool>
459 has_single_bit_builtin(T x) noexcept {
460  return has_single_bit(x); // no benefit to call the _builtin variant
461 }
462 
463 template <class T>
464 KOKKOS_FUNCTION
465  std::enable_if_t<::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, T>
466  bit_ceil_builtin(T x) noexcept {
467  if (x <= 1) return 1;
468  return T{1} << (digits_v<T> - countl_zero_builtin(static_cast<T>(x - 1)));
469 }
470 
471 template <class T>
472 KOKKOS_FUNCTION
473  std::enable_if_t<::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, T>
474  bit_floor_builtin(T x) noexcept {
475  if (x == 0) return 0;
476  return T{1} << (digits_v<T> - 1 - countl_zero_builtin(x));
477 }
478 
479 template <class T>
480 KOKKOS_FUNCTION
481  std::enable_if_t<::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, T>
482  bit_width_builtin(T x) noexcept {
483  if (x == 0) return 0;
484  return digits_v<T> - countl_zero_builtin(x);
485 }
486 
487 template <class T>
488 [[nodiscard]] KOKKOS_FUNCTION
489  std::enable_if_t<::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, T>
490  rotl_builtin(T x, int s) noexcept {
491  return rotl(x, s); // no benefit to call the _builtin variant
492 }
493 
494 template <class T>
495 [[nodiscard]] KOKKOS_FUNCTION
496  std::enable_if_t<::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, T>
497  rotr_builtin(T x, int s) noexcept {
498  return rotr(x, s); // no benefit to call the _builtin variant
499 }
500 
501 } // namespace Kokkos::Experimental
502 
503 #endif