Kokkos Core Kernels Package  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Kokkos_BitManipulation.hpp
1 //@HEADER
2 // ************************************************************************
3 //
4 // Kokkos v. 4.0
5 // Copyright (2022) National Technology & Engineering
6 // Solutions of Sandia, LLC (NTESS).
7 //
8 // Under the terms of Contract DE-NA0003525 with NTESS,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 // See https://kokkos.org/LICENSE for license information.
13 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 //
15 //@HEADER
16 
17 #ifndef KOKKOS_BIT_MANIPULATION_HPP
18 #define KOKKOS_BIT_MANIPULATION_HPP
19 
20 #include <Kokkos_Macros.hpp>
21 #include <Kokkos_NumericTraits.hpp>
22 #include <climits> // CHAR_BIT
23 #include <cstring> //memcpy
24 #include <type_traits>
25 
26 namespace Kokkos::Impl {
27 
28 template <class T>
29 KOKKOS_FUNCTION constexpr T byteswap_fallback(T x) {
30  if constexpr (sizeof(T) > 1) {
31  using U = std::make_unsigned_t<T>;
32 
33  size_t shift = CHAR_BIT * (sizeof(T) - 1);
34 
35  U lo_mask = static_cast<unsigned char>(~0);
36  U hi_mask = lo_mask << shift;
37 
38  U val = x;
39 
40  for (size_t i = 0; i < sizeof(T) / 2; ++i) {
41  U lo_val = val & lo_mask;
42  U hi_val = val & hi_mask;
43 
44  val = (val & ~lo_mask) | (hi_val >> shift);
45  val = (val & ~hi_mask) | (lo_val << shift);
46 
47  lo_mask <<= CHAR_BIT;
48  hi_mask >>= CHAR_BIT;
49 
50  shift -= static_cast<size_t>(2) * CHAR_BIT;
51  }
52  return val;
53  }
54  // sizeof(T) == 1
55  return x;
56 }
57 
58 template <class T>
59 KOKKOS_FUNCTION constexpr int countl_zero_fallback(T x) {
60  // From Hacker's Delight (2nd edition) section 5-3
61  unsigned int y = 0;
62  using ::Kokkos::Experimental::digits_v;
63  int n = digits_v<T>;
64  int c = digits_v<T> / 2;
65  do {
66  y = x >> c;
67  if (y != 0) {
68  n -= c;
69  x = y;
70  }
71  c >>= 1;
72  } while (c != 0);
73  return n - static_cast<int>(x);
74 }
75 
76 template <class T>
77 KOKKOS_FUNCTION constexpr int countr_zero_fallback(T x) {
78  using ::Kokkos::Experimental::digits_v;
79  return digits_v<T> - countl_zero_fallback(static_cast<T>(
80  static_cast<T>(~x) & static_cast<T>(x - 1)));
81 }
82 
83 template <class T>
84 KOKKOS_FUNCTION constexpr int popcount_fallback(T x) {
85  int c = 0;
86  for (; x != 0; x &= x - 1) {
87  ++c;
88  }
89  return c;
90 }
91 
92 template <class T>
93 inline constexpr bool is_standard_unsigned_integer_type_v =
94  std::is_same_v<T, unsigned char> || std::is_same_v<T, unsigned short> ||
95  std::is_same_v<T, unsigned int> || std::is_same_v<T, unsigned long> ||
96  std::is_same_v<T, unsigned long long>;
97 
98 } // namespace Kokkos::Impl
99 
100 namespace Kokkos {
101 
102 //<editor-fold desc="[bit.cast], bit_cast">
103 template <class To, class From>
104 KOKKOS_FUNCTION std::enable_if_t<sizeof(To) == sizeof(From) &&
105  std::is_trivially_copyable_v<To> &&
106  std::is_trivially_copyable_v<From>,
107  To>
108 bit_cast(From const& from) noexcept {
109 #if defined(KOKKOS_ENABLE_SYCL)
110  return sycl::bit_cast<To>(from);
111 #else
112  To to;
113  memcpy(static_cast<void*>(&to), static_cast<const void*>(&from), sizeof(To));
114  return to;
115 #endif
116 }
117 //</editor-fold>
118 
119 //<editor-fold desc="[bit.byteswap], byteswap">
120 template <class T>
121 KOKKOS_FUNCTION constexpr std::enable_if_t<std::is_integral_v<T>, T> byteswap(
122  T value) noexcept {
123  return Impl::byteswap_fallback(value);
124 }
125 //</editor-fold>
126 
127 //<editor-fold desc="[bit.count], counting">
128 template <class T>
129 KOKKOS_FUNCTION constexpr std::enable_if_t<
130  Impl::is_standard_unsigned_integer_type_v<T>, int>
131 countl_zero(T x) noexcept {
132  using ::Kokkos::Experimental::digits_v;
133  if (x == 0) return digits_v<T>;
134  // TODO use compiler intrinsics when available
135  return Impl::countl_zero_fallback(x);
136 }
137 
138 template <class T>
139 KOKKOS_FUNCTION constexpr std::enable_if_t<
140  Impl::is_standard_unsigned_integer_type_v<T>, int>
141 countl_one(T x) noexcept {
142  using ::Kokkos::Experimental::digits_v;
143  using ::Kokkos::Experimental::finite_max_v;
144  if (x == finite_max_v<T>) return digits_v<T>;
145  return countl_zero(static_cast<T>(~x));
146 }
147 
148 template <class T>
149 KOKKOS_FUNCTION constexpr std::enable_if_t<
150  Impl::is_standard_unsigned_integer_type_v<T>, int>
151 countr_zero(T x) noexcept {
152  using ::Kokkos::Experimental::digits_v;
153  if (x == 0) return digits_v<T>;
154  // TODO use compiler intrinsics when available
155  return Impl::countr_zero_fallback(x);
156 }
157 
158 template <class T>
159 KOKKOS_FUNCTION constexpr std::enable_if_t<
160  Impl::is_standard_unsigned_integer_type_v<T>, int>
161 countr_one(T x) noexcept {
162  using ::Kokkos::Experimental::digits_v;
163  using ::Kokkos::Experimental::finite_max_v;
164  if (x == finite_max_v<T>) return digits_v<T>;
165  return countr_zero(static_cast<T>(~x));
166 }
167 
168 template <class T>
169 KOKKOS_FUNCTION constexpr std::enable_if_t<
170  Impl::is_standard_unsigned_integer_type_v<T>, int>
171 popcount(T x) noexcept {
172  if (x == 0) return 0;
173  // TODO use compiler intrinsics when available
174  return Impl::popcount_fallback(x);
175 }
176 //</editor-fold>
177 
178 //<editor-fold desc="[bit.pow.two], integral powers of 2">
179 template <class T>
180 KOKKOS_FUNCTION constexpr std::enable_if_t<
181  Impl::is_standard_unsigned_integer_type_v<T>, bool>
182 has_single_bit(T x) noexcept {
183  return x != 0 && (((x & (x - 1)) == 0));
184 }
185 
186 template <class T>
187 KOKKOS_FUNCTION constexpr std::enable_if_t<
188  Impl::is_standard_unsigned_integer_type_v<T>, T>
189 bit_ceil(T x) noexcept {
190  if (x <= 1) return 1;
191  using ::Kokkos::Experimental::digits_v;
192  return T{1} << (digits_v<T> - countl_zero(static_cast<T>(x - 1)));
193 }
194 
195 template <class T>
196 KOKKOS_FUNCTION constexpr std::enable_if_t<
197  Impl::is_standard_unsigned_integer_type_v<T>, T>
198 bit_floor(T x) noexcept {
199  if (x == 0) return 0;
200  using ::Kokkos::Experimental::digits_v;
201  return T{1} << (digits_v<T> - 1 - countl_zero(x));
202 }
203 
204 template <class T>
205 KOKKOS_FUNCTION constexpr std::enable_if_t<
206  Impl::is_standard_unsigned_integer_type_v<T>, T>
207 bit_width(T x) noexcept {
208  if (x == 0) return 0;
209  using ::Kokkos::Experimental::digits_v;
210  return digits_v<T> - countl_zero(x);
211 }
212 //</editor-fold>
213 
214 //<editor-fold desc="[bit.rotate], rotating">
215 template <class T>
216 [[nodiscard]] KOKKOS_FUNCTION constexpr std::enable_if_t<
217  Impl::is_standard_unsigned_integer_type_v<T>, T>
218 rotl(T x, int s) noexcept {
219  using Experimental::digits_v;
220  constexpr auto dig = digits_v<T>;
221  int const rem = s % dig;
222  if (rem == 0) return x;
223  if (rem > 0) return (x << rem) | (x >> ((dig - rem) % dig));
224  return (x >> -rem) | (x << ((dig + rem) % dig)); // rotr(x, -rem)
225 }
226 
227 template <class T>
228 [[nodiscard]] KOKKOS_FUNCTION constexpr std::enable_if_t<
229  Impl::is_standard_unsigned_integer_type_v<T>, T>
230 rotr(T x, int s) noexcept {
231  using Experimental::digits_v;
232  constexpr auto dig = digits_v<T>;
233  int const rem = s % dig;
234  if (rem == 0) return x;
235  if (rem > 0) return (x >> rem) | (x << ((dig - rem) % dig));
236  return (x << -rem) | (x >> ((dig + rem) % dig)); // rotl(x, -rem)
237 }
238 //</editor-fold>
239 
240 } // namespace Kokkos
241 
242 namespace Kokkos::Impl {
243 
244 #if defined(KOKKOS_COMPILER_CLANG) || defined(KOKKOS_COMPILER_INTEL_LLVM) || \
245  defined(KOKKOS_COMPILER_GNU)
246 #define KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS
247 #endif
248 
249 template <class T>
250 KOKKOS_IMPL_DEVICE_FUNCTION T byteswap_builtin_device(T x) noexcept {
251  return byteswap_fallback(x);
252 }
253 
254 template <class T>
255 KOKKOS_IMPL_HOST_FUNCTION T byteswap_builtin_host(T x) noexcept {
256 #ifdef KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS
257  if constexpr (sizeof(T) == 1) {
258  return x;
259  } else if constexpr (sizeof(T) == 2) {
260  return __builtin_bswap16(x);
261  } else if constexpr (sizeof(T) == 4) {
262  return __builtin_bswap32(x);
263  } else if constexpr (sizeof(T) == 8) {
264  return __builtin_bswap64(x);
265  } else if constexpr (sizeof(T) == 16) {
266 #if defined(__has_builtin)
267 #if __has_builtin(__builtin_bswap128)
268  return __builtin_bswap128(x);
269 #endif
270 #endif
271  return (__builtin_bswap64(x >> 64) |
272  (static_cast<T>(__builtin_bswap64(x)) << 64));
273  }
274 #endif
275 
276  return byteswap_fallback(x);
277 }
278 
279 template <class T>
280 KOKKOS_IMPL_DEVICE_FUNCTION
281  std::enable_if_t<is_standard_unsigned_integer_type_v<T>, int>
282  countl_zero_builtin_device(T x) noexcept {
283 #if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP)
284  if constexpr (sizeof(T) == sizeof(long long int))
285  return __clzll(reinterpret_cast<long long int&>(x));
286  if constexpr (sizeof(T) == sizeof(int))
287  return __clz(reinterpret_cast<int&>(x));
288  using ::Kokkos::Experimental::digits_v;
289  constexpr int shift = digits_v<unsigned int> - digits_v<T>;
290  return __clz(x) - shift;
291 #elif defined(KOKKOS_ENABLE_SYCL)
292  return sycl::clz(x);
293 #else
294  return countl_zero_fallback(x);
295 #endif
296 }
297 
298 template <class T>
299 KOKKOS_IMPL_HOST_FUNCTION
300  std::enable_if_t<is_standard_unsigned_integer_type_v<T>, int>
301  countl_zero_builtin_host(T x) noexcept {
302  using ::Kokkos::Experimental::digits_v;
303  if (x == 0) return digits_v<T>;
304 #ifdef KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS
305  if constexpr (std::is_same_v<T, unsigned long long>) {
306  return __builtin_clzll(x);
307  } else if constexpr (std::is_same_v<T, unsigned long>) {
308  return __builtin_clzl(x);
309  } else if constexpr (std::is_same_v<T, unsigned int>) {
310  return __builtin_clz(x);
311  } else {
312  constexpr int shift = digits_v<unsigned int> - digits_v<T>;
313  return __builtin_clz(x) - shift;
314  }
315 #else
316  return countl_zero_fallback(x);
317 #endif
318 }
319 
320 template <class T>
321 KOKKOS_IMPL_DEVICE_FUNCTION
322  std::enable_if_t<is_standard_unsigned_integer_type_v<T>, int>
323  countr_zero_builtin_device(T x) noexcept {
324  using ::Kokkos::Experimental::digits_v;
325  if (x == 0) return digits_v<T>;
326 #if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP)
327  if constexpr (sizeof(T) == sizeof(long long int))
328  return __ffsll(reinterpret_cast<long long int&>(x)) - 1;
329  return __ffs(reinterpret_cast<int&>(x)) - 1;
330 #elif defined(KOKKOS_ENABLE_SYCL)
331  return sycl::ctz(x);
332 #else
333  return countr_zero_fallback(x);
334 #endif
335 }
336 
337 template <class T>
338 KOKKOS_IMPL_HOST_FUNCTION
339  std::enable_if_t<is_standard_unsigned_integer_type_v<T>, int>
340  countr_zero_builtin_host(T x) noexcept {
341  using ::Kokkos::Experimental::digits_v;
342  if (x == 0) return digits_v<T>;
343 #ifdef KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS
344  if constexpr (std::is_same_v<T, unsigned long long>) {
345  return __builtin_ctzll(x);
346  } else if constexpr (std::is_same_v<T, unsigned long>) {
347  return __builtin_ctzl(x);
348  } else {
349  return __builtin_ctz(x);
350  }
351 #else
352  return countr_zero_fallback(x);
353 #endif
354 }
355 
356 template <class T>
357 KOKKOS_IMPL_DEVICE_FUNCTION
358  std::enable_if_t<is_standard_unsigned_integer_type_v<T>, int>
359  popcount_builtin_device(T x) noexcept {
360 #if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP)
361  if constexpr (sizeof(T) == sizeof(long long int)) return __popcll(x);
362  return __popc(x);
363 #elif defined(KOKKOS_ENABLE_SYCL)
364  return sycl::popcount(x);
365 #else
366  return popcount_fallback(x);
367 #endif
368 }
369 
370 template <class T>
371 KOKKOS_IMPL_HOST_FUNCTION
372  std::enable_if_t<is_standard_unsigned_integer_type_v<T>, int>
373  popcount_builtin_host(T x) noexcept {
374 #ifdef KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS
375  if constexpr (std::is_same_v<T, unsigned long long>) {
376  return __builtin_popcountll(x);
377  } else if constexpr (std::is_same_v<T, unsigned long>) {
378  return __builtin_popcountl(x);
379  } else {
380  return __builtin_popcount(x);
381  }
382 #else
383  return popcount_fallback(x);
384 #endif
385 }
386 
387 #undef KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS
388 
389 } // namespace Kokkos::Impl
390 
391 namespace Kokkos::Experimental {
392 
393 template <class To, class From>
394 KOKKOS_FUNCTION std::enable_if_t<sizeof(To) == sizeof(From) &&
395  std::is_trivially_copyable_v<To> &&
396  std::is_trivially_copyable_v<From>,
397  To>
398 bit_cast_builtin(From const& from) noexcept {
399  // qualify the call to avoid ADL
400  return Kokkos::bit_cast<To>(from); // no benefit to call the _builtin variant
401 }
402 
403 template <class T>
404 KOKKOS_FUNCTION std::enable_if_t<std::is_integral_v<T>, T> byteswap_builtin(
405  T x) noexcept {
406  KOKKOS_IF_ON_DEVICE((return ::Kokkos::Impl::byteswap_builtin_device(x);))
407  KOKKOS_IF_ON_HOST((return ::Kokkos::Impl::byteswap_builtin_host(x);))
408 // FIXME-NVHPC: erroneous warning about return from non-void function
409 #if defined(KOKKOS_ENABLE_OPENACC) && defined(KOKKOS_COMPILER_NVHPC)
410  return T();
411 #endif
412 }
413 
414 template <class T>
415 KOKKOS_FUNCTION std::enable_if_t<
416  ::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, int>
417 countl_zero_builtin(T x) noexcept {
418  KOKKOS_IF_ON_DEVICE((return ::Kokkos::Impl::countl_zero_builtin_device(x);))
419  KOKKOS_IF_ON_HOST((return ::Kokkos::Impl::countl_zero_builtin_host(x);))
420 // FIXME-NVHPC: erroneous warning about return from non-void function
421 #if defined(KOKKOS_ENABLE_OPENACC) && defined(KOKKOS_COMPILER_NVHPC)
422  return 0;
423 #endif
424 }
425 
426 template <class T>
427 KOKKOS_FUNCTION std::enable_if_t<
428  ::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, int>
429 countl_one_builtin(T x) noexcept {
430  if (x == finite_max_v<T>) return digits_v<T>;
431  return countl_zero_builtin(static_cast<T>(~x));
432 }
433 
434 template <class T>
435 KOKKOS_FUNCTION std::enable_if_t<
436  ::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, int>
437 countr_zero_builtin(T x) noexcept {
438  KOKKOS_IF_ON_DEVICE((return ::Kokkos::Impl::countr_zero_builtin_device(x);))
439  KOKKOS_IF_ON_HOST((return ::Kokkos::Impl::countr_zero_builtin_host(x);))
440 // FIXME-NVHPC: erroneous warning about return from non-void function
441 #if defined(KOKKOS_ENABLE_OPENACC) && defined(KOKKOS_COMPILER_NVHPC)
442  return 0;
443 #endif
444 }
445 
446 template <class T>
447 KOKKOS_FUNCTION std::enable_if_t<
448  ::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, int>
449 countr_one_builtin(T x) noexcept {
450  if (x == finite_max_v<T>) return digits_v<T>;
451  return countr_zero_builtin(static_cast<T>(~x));
452 }
453 
454 template <class T>
455 KOKKOS_FUNCTION std::enable_if_t<
456  ::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, int>
457 popcount_builtin(T x) noexcept {
458  KOKKOS_IF_ON_DEVICE((return ::Kokkos::Impl::popcount_builtin_device(x);))
459  KOKKOS_IF_ON_HOST((return ::Kokkos::Impl::popcount_builtin_host(x);))
460 // FIXME-NVHPC: erroneous warning about return from non-void function
461 #if defined(KOKKOS_ENABLE_OPENACC) && defined(KOKKOS_COMPILER_NVHPC)
462  return 0;
463 #endif
464 }
465 
466 template <class T>
467 KOKKOS_FUNCTION std::enable_if_t<
468  ::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, bool>
469 has_single_bit_builtin(T x) noexcept {
470  return has_single_bit(x); // no benefit to call the _builtin variant
471 }
472 
473 template <class T>
474 KOKKOS_FUNCTION
475  std::enable_if_t<::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, T>
476  bit_ceil_builtin(T x) noexcept {
477  if (x <= 1) return 1;
478  return T{1} << (digits_v<T> - countl_zero_builtin(static_cast<T>(x - 1)));
479 }
480 
481 template <class T>
482 KOKKOS_FUNCTION
483  std::enable_if_t<::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, T>
484  bit_floor_builtin(T x) noexcept {
485  if (x == 0) return 0;
486  return T{1} << (digits_v<T> - 1 - countl_zero_builtin(x));
487 }
488 
489 template <class T>
490 KOKKOS_FUNCTION
491  std::enable_if_t<::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, T>
492  bit_width_builtin(T x) noexcept {
493  if (x == 0) return 0;
494  return digits_v<T> - countl_zero_builtin(x);
495 }
496 
497 template <class T>
498 [[nodiscard]] KOKKOS_FUNCTION
499  std::enable_if_t<::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, T>
500  rotl_builtin(T x, int s) noexcept {
501  return rotl(x, s); // no benefit to call the _builtin variant
502 }
503 
504 template <class T>
505 [[nodiscard]] KOKKOS_FUNCTION
506  std::enable_if_t<::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, T>
507  rotr_builtin(T x, int s) noexcept {
508  return rotr(x, s); // no benefit to call the _builtin variant
509 }
510 
511 } // namespace Kokkos::Experimental
512 
513 #endif