17 #ifndef KOKKOS_BIT_MANIPULATION_HPP
18 #define KOKKOS_BIT_MANIPULATION_HPP
20 #include <Kokkos_Macros.hpp>
21 #include <Kokkos_NumericTraits.hpp>
24 #include <type_traits>
26 namespace Kokkos::Impl {
29 KOKKOS_FUNCTION constexpr T byteswap_fallback(T x) {
30 if constexpr (
sizeof(T) > 1) {
31 using U = std::make_unsigned_t<T>;
33 size_t shift = CHAR_BIT * (
sizeof(T) - 1);
35 U lo_mask =
static_cast<unsigned char>(~0);
36 U hi_mask = lo_mask << shift;
40 for (
size_t i = 0; i <
sizeof(T) / 2; ++i) {
41 U lo_val = val & lo_mask;
42 U hi_val = val & hi_mask;
44 val = (val & ~lo_mask) | (hi_val >> shift);
45 val = (val & ~hi_mask) | (lo_val << shift);
50 shift -=
static_cast<size_t>(2) * CHAR_BIT;
59 KOKKOS_FUNCTION constexpr
int countl_zero_fallback(T x) {
62 using ::Kokkos::Experimental::digits_v;
64 int c = digits_v<T> / 2;
73 return n -
static_cast<int>(x);
77 KOKKOS_FUNCTION constexpr
int countr_zero_fallback(T x) {
78 using ::Kokkos::Experimental::digits_v;
79 return digits_v<T> - countl_zero_fallback(static_cast<T>(
80 static_cast<T>(~x) & static_cast<T>(x - 1)));
84 KOKKOS_FUNCTION constexpr
int popcount_fallback(T x) {
86 for (; x != 0; x &= x - 1) {
93 inline constexpr
bool is_standard_unsigned_integer_type_v =
94 std::is_same_v<T, unsigned char> || std::is_same_v<T, unsigned short> ||
95 std::is_same_v<T, unsigned int> || std::is_same_v<T, unsigned long> ||
96 std::is_same_v<T, unsigned long long>;
103 template <
class To,
class From>
104 KOKKOS_FUNCTION std::enable_if_t<
sizeof(To) ==
sizeof(From) &&
105 std::is_trivially_copyable_v<To> &&
106 std::is_trivially_copyable_v<From>,
108 bit_cast(From
const& from) noexcept {
109 #if defined(KOKKOS_ENABLE_SYCL)
110 return sycl::bit_cast<To>(from);
113 memcpy(static_cast<void*>(&to), static_cast<const void*>(&from),
sizeof(To));
121 KOKKOS_FUNCTION constexpr std::enable_if_t<std::is_integral_v<T>, T> byteswap(
123 return Impl::byteswap_fallback(value);
129 KOKKOS_FUNCTION constexpr std::enable_if_t<
130 Impl::is_standard_unsigned_integer_type_v<T>,
int>
131 countl_zero(T x) noexcept {
132 using ::Kokkos::Experimental::digits_v;
133 if (x == 0)
return digits_v<T>;
135 return Impl::countl_zero_fallback(x);
139 KOKKOS_FUNCTION constexpr std::enable_if_t<
140 Impl::is_standard_unsigned_integer_type_v<T>,
int>
141 countl_one(T x) noexcept {
142 using ::Kokkos::Experimental::digits_v;
143 using ::Kokkos::Experimental::finite_max_v;
144 if (x == finite_max_v<T>)
return digits_v<T>;
145 return countl_zero(static_cast<T>(~x));
149 KOKKOS_FUNCTION constexpr std::enable_if_t<
150 Impl::is_standard_unsigned_integer_type_v<T>,
int>
151 countr_zero(T x) noexcept {
152 using ::Kokkos::Experimental::digits_v;
153 if (x == 0)
return digits_v<T>;
155 return Impl::countr_zero_fallback(x);
159 KOKKOS_FUNCTION constexpr std::enable_if_t<
160 Impl::is_standard_unsigned_integer_type_v<T>,
int>
161 countr_one(T x) noexcept {
162 using ::Kokkos::Experimental::digits_v;
163 using ::Kokkos::Experimental::finite_max_v;
164 if (x == finite_max_v<T>)
return digits_v<T>;
165 return countr_zero(static_cast<T>(~x));
169 KOKKOS_FUNCTION constexpr std::enable_if_t<
170 Impl::is_standard_unsigned_integer_type_v<T>,
int>
171 popcount(T x) noexcept {
172 if (x == 0)
return 0;
174 return Impl::popcount_fallback(x);
180 KOKKOS_FUNCTION constexpr std::enable_if_t<
181 Impl::is_standard_unsigned_integer_type_v<T>,
bool>
182 has_single_bit(T x) noexcept {
183 return x != 0 && (((x & (x - 1)) == 0));
187 KOKKOS_FUNCTION constexpr std::enable_if_t<
188 Impl::is_standard_unsigned_integer_type_v<T>, T>
189 bit_ceil(T x) noexcept {
190 if (x <= 1)
return 1;
191 using ::Kokkos::Experimental::digits_v;
192 return T{1} << (digits_v<T> - countl_zero(static_cast<T>(x - 1)));
196 KOKKOS_FUNCTION constexpr std::enable_if_t<
197 Impl::is_standard_unsigned_integer_type_v<T>, T>
198 bit_floor(T x) noexcept {
199 if (x == 0)
return 0;
200 using ::Kokkos::Experimental::digits_v;
201 return T{1} << (digits_v<T> - 1 - countl_zero(x));
205 KOKKOS_FUNCTION constexpr std::enable_if_t<
206 Impl::is_standard_unsigned_integer_type_v<T>, T>
207 bit_width(T x) noexcept {
208 if (x == 0)
return 0;
209 using ::Kokkos::Experimental::digits_v;
210 return digits_v<T> - countl_zero(x);
216 [[nodiscard]] KOKKOS_FUNCTION constexpr std::enable_if_t<
217 Impl::is_standard_unsigned_integer_type_v<T>, T>
218 rotl(T x,
int s) noexcept {
219 using Experimental::digits_v;
220 constexpr
auto dig = digits_v<T>;
221 int const rem = s % dig;
222 if (rem == 0)
return x;
223 if (rem > 0)
return (x << rem) | (x >> ((dig - rem) % dig));
224 return (x >> -rem) | (x << ((dig + rem) % dig));
228 [[nodiscard]] KOKKOS_FUNCTION constexpr std::enable_if_t<
229 Impl::is_standard_unsigned_integer_type_v<T>, T>
230 rotr(T x,
int s) noexcept {
231 using Experimental::digits_v;
232 constexpr
auto dig = digits_v<T>;
233 int const rem = s % dig;
234 if (rem == 0)
return x;
235 if (rem > 0)
return (x >> rem) | (x << ((dig - rem) % dig));
236 return (x << -rem) | (x >> ((dig + rem) % dig));
242 namespace Kokkos::Impl {
244 #if defined(KOKKOS_COMPILER_CLANG) || defined(KOKKOS_COMPILER_INTEL_LLVM) || \
245 defined(KOKKOS_COMPILER_GNU)
246 #define KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS
250 KOKKOS_IMPL_DEVICE_FUNCTION T byteswap_builtin_device(T x) noexcept {
251 return byteswap_fallback(x);
255 KOKKOS_IMPL_HOST_FUNCTION T byteswap_builtin_host(T x) noexcept {
256 #ifdef KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS
257 if constexpr (
sizeof(T) == 1) {
259 }
else if constexpr (
sizeof(T) == 2) {
260 return __builtin_bswap16(x);
261 }
else if constexpr (
sizeof(T) == 4) {
262 return __builtin_bswap32(x);
263 }
else if constexpr (
sizeof(T) == 8) {
264 return __builtin_bswap64(x);
265 }
else if constexpr (
sizeof(T) == 16) {
266 #if defined(__has_builtin)
267 #if __has_builtin(__builtin_bswap128)
268 return __builtin_bswap128(x);
271 return (__builtin_bswap64(x >> 64) |
272 (static_cast<T>(__builtin_bswap64(x)) << 64));
276 return byteswap_fallback(x);
280 KOKKOS_IMPL_DEVICE_FUNCTION
281 std::enable_if_t<is_standard_unsigned_integer_type_v<T>,
int>
282 countl_zero_builtin_device(T x) noexcept {
283 #if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP)
284 if constexpr (
sizeof(T) ==
sizeof(
long long int))
285 return __clzll(reinterpret_cast<long long int&>(x));
286 if constexpr (
sizeof(T) ==
sizeof(
int))
287 return __clz(reinterpret_cast<
int&>(x));
288 using ::Kokkos::Experimental::digits_v;
289 constexpr
int shift = digits_v<
unsigned int> - digits_v<T>;
290 return __clz(x) - shift;
291 #elif defined(KOKKOS_ENABLE_SYCL)
294 return countl_zero_fallback(x);
299 KOKKOS_IMPL_HOST_FUNCTION
300 std::enable_if_t<is_standard_unsigned_integer_type_v<T>,
int>
301 countl_zero_builtin_host(T x) noexcept {
302 using ::Kokkos::Experimental::digits_v;
303 if (x == 0)
return digits_v<T>;
304 #ifdef KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS
305 if constexpr (std::is_same_v<T, unsigned long long>) {
306 return __builtin_clzll(x);
307 }
else if constexpr (std::is_same_v<T, unsigned long>) {
308 return __builtin_clzl(x);
309 }
else if constexpr (std::is_same_v<T, unsigned int>) {
310 return __builtin_clz(x);
312 constexpr
int shift = digits_v<unsigned int> - digits_v<T>;
313 return __builtin_clz(x) - shift;
316 return countl_zero_fallback(x);
321 KOKKOS_IMPL_DEVICE_FUNCTION
322 std::enable_if_t<is_standard_unsigned_integer_type_v<T>,
int>
323 countr_zero_builtin_device(T x) noexcept {
324 using ::Kokkos::Experimental::digits_v;
325 if (x == 0)
return digits_v<T>;
326 #if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP)
327 if constexpr (
sizeof(T) ==
sizeof(
long long int))
328 return __ffsll(reinterpret_cast<long long int&>(x)) - 1;
329 return __ffs(reinterpret_cast<int&>(x)) - 1;
330 #elif defined(KOKKOS_ENABLE_SYCL)
333 return countr_zero_fallback(x);
338 KOKKOS_IMPL_HOST_FUNCTION
339 std::enable_if_t<is_standard_unsigned_integer_type_v<T>,
int>
340 countr_zero_builtin_host(T x) noexcept {
341 using ::Kokkos::Experimental::digits_v;
342 if (x == 0)
return digits_v<T>;
343 #ifdef KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS
344 if constexpr (std::is_same_v<T, unsigned long long>) {
345 return __builtin_ctzll(x);
346 }
else if constexpr (std::is_same_v<T, unsigned long>) {
347 return __builtin_ctzl(x);
349 return __builtin_ctz(x);
352 return countr_zero_fallback(x);
357 KOKKOS_IMPL_DEVICE_FUNCTION
358 std::enable_if_t<is_standard_unsigned_integer_type_v<T>,
int>
359 popcount_builtin_device(T x) noexcept {
360 #if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP)
361 if constexpr (
sizeof(T) ==
sizeof(
long long int))
return __popcll(x);
363 #elif defined(KOKKOS_ENABLE_SYCL)
364 return sycl::popcount(x);
366 return popcount_fallback(x);
371 KOKKOS_IMPL_HOST_FUNCTION
372 std::enable_if_t<is_standard_unsigned_integer_type_v<T>,
int>
373 popcount_builtin_host(T x) noexcept {
374 #ifdef KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS
375 if constexpr (std::is_same_v<T, unsigned long long>) {
376 return __builtin_popcountll(x);
377 }
else if constexpr (std::is_same_v<T, unsigned long>) {
378 return __builtin_popcountl(x);
380 return __builtin_popcount(x);
383 return popcount_fallback(x);
387 #undef KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS
391 namespace Kokkos::Experimental {
393 template <
class To,
class From>
394 KOKKOS_FUNCTION std::enable_if_t<
sizeof(To) ==
sizeof(From) &&
395 std::is_trivially_copyable_v<To> &&
396 std::is_trivially_copyable_v<From>,
398 bit_cast_builtin(From
const& from) noexcept {
400 return Kokkos::bit_cast<To>(from);
404 KOKKOS_FUNCTION std::enable_if_t<std::is_integral_v<T>, T> byteswap_builtin(
406 KOKKOS_IF_ON_DEVICE((return ::Kokkos::Impl::byteswap_builtin_device(x);))
407 KOKKOS_IF_ON_HOST((return ::Kokkos::Impl::byteswap_builtin_host(x);))
409 #if defined(KOKKOS_ENABLE_OPENACC) && defined(KOKKOS_COMPILER_NVHPC)
415 KOKKOS_FUNCTION std::enable_if_t<
416 ::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>,
int>
417 countl_zero_builtin(T x) noexcept {
418 KOKKOS_IF_ON_DEVICE((return ::Kokkos::Impl::countl_zero_builtin_device(x);))
419 KOKKOS_IF_ON_HOST((return ::Kokkos::Impl::countl_zero_builtin_host(x);))
421 #if defined(KOKKOS_ENABLE_OPENACC) && defined(KOKKOS_COMPILER_NVHPC)
427 KOKKOS_FUNCTION std::enable_if_t<
428 ::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>,
int>
429 countl_one_builtin(T x) noexcept {
430 if (x == finite_max_v<T>)
return digits_v<T>;
431 return countl_zero_builtin(static_cast<T>(~x));
435 KOKKOS_FUNCTION std::enable_if_t<
436 ::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>,
int>
437 countr_zero_builtin(T x) noexcept {
438 KOKKOS_IF_ON_DEVICE((return ::Kokkos::Impl::countr_zero_builtin_device(x);))
439 KOKKOS_IF_ON_HOST((return ::Kokkos::Impl::countr_zero_builtin_host(x);))
441 #if defined(KOKKOS_ENABLE_OPENACC) && defined(KOKKOS_COMPILER_NVHPC)
447 KOKKOS_FUNCTION std::enable_if_t<
448 ::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>,
int>
449 countr_one_builtin(T x) noexcept {
450 if (x == finite_max_v<T>)
return digits_v<T>;
451 return countr_zero_builtin(static_cast<T>(~x));
455 KOKKOS_FUNCTION std::enable_if_t<
456 ::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>,
int>
457 popcount_builtin(T x) noexcept {
458 KOKKOS_IF_ON_DEVICE((return ::Kokkos::Impl::popcount_builtin_device(x);))
459 KOKKOS_IF_ON_HOST((return ::Kokkos::Impl::popcount_builtin_host(x);))
461 #if defined(KOKKOS_ENABLE_OPENACC) && defined(KOKKOS_COMPILER_NVHPC)
467 KOKKOS_FUNCTION std::enable_if_t<
468 ::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>,
bool>
469 has_single_bit_builtin(T x) noexcept {
470 return has_single_bit(x);
475 std::enable_if_t<::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, T>
476 bit_ceil_builtin(T x) noexcept {
477 if (x <= 1)
return 1;
478 return T{1} << (digits_v<T> - countl_zero_builtin(static_cast<T>(x - 1)));
483 std::enable_if_t<::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, T>
484 bit_floor_builtin(T x) noexcept {
485 if (x == 0)
return 0;
486 return T{1} << (digits_v<T> - 1 - countl_zero_builtin(x));
491 std::enable_if_t<::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, T>
492 bit_width_builtin(T x) noexcept {
493 if (x == 0)
return 0;
494 return digits_v<T> - countl_zero_builtin(x);
498 [[nodiscard]] KOKKOS_FUNCTION
499 std::enable_if_t<::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, T>
500 rotl_builtin(T x,
int s) noexcept {
505 [[nodiscard]] KOKKOS_FUNCTION
506 std::enable_if_t<::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, T>
507 rotr_builtin(T x,
int s) noexcept {