17 #ifndef KOKKOS_BITSET_HPP
18 #define KOKKOS_BITSET_HPP
19 #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
20 #define KOKKOS_IMPL_PUBLIC_INCLUDE
21 #define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_BITSET
24 #include <Kokkos_Core.hpp>
25 #include <Kokkos_Functional.hpp>
27 #include <impl/Kokkos_Bitset_impl.hpp>
31 template <
typename Device = Kokkos::DefaultExecutionSpace>
34 template <
typename Device = Kokkos::DefaultExecutionSpace>
37 template <
typename DstDevice,
typename SrcDevice>
40 template <
typename DstDevice,
typename SrcDevice>
43 template <
typename DstDevice,
typename SrcDevice>
47 template <
typename Device>
50 using execution_space =
typename Device::execution_space;
51 using size_type =
unsigned int;
53 static constexpr
unsigned BIT_SCAN_REVERSE = 1u;
54 static constexpr
unsigned MOVE_HINT_BACKWARD = 2u;
56 static constexpr
unsigned BIT_SCAN_FORWARD_MOVE_HINT_FORWARD = 0u;
57 static constexpr
unsigned BIT_SCAN_REVERSE_MOVE_HINT_FORWARD =
59 static constexpr
unsigned BIT_SCAN_FORWARD_MOVE_HINT_BACKWARD =
61 static constexpr
unsigned BIT_SCAN_REVERSE_MOVE_HINT_BACKWARD =
62 BIT_SCAN_REVERSE | MOVE_HINT_BACKWARD;
66 block_size =
static_cast<unsigned>(
sizeof(unsigned) * CHAR_BIT)
68 enum :
unsigned { block_mask = block_size - 1u };
70 block_shift = Kokkos::Impl::integral_power_of_two(block_size)
74 using block_view_type = View<unsigned*, Device, MemoryTraits<RandomAccess>>;
80 Bitset(
unsigned arg_size) :
Bitset(Kokkos::view_alloc(), arg_size) {}
83 Bitset(
const Impl::ViewCtorProp<P...>& arg_prop,
unsigned arg_size)
84 : m_size(arg_size), m_last_block_mask(0u) {
86 using alloc_prop_t = std::decay_t<decltype(arg_prop)>;
87 static_assert(alloc_prop_t::initialize,
88 "Allocation property 'initialize' should be true.");
90 !alloc_prop_t::has_pointer,
91 "Allocation properties should not contain the 'pointer' property.");
94 const auto prop_copy =
95 Impl::with_properties_if_unset(arg_prop, std::string(
"Bitset"));
97 block_view_type(prop_copy, ((m_size + block_mask) >> block_shift));
99 for (
int i = 0, end = static_cast<int>(m_size & block_mask); i < end; ++i) {
100 m_last_block_mask |= 1u << i;
104 KOKKOS_DEFAULTED_FUNCTION
107 KOKKOS_DEFAULTED_FUNCTION
110 KOKKOS_DEFAULTED_FUNCTION
113 KOKKOS_DEFAULTED_FUNCTION
116 KOKKOS_DEFAULTED_FUNCTION
121 KOKKOS_FORCEINLINE_FUNCTION
122 unsigned size()
const {
return m_size; }
127 Impl::BitsetCount<Bitset<Device>> f(*
this);
134 Kokkos::deep_copy(m_blocks, ~0u);
136 if (m_last_block_mask) {
138 Kokkos::Impl::DeepCopy<typename Device::memory_space, Kokkos::HostSpace>(
139 m_blocks.data() + (m_blocks.extent(0) - 1u), &m_last_block_mask,
142 "Bitset::set: fence after clearing unused bits copying from "
149 void reset() { Kokkos::deep_copy(m_blocks, 0u); }
153 void clear() { Kokkos::deep_copy(m_blocks, 0u); }
157 KOKKOS_FORCEINLINE_FUNCTION
158 bool set(
unsigned i)
const {
160 unsigned* block_ptr = &m_blocks[i >> block_shift];
161 const unsigned mask = 1u << static_cast<int>(i & block_mask);
163 return !(atomic_fetch_or(block_ptr, mask) & mask);
170 KOKKOS_FORCEINLINE_FUNCTION
173 unsigned* block_ptr = &m_blocks[i >> block_shift];
174 const unsigned mask = 1u << static_cast<int>(i & block_mask);
176 return atomic_fetch_and(block_ptr, ~mask) & mask;
183 KOKKOS_FORCEINLINE_FUNCTION
186 #ifdef KOKKOS_ENABLE_SYCL
187 const unsigned block = Kokkos::atomic_load(&m_blocks[i >> block_shift]);
189 const unsigned block = volatile_load(&m_blocks[i >> block_shift]);
191 const unsigned mask = 1u << static_cast<int>(i & block_mask);
200 KOKKOS_FORCEINLINE_FUNCTION
201 unsigned max_hint()
const {
return m_blocks.extent(0); }
207 KOKKOS_INLINE_FUNCTION
210 unsigned scan_direction = BIT_SCAN_FORWARD_MOVE_HINT_FORWARD)
const {
211 const unsigned block_idx =
212 (hint >> block_shift) < m_blocks.extent(0) ? (hint >> block_shift) : 0;
213 const unsigned offset = hint & block_mask;
214 #ifdef KOKKOS_ENABLE_SYCL
215 unsigned block = Kokkos::atomic_load(&m_blocks[block_idx]);
217 unsigned block = volatile_load(&m_blocks[block_idx]);
219 block = !m_last_block_mask || (block_idx < (m_blocks.extent(0) - 1))
221 : block & m_last_block_mask;
223 return find_any_helper(block_idx, offset, block, scan_direction);
230 KOKKOS_INLINE_FUNCTION
233 unsigned scan_direction = BIT_SCAN_FORWARD_MOVE_HINT_FORWARD)
const {
234 const unsigned block_idx = hint >> block_shift;
235 const unsigned offset = hint & block_mask;
236 #ifdef KOKKOS_ENABLE_SYCL
237 unsigned block = Kokkos::atomic_load(&m_blocks[block_idx]);
239 unsigned block = volatile_load(&m_blocks[block_idx]);
241 block = !m_last_block_mask || (block_idx < (m_blocks.extent(0) - 1))
243 : ~block & m_last_block_mask;
245 return find_any_helper(block_idx, offset, block, scan_direction);
248 KOKKOS_INLINE_FUNCTION constexpr
bool is_allocated()
const {
249 return m_blocks.is_allocated();
253 KOKKOS_FORCEINLINE_FUNCTION
255 unsigned offset,
unsigned block,
256 unsigned scan_direction)
const {
260 result.second = update_hint(block_idx, offset, scan_direction);
263 scan_block((block_idx << block_shift), offset, block, scan_direction);
268 KOKKOS_FORCEINLINE_FUNCTION
269 unsigned scan_block(
unsigned block_start,
int offset,
unsigned block,
270 unsigned scan_direction)
const {
271 offset = !(scan_direction & BIT_SCAN_REVERSE)
273 : (offset + block_mask) & block_mask;
274 block = Impl::rotate_right(block, offset);
275 return (((!(scan_direction & BIT_SCAN_REVERSE)
276 ? Impl::bit_scan_forward(block)
277 : Impl::int_log2(block)) +
283 KOKKOS_FORCEINLINE_FUNCTION
284 unsigned update_hint(
long long block_idx,
unsigned offset,
285 unsigned scan_direction)
const {
286 block_idx += scan_direction & MOVE_HINT_BACKWARD ? -1 : 1;
287 block_idx = block_idx >= 0 ? block_idx : m_blocks.extent(0) - 1;
289 block_idx < static_cast<long long>(m_blocks.extent(0)) ? block_idx : 0;
291 return static_cast<unsigned>(block_idx) * block_size + offset;
296 unsigned m_last_block_mask = 0;
297 block_view_type m_blocks;
300 template <
typename DDevice>
303 template <
typename DDevice>
304 friend class ConstBitset;
306 template <
typename Bitset>
307 friend struct Impl::BitsetCount;
309 template <
typename DstDevice,
typename SrcDevice>
310 friend void deep_copy(Bitset<DstDevice>& dst, Bitset<SrcDevice>
const& src);
312 template <
typename DstDevice,
typename SrcDevice>
313 friend void deep_copy(Bitset<DstDevice>& dst,
314 ConstBitset<SrcDevice>
const& src);
319 template <
typename Device>
322 using execution_space =
typename Device::execution_space;
323 using size_type =
unsigned int;
324 using block_view_type =
typename Bitset<Device>::block_view_type::const_type;
327 enum { block_size =
static_cast<unsigned>(
sizeof(unsigned) * CHAR_BIT) };
328 enum { block_mask = block_size - 1u };
329 enum { block_shift = Kokkos::Impl::integral_power_of_two(block_size) };
333 ConstBitset() : m_size(0) {}
336 ConstBitset(Bitset<Device>
const& rhs)
337 : m_size(rhs.m_size), m_blocks(rhs.m_blocks) {}
340 ConstBitset(ConstBitset<Device>
const& rhs)
341 : m_size(rhs.m_size), m_blocks(rhs.m_blocks) {}
344 ConstBitset<Device>& operator=(Bitset<Device>
const& rhs) {
345 this->m_size = rhs.m_size;
346 this->m_blocks = rhs.m_blocks;
352 ConstBitset<Device>& operator=(ConstBitset<Device>
const& rhs) {
353 this->m_size = rhs.m_size;
354 this->m_blocks = rhs.m_blocks;
359 KOKKOS_FORCEINLINE_FUNCTION
360 unsigned size()
const {
return m_size; }
362 unsigned count()
const {
363 Impl::BitsetCount<ConstBitset<Device>> f(*
this);
367 KOKKOS_FORCEINLINE_FUNCTION
368 bool test(
unsigned i)
const {
370 const unsigned block = m_blocks[i >> block_shift];
371 const unsigned mask = 1u << static_cast<int>(i & block_mask);
379 block_view_type m_blocks;
382 template <
typename DDevice>
383 friend class ConstBitset;
385 template <
typename Bitset>
386 friend struct Impl::BitsetCount;
388 template <
typename DstDevice,
typename SrcDevice>
389 friend void deep_copy(Bitset<DstDevice>& dst,
390 ConstBitset<SrcDevice>
const& src);
392 template <
typename DstDevice,
typename SrcDevice>
393 friend void deep_copy(ConstBitset<DstDevice>& dst,
394 ConstBitset<SrcDevice>
const& src);
397 template <
typename DstDevice,
typename SrcDevice>
398 void deep_copy(Bitset<DstDevice>& dst, Bitset<SrcDevice>
const& src) {
399 if (dst.size() != src.size()) {
400 Kokkos::Impl::throw_runtime_exception(
401 "Error: Cannot deep_copy bitsets of different sizes!");
404 Kokkos::fence(
"Bitset::deep_copy: fence before copy operation");
405 Kokkos::Impl::DeepCopy<
typename DstDevice::memory_space,
406 typename SrcDevice::memory_space>(
407 dst.m_blocks.data(), src.m_blocks.data(),
408 sizeof(unsigned) * src.m_blocks.extent(0));
409 Kokkos::fence(
"Bitset::deep_copy: fence after copy operation");
412 template <
typename DstDevice,
typename SrcDevice>
413 void deep_copy(Bitset<DstDevice>& dst, ConstBitset<SrcDevice>
const& src) {
414 if (dst.size() != src.size()) {
415 Kokkos::Impl::throw_runtime_exception(
416 "Error: Cannot deep_copy bitsets of different sizes!");
419 Kokkos::fence(
"Bitset::deep_copy: fence before copy operation");
420 Kokkos::Impl::DeepCopy<
typename DstDevice::memory_space,
421 typename SrcDevice::memory_space>(
422 dst.m_blocks.data(), src.m_blocks.data(),
423 sizeof(unsigned) * src.m_blocks.extent(0));
424 Kokkos::fence(
"Bitset::deep_copy: fence after copy operation");
427 template <
typename DstDevice,
typename SrcDevice>
428 void deep_copy(ConstBitset<DstDevice>& dst, ConstBitset<SrcDevice>
const& src) {
429 if (dst.size() != src.size()) {
430 Kokkos::Impl::throw_runtime_exception(
431 "Error: Cannot deep_copy bitsets of different sizes!");
434 Kokkos::fence(
"Bitset::deep_copy: fence before copy operation");
435 Kokkos::Impl::DeepCopy<
typename DstDevice::memory_space,
436 typename SrcDevice::memory_space>(
437 dst.m_blocks.data(), src.m_blocks.data(),
438 sizeof(unsigned) * src.m_blocks.extent(0));
439 Kokkos::fence(
"Bitset::deep_copy: fence after copy operation");
444 #ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_BITSET
445 #undef KOKKOS_IMPL_PUBLIC_INCLUDE
446 #undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_BITSET
448 #endif // KOKKOS_BITSET_HPP
A thread safe view to a bitset.
KOKKOS_FORCEINLINE_FUNCTION unsigned max_hint() const
Replacement for std::pair that works on CUDA devices.
Bitset(const Impl::ViewCtorProp< P...> &arg_prop, unsigned arg_size)
KOKKOS_FORCEINLINE_FUNCTION bool reset(unsigned i) const
KOKKOS_FORCEINLINE_FUNCTION bool test(unsigned i) const
KOKKOS_FORCEINLINE_FUNCTION unsigned size() const
KOKKOS_INLINE_FUNCTION Kokkos::pair< bool, unsigned > find_any_set_near(unsigned hint, unsigned scan_direction=BIT_SCAN_FORWARD_MOVE_HINT_FORWARD) const
Bitset(unsigned arg_size)
arg_size := number of bit in set
KOKKOS_FORCEINLINE_FUNCTION bool set(unsigned i) const
KOKKOS_INLINE_FUNCTION Kokkos::pair< bool, unsigned > find_any_unset_near(unsigned hint, unsigned scan_direction=BIT_SCAN_FORWARD_MOVE_HINT_FORWARD) const