Kokkos Core Kernels Package  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Kokkos_Bitset.hpp
1 //@HEADER
2 // ************************************************************************
3 //
4 // Kokkos v. 4.0
5 // Copyright (2022) National Technology & Engineering
6 // Solutions of Sandia, LLC (NTESS).
7 //
8 // Under the terms of Contract DE-NA0003525 with NTESS,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 // See https://kokkos.org/LICENSE for license information.
13 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 //
15 //@HEADER
16 
17 #ifndef KOKKOS_BITSET_HPP
18 #define KOKKOS_BITSET_HPP
19 #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
20 #define KOKKOS_IMPL_PUBLIC_INCLUDE
21 #define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_BITSET
22 #endif
23 
24 #include <Kokkos_Core.hpp>
25 #include <Kokkos_BitManipulation.hpp>
26 #include <Kokkos_Functional.hpp>
27 
28 #include <impl/Kokkos_Bitset_impl.hpp>
29 
30 namespace Kokkos {
31 
32 template <typename Device = Kokkos::DefaultExecutionSpace>
33 class Bitset;
34 
35 template <typename Device = Kokkos::DefaultExecutionSpace>
36 class ConstBitset;
37 
38 template <typename DstDevice, typename SrcDevice>
39 void deep_copy(Bitset<DstDevice>& dst, Bitset<SrcDevice> const& src);
40 
41 template <typename DstDevice, typename SrcDevice>
42 void deep_copy(Bitset<DstDevice>& dst, ConstBitset<SrcDevice> const& src);
43 
44 template <typename DstDevice, typename SrcDevice>
45 void deep_copy(ConstBitset<DstDevice>& dst, ConstBitset<SrcDevice> const& src);
46 
48 template <typename Device>
49 class Bitset {
50  public:
51  using execution_space = typename Device::execution_space;
52  using size_type = unsigned int;
53 
54  static constexpr unsigned BIT_SCAN_REVERSE = 1u;
55  static constexpr unsigned MOVE_HINT_BACKWARD = 2u;
56 
57  static constexpr unsigned BIT_SCAN_FORWARD_MOVE_HINT_FORWARD = 0u;
58  static constexpr unsigned BIT_SCAN_REVERSE_MOVE_HINT_FORWARD =
59  BIT_SCAN_REVERSE;
60  static constexpr unsigned BIT_SCAN_FORWARD_MOVE_HINT_BACKWARD =
61  MOVE_HINT_BACKWARD;
62  static constexpr unsigned BIT_SCAN_REVERSE_MOVE_HINT_BACKWARD =
63  BIT_SCAN_REVERSE | MOVE_HINT_BACKWARD;
64 
65  private:
66  static constexpr unsigned block_size = sizeof(unsigned) * CHAR_BIT;
67  static constexpr unsigned block_mask = block_size - 1u;
68  static constexpr unsigned block_shift =
69  Kokkos::has_single_bit(block_size) ? Kokkos::bit_width(block_size) - 1
70  : ~0u;
71 
73  using block_view_type = View<unsigned*, Device, MemoryTraits<RandomAccess>>;
74 
75  public:
76  Bitset() = default;
77 
79  Bitset(unsigned arg_size) : Bitset(Kokkos::view_alloc(), arg_size) {}
80 
81  template <class... P>
82  Bitset(const Impl::ViewCtorProp<P...>& arg_prop, unsigned arg_size)
83  : m_size(arg_size), m_last_block_mask(0u) {
85  using alloc_prop_t = std::decay_t<decltype(arg_prop)>;
86  static_assert(alloc_prop_t::initialize,
87  "Allocation property 'initialize' should be true.");
88  static_assert(
89  !alloc_prop_t::has_pointer,
90  "Allocation properties should not contain the 'pointer' property.");
91 
93  const auto prop_copy =
94  Impl::with_properties_if_unset(arg_prop, std::string("Bitset"));
95  m_blocks =
96  block_view_type(prop_copy, ((m_size + block_mask) >> block_shift));
97 
98  for (int i = 0, end = static_cast<int>(m_size & block_mask); i < end; ++i) {
99  m_last_block_mask |= 1u << i;
100  }
101  }
102 
103  KOKKOS_DEFAULTED_FUNCTION
104  Bitset(const Bitset<Device>&) = default;
105 
106  KOKKOS_DEFAULTED_FUNCTION
107  Bitset& operator=(const Bitset<Device>&) = default;
108 
109  KOKKOS_DEFAULTED_FUNCTION
110  Bitset(Bitset<Device>&&) = default;
111 
112  KOKKOS_DEFAULTED_FUNCTION
113  Bitset& operator=(Bitset<Device>&&) = default;
114 
115  KOKKOS_DEFAULTED_FUNCTION
116  ~Bitset() = default;
117 
120  KOKKOS_FORCEINLINE_FUNCTION
121  unsigned size() const { return m_size; }
122 
125  unsigned count() const {
126  Impl::BitsetCount<Bitset<Device>> f(*this);
127  return f.apply();
128  }
129 
132  void set() {
133  Kokkos::deep_copy(m_blocks, ~0u);
134 
135  if (m_last_block_mask) {
136  // clear the unused bits in the last block
137  auto last_block = Kokkos::subview(m_blocks, m_blocks.extent(0) - 1u);
138  Kokkos::deep_copy(typename Device::execution_space{}, last_block,
139  m_last_block_mask);
140  Kokkos::fence(
141  "Bitset::set: fence after clearing unused bits copying from "
142  "HostSpace");
143  }
144  }
145 
148  void reset() { Kokkos::deep_copy(m_blocks, 0u); }
149 
152  void clear() { Kokkos::deep_copy(m_blocks, 0u); }
153 
156  KOKKOS_FORCEINLINE_FUNCTION
157  bool set(unsigned i) const {
158  if (i < m_size) {
159  unsigned* block_ptr = &m_blocks[i >> block_shift];
160  const unsigned mask = 1u << static_cast<int>(i & block_mask);
161 
162  return !(atomic_fetch_or(block_ptr, mask) & mask);
163  }
164  return false;
165  }
166 
169  KOKKOS_FORCEINLINE_FUNCTION
170  bool reset(unsigned i) const {
171  if (i < m_size) {
172  unsigned* block_ptr = &m_blocks[i >> block_shift];
173  const unsigned mask = 1u << static_cast<int>(i & block_mask);
174 
175  return atomic_fetch_and(block_ptr, ~mask) & mask;
176  }
177  return false;
178  }
179 
182  KOKKOS_FORCEINLINE_FUNCTION
183  bool test(unsigned i) const {
184  if (i < m_size) {
185 #ifdef KOKKOS_ENABLE_SYCL
186  const unsigned block = Kokkos::atomic_load(&m_blocks[i >> block_shift]);
187 #else
188  const unsigned block = volatile_load(&m_blocks[i >> block_shift]);
189 #endif
190  const unsigned mask = 1u << static_cast<int>(i & block_mask);
191  return block & mask;
192  }
193  return false;
194  }
195 
199  KOKKOS_FORCEINLINE_FUNCTION
200  unsigned max_hint() const { return m_blocks.extent(0); }
201 
206  KOKKOS_INLINE_FUNCTION
208  unsigned hint,
209  unsigned scan_direction = BIT_SCAN_FORWARD_MOVE_HINT_FORWARD) const {
210  const unsigned block_idx =
211  (hint >> block_shift) < m_blocks.extent(0) ? (hint >> block_shift) : 0;
212  const unsigned offset = hint & block_mask;
213 #ifdef KOKKOS_ENABLE_SYCL
214  unsigned block = Kokkos::atomic_load(&m_blocks[block_idx]);
215 #else
216  unsigned block = volatile_load(&m_blocks[block_idx]);
217 #endif
218  block = !m_last_block_mask || (block_idx < (m_blocks.extent(0) - 1))
219  ? block
220  : block & m_last_block_mask;
221 
222  return find_any_helper(block_idx, offset, block, scan_direction);
223  }
224 
229  KOKKOS_INLINE_FUNCTION
231  unsigned hint,
232  unsigned scan_direction = BIT_SCAN_FORWARD_MOVE_HINT_FORWARD) const {
233  const unsigned block_idx = hint >> block_shift;
234  const unsigned offset = hint & block_mask;
235 #ifdef KOKKOS_ENABLE_SYCL
236  unsigned block = Kokkos::atomic_load(&m_blocks[block_idx]);
237 #else
238  unsigned block = volatile_load(&m_blocks[block_idx]);
239 #endif
240  block = !m_last_block_mask || (block_idx < (m_blocks.extent(0) - 1))
241  ? ~block
242  : ~block & m_last_block_mask;
243 
244  return find_any_helper(block_idx, offset, block, scan_direction);
245  }
246 
247  KOKKOS_INLINE_FUNCTION constexpr bool is_allocated() const {
248  return m_blocks.is_allocated();
249  }
250 
251  private:
252  KOKKOS_FORCEINLINE_FUNCTION
253  Kokkos::pair<bool, unsigned> find_any_helper(unsigned block_idx,
254  unsigned offset, unsigned block,
255  unsigned scan_direction) const {
256  Kokkos::pair<bool, unsigned> result(block > 0u, 0);
257 
258  if (!result.first) {
259  result.second = update_hint(block_idx, offset, scan_direction);
260  } else {
261  result.second =
262  scan_block((block_idx << block_shift), offset, block, scan_direction);
263  }
264  return result;
265  }
266 
267  KOKKOS_FORCEINLINE_FUNCTION
268  unsigned scan_block(unsigned block_start, int offset, unsigned block,
269  unsigned scan_direction) const {
270  offset = !(scan_direction & BIT_SCAN_REVERSE)
271  ? offset
272  : (offset + block_mask) & block_mask;
273  block = Impl::rotate_right(block, offset);
274  return (((!(scan_direction & BIT_SCAN_REVERSE)
275  ? Impl::bit_scan_forward(block)
276  : Impl::int_log2(block)) +
277  offset) &
278  block_mask) +
279  block_start;
280  }
281 
282  KOKKOS_FORCEINLINE_FUNCTION
283  unsigned update_hint(long long block_idx, unsigned offset,
284  unsigned scan_direction) const {
285  block_idx += scan_direction & MOVE_HINT_BACKWARD ? -1 : 1;
286  block_idx = block_idx >= 0 ? block_idx : m_blocks.extent(0) - 1;
287  block_idx =
288  block_idx < static_cast<long long>(m_blocks.extent(0)) ? block_idx : 0;
289 
290  return static_cast<unsigned>(block_idx) * block_size + offset;
291  }
292 
293  private:
294  unsigned m_size = 0;
295  unsigned m_last_block_mask = 0;
296  block_view_type m_blocks;
297 
298  private:
299  template <typename DDevice>
300  friend class Bitset;
301 
302  template <typename DDevice>
303  friend class ConstBitset;
304 
305  template <typename Bitset>
306  friend struct Impl::BitsetCount;
307 
308  template <typename DstDevice, typename SrcDevice>
309  friend void deep_copy(Bitset<DstDevice>& dst, Bitset<SrcDevice> const& src);
310 
311  template <typename DstDevice, typename SrcDevice>
312  friend void deep_copy(Bitset<DstDevice>& dst,
313  ConstBitset<SrcDevice> const& src);
314 };
315 
318 template <typename Device>
319 class ConstBitset {
320  public:
321  using execution_space = typename Device::execution_space;
322  using size_type = unsigned int;
323  using block_view_type = typename Bitset<Device>::block_view_type::const_type;
324 
325  private:
326  static constexpr unsigned block_size = sizeof(unsigned) * CHAR_BIT;
327  static constexpr unsigned block_mask = block_size - 1u;
328  static constexpr unsigned block_shift =
329  Kokkos::has_single_bit(block_size) ? Kokkos::bit_width(block_size) - 1
330  : ~0u;
331 
332  public:
333  KOKKOS_FUNCTION
334  ConstBitset() : m_size(0) {}
335 
336  KOKKOS_FUNCTION
337  ConstBitset(Bitset<Device> const& rhs)
338  : m_size(rhs.m_size), m_blocks(rhs.m_blocks) {}
339 
340  KOKKOS_FUNCTION
341  ConstBitset(ConstBitset<Device> const& rhs)
342  : m_size(rhs.m_size), m_blocks(rhs.m_blocks) {}
343 
344  KOKKOS_FUNCTION
345  ConstBitset<Device>& operator=(Bitset<Device> const& rhs) {
346  this->m_size = rhs.m_size;
347  this->m_blocks = rhs.m_blocks;
348 
349  return *this;
350  }
351 
352  KOKKOS_FUNCTION
353  ConstBitset<Device>& operator=(ConstBitset<Device> const& rhs) {
354  this->m_size = rhs.m_size;
355  this->m_blocks = rhs.m_blocks;
356 
357  return *this;
358  }
359 
360  KOKKOS_FORCEINLINE_FUNCTION
361  unsigned size() const { return m_size; }
362 
363  unsigned count() const {
364  Impl::BitsetCount<ConstBitset<Device>> f(*this);
365  return f.apply();
366  }
367 
368  KOKKOS_FORCEINLINE_FUNCTION
369  bool test(unsigned i) const {
370  if (i < m_size) {
371  const unsigned block = m_blocks[i >> block_shift];
372  const unsigned mask = 1u << static_cast<int>(i & block_mask);
373  return block & mask;
374  }
375  return false;
376  }
377 
378  private:
379  unsigned m_size;
380  block_view_type m_blocks;
381 
382  private:
383  template <typename DDevice>
384  friend class ConstBitset;
385 
386  template <typename Bitset>
387  friend struct Impl::BitsetCount;
388 
389  template <typename DstDevice, typename SrcDevice>
390  friend void deep_copy(Bitset<DstDevice>& dst,
391  ConstBitset<SrcDevice> const& src);
392 
393  template <typename DstDevice, typename SrcDevice>
394  friend void deep_copy(ConstBitset<DstDevice>& dst,
395  ConstBitset<SrcDevice> const& src);
396 };
397 
398 template <typename DstDevice, typename SrcDevice>
399 void deep_copy(Bitset<DstDevice>& dst, Bitset<SrcDevice> const& src) {
400  if (dst.size() != src.size()) {
401  Kokkos::Impl::throw_runtime_exception(
402  "Error: Cannot deep_copy bitsets of different sizes!");
403  }
404  Kokkos::deep_copy(dst.m_blocks, src.m_blocks);
405 }
406 
407 template <typename DstDevice, typename SrcDevice>
408 void deep_copy(Bitset<DstDevice>& dst, ConstBitset<SrcDevice> const& src) {
409  if (dst.size() != src.size()) {
410  Kokkos::Impl::throw_runtime_exception(
411  "Error: Cannot deep_copy bitsets of different sizes!");
412  }
413  Kokkos::deep_copy(dst.m_blocks, src.m_blocks);
414 }
415 
416 template <typename DstDevice, typename SrcDevice>
417 void deep_copy(ConstBitset<DstDevice>& dst, ConstBitset<SrcDevice> const& src) {
418  if (dst.size() != src.size()) {
419  Kokkos::Impl::throw_runtime_exception(
420  "Error: Cannot deep_copy bitsets of different sizes!");
421  }
422  Kokkos::deep_copy(dst.m_blocks, src.m_blocks);
423 }
424 
425 } // namespace Kokkos
426 
427 #ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_BITSET
428 #undef KOKKOS_IMPL_PUBLIC_INCLUDE
429 #undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_BITSET
430 #endif
431 #endif // KOKKOS_BITSET_HPP
KOKKOS_FORCEINLINE_FUNCTION unsigned max_hint() const
Replacement for std::pair that works on CUDA devices.
Definition: Kokkos_Pair.hpp:44
unsigned count() const
KOKKOS_FORCEINLINE_FUNCTION bool test(unsigned i) const
KOKKOS_FORCEINLINE_FUNCTION unsigned size() const
KOKKOS_INLINE_FUNCTION Kokkos::pair< bool, unsigned > find_any_set_near(unsigned hint, unsigned scan_direction=BIT_SCAN_FORWARD_MOVE_HINT_FORWARD) const
KOKKOS_INLINE_FUNCTION Kokkos::pair< bool, unsigned > find_any_unset_near(unsigned hint, unsigned scan_direction=BIT_SCAN_FORWARD_MOVE_HINT_FORWARD) const