Kokkos Core Kernels Package  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Kokkos_MemoryPool.hpp
1 //@HEADER
2 // ************************************************************************
3 //
4 // Kokkos v. 4.0
5 // Copyright (2022) National Technology & Engineering
6 // Solutions of Sandia, LLC (NTESS).
7 //
8 // Under the terms of Contract DE-NA0003525 with NTESS,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 // See https://kokkos.org/LICENSE for license information.
13 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 //
15 //@HEADER
16 
17 #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
18 #include <Kokkos_Macros.hpp>
19 static_assert(false,
20  "Including non-public Kokkos header files is not allowed.");
21 #endif
22 #ifndef KOKKOS_MEMORYPOOL_HPP
23 #define KOKKOS_MEMORYPOOL_HPP
24 
25 #include <Kokkos_Core_fwd.hpp>
26 #include <Kokkos_Parallel.hpp>
27 #include <Kokkos_Atomic.hpp>
28 #include <impl/Kokkos_ConcurrentBitset.hpp>
29 #include <impl/Kokkos_Error.hpp>
30 #include <impl/Kokkos_SharedAlloc.hpp>
31 
32 namespace Kokkos {
33 namespace Impl {
34 /* Report violation of size constraints:
35  * min_block_alloc_size <= max_block_alloc_size
36  * max_block_alloc_size <= min_superblock_size
37  * min_superblock_size <= max_superblock_size
38  * min_superblock_size <= min_total_alloc_size
39  * min_superblock_size <= min_block_alloc_size *
40  * max_block_per_superblock
41  */
42 void memory_pool_bounds_verification(size_t min_block_alloc_size,
43  size_t max_block_alloc_size,
44  size_t min_superblock_size,
45  size_t max_superblock_size,
46  size_t max_block_per_superblock,
47  size_t min_total_alloc_size);
48 } // namespace Impl
49 } // namespace Kokkos
50 
51 namespace Kokkos {
52 
53 namespace Impl {
54 
55 void _print_memory_pool_state(std::ostream &s, uint32_t const *sb_state_ptr,
56  int32_t sb_count, uint32_t sb_size_lg2,
57  uint32_t sb_state_size, uint32_t state_shift,
58  uint32_t state_used_mask);
59 
60 } // end namespace Impl
61 
62 template <typename DeviceType>
63 class MemoryPool {
64  private:
65  using CB = Kokkos::Impl::concurrent_bitset;
66 
67  enum : uint32_t { bits_per_int_lg2 = CB::bits_per_int_lg2 };
68  enum : uint32_t { state_shift = CB::state_shift };
69  enum : uint32_t { state_used_mask = CB::state_used_mask };
70  enum : uint32_t { state_header_mask = CB::state_header_mask };
71  enum : uint32_t { max_bit_count_lg2 = CB::max_bit_count_lg2 };
72  enum : uint32_t { max_bit_count = CB::max_bit_count };
73 
74  enum : uint32_t { HINT_PER_BLOCK_SIZE = 2 };
75 
76  /* Each superblock has a concurrent bitset state
77  * which is an array of uint32_t integers.
78  * [ { block_count_lg2 : state_shift bits
79  * , used_block_count : ( 32 - state_shift ) bits
80  * }
81  * , { block allocation bit set }* ]
82  *
83  * As superblocks are assigned (allocated) to a block size
84  * and released (deallocated) back to empty the superblock state
85  * is concurrently updated.
86  */
87 
88  /* Mapping between block_size <-> block_state
89  *
90  * block_state = ( m_sb_size_lg2 - block_size_lg2 ) << state_shift
91  * block_size = m_sb_size_lg2 - ( block_state >> state_shift )
92  *
93  * Thus A_block_size < B_block_size <=> A_block_state > B_block_state
94  */
95 
96  using base_memory_space = typename DeviceType::memory_space;
97 
98  enum {
100  base_memory_space>::accessible
101  };
102 
103  using Tracker = Kokkos::Impl::SharedAllocationTracker;
104  using Record = Kokkos::Impl::SharedAllocationRecord<base_memory_space>;
105 
106  Tracker m_tracker;
107  uint32_t *m_sb_state_array;
108  uint32_t m_sb_state_size;
109  uint32_t m_sb_size_lg2;
110  uint32_t m_max_block_size_lg2;
111  uint32_t m_min_block_size_lg2;
112  int32_t m_sb_count;
113  int32_t m_hint_offset; // Offset to K * #block_size array of hints
114  int32_t m_data_offset; // Offset to 0th superblock data
115  int32_t m_unused_padding;
116 
117  public:
118  using memory_space = typename DeviceType::memory_space;
119 
121  enum : uint32_t { max_superblock_size = 1LU << 31 /* 2 gigabytes */ };
122  enum : uint32_t { max_block_per_superblock = max_bit_count };
123 
124  //--------------------------------------------------------------------------
125 
126  KOKKOS_INLINE_FUNCTION
127  bool operator==(MemoryPool const &other) const {
128  return m_sb_state_array == other.m_sb_state_array;
129  }
130 
131  KOKKOS_INLINE_FUNCTION
132  size_t capacity() const noexcept {
133  return size_t(m_sb_count) << m_sb_size_lg2;
134  }
135 
136  KOKKOS_INLINE_FUNCTION
137  size_t min_block_size() const noexcept {
138  return (1LU << m_min_block_size_lg2);
139  }
140 
141  KOKKOS_INLINE_FUNCTION
142  size_t max_block_size() const noexcept {
143  return (1LU << m_max_block_size_lg2);
144  }
145 
146  struct usage_statistics {
147  size_t capacity_bytes;
148  size_t superblock_bytes;
149  size_t max_block_bytes;
150  size_t min_block_bytes;
151  size_t capacity_superblocks;
152  size_t consumed_superblocks;
153  size_t consumed_blocks;
154  size_t consumed_bytes;
155  size_t reserved_blocks;
156  size_t reserved_bytes;
157  };
158 
159  void get_usage_statistics(usage_statistics &stats) const {
160  Kokkos::HostSpace host;
161 
162  const size_t alloc_size = m_hint_offset * sizeof(uint32_t);
163 
164  uint32_t *const sb_state_array =
165  accessible ? m_sb_state_array : (uint32_t *)host.allocate(alloc_size);
166 
167  if (!accessible) {
168  Kokkos::Impl::DeepCopy<Kokkos::HostSpace, base_memory_space>(
169  sb_state_array, m_sb_state_array, alloc_size);
170  Kokkos::fence(
171  "MemoryPool::get_usage_statistics(): fence after copying state "
172  "array to HostSpace");
173  }
174 
175  stats.superblock_bytes = (1LU << m_sb_size_lg2);
176  stats.max_block_bytes = (1LU << m_max_block_size_lg2);
177  stats.min_block_bytes = (1LU << m_min_block_size_lg2);
178  stats.capacity_bytes = stats.superblock_bytes * m_sb_count;
179  stats.capacity_superblocks = m_sb_count;
180  stats.consumed_superblocks = 0;
181  stats.consumed_blocks = 0;
182  stats.consumed_bytes = 0;
183  stats.reserved_blocks = 0;
184  stats.reserved_bytes = 0;
185 
186  const uint32_t *sb_state_ptr = sb_state_array;
187 
188  for (int32_t i = 0; i < m_sb_count; ++i, sb_state_ptr += m_sb_state_size) {
189  const uint32_t block_count_lg2 = (*sb_state_ptr) >> state_shift;
190 
191  if (block_count_lg2) {
192  const uint32_t block_count = 1u << block_count_lg2;
193  const uint32_t block_size_lg2 = m_sb_size_lg2 - block_count_lg2;
194  const uint32_t block_size = 1u << block_size_lg2;
195  const uint32_t block_used = (*sb_state_ptr) & state_used_mask;
196 
197  stats.consumed_superblocks++;
198  stats.consumed_blocks += block_used;
199  stats.consumed_bytes += static_cast<size_t>(block_used) * block_size;
200  stats.reserved_blocks += block_count - block_used;
201  stats.reserved_bytes +=
202  static_cast<size_t>(block_count - block_used) * block_size;
203  }
204  }
205 
206  if (!accessible) {
207  host.deallocate(sb_state_array, alloc_size);
208  }
209  }
210 
211  void print_state(std::ostream &s) const {
212  Kokkos::HostSpace host;
213 
214  const size_t alloc_size = m_hint_offset * sizeof(uint32_t);
215 
216  uint32_t *const sb_state_array =
217  accessible ? m_sb_state_array : (uint32_t *)host.allocate(alloc_size);
218 
219  if (!accessible) {
220  Kokkos::Impl::DeepCopy<Kokkos::HostSpace, base_memory_space>(
221  sb_state_array, m_sb_state_array, alloc_size);
222  Kokkos::fence(
223  "MemoryPool::print_state(): fence after copying state array to "
224  "HostSpace");
225  }
226 
227  Impl::_print_memory_pool_state(s, sb_state_array, m_sb_count, m_sb_size_lg2,
228  m_sb_state_size, state_shift,
229  state_used_mask);
230 
231  if (!accessible) {
232  host.deallocate(sb_state_array, alloc_size);
233  }
234  }
235 
236  //--------------------------------------------------------------------------
237 
238  KOKKOS_DEFAULTED_FUNCTION MemoryPool(MemoryPool &&) = default;
239  KOKKOS_DEFAULTED_FUNCTION MemoryPool(const MemoryPool &) = default;
240  KOKKOS_DEFAULTED_FUNCTION MemoryPool &operator=(MemoryPool &&) = default;
241  KOKKOS_DEFAULTED_FUNCTION MemoryPool &operator=(const MemoryPool &) = default;
242 
243  KOKKOS_INLINE_FUNCTION MemoryPool()
244  : m_tracker(),
245  m_sb_state_array(nullptr),
246  m_sb_state_size(0),
247  m_sb_size_lg2(0),
248  m_max_block_size_lg2(0),
249  m_min_block_size_lg2(0),
250  m_sb_count(0),
251  m_hint_offset(0),
252  m_data_offset(0),
253  m_unused_padding(0) {}
254 
269  MemoryPool(const base_memory_space &memspace,
270  const size_t min_total_alloc_size, size_t min_block_alloc_size = 0,
271  size_t max_block_alloc_size = 0, size_t min_superblock_size = 0)
272  : m_tracker(),
273  m_sb_state_array(nullptr),
274  m_sb_state_size(0),
275  m_sb_size_lg2(0),
276  m_max_block_size_lg2(0),
277  m_min_block_size_lg2(0),
278  m_sb_count(0),
279  m_hint_offset(0),
280  m_data_offset(0),
281  m_unused_padding(0) {
282  const uint32_t int_align_lg2 = 3; /* align as int[8] */
283  const uint32_t int_align_mask = (1u << int_align_lg2) - 1;
284  const uint32_t default_min_block_size = 1u << 6; /* 64 bytes */
285  const uint32_t default_max_block_size = 1u << 12; /* 4k bytes */
286  const uint32_t default_min_superblock_size = 1u << 20; /* 1M bytes */
287 
288  //--------------------------------------------------
289  // Default block and superblock sizes:
290 
291  if (0 == min_block_alloc_size) {
292  // Default all sizes:
293 
294  min_superblock_size =
295  std::min(size_t(default_min_superblock_size), min_total_alloc_size);
296 
297  min_block_alloc_size =
298  std::min(size_t(default_min_block_size), min_superblock_size);
299 
300  max_block_alloc_size =
301  std::min(size_t(default_max_block_size), min_superblock_size);
302  } else if (0 == min_superblock_size) {
303  // Choose superblock size as minimum of:
304  // max_block_per_superblock * min_block_size
305  // max_superblock_size
306  // min_total_alloc_size
307 
308  const size_t max_superblock =
309  min_block_alloc_size * max_block_per_superblock;
310 
311  min_superblock_size =
312  std::min(max_superblock,
313  std::min(size_t(max_superblock_size), min_total_alloc_size));
314  }
315 
316  if (0 == max_block_alloc_size) {
317  max_block_alloc_size = min_superblock_size;
318  }
319 
320  //--------------------------------------------------
321 
322  /* Enforce size constraints:
323  * min_block_alloc_size <= max_block_alloc_size
324  * max_block_alloc_size <= min_superblock_size
325  * min_superblock_size <= max_superblock_size
326  * min_superblock_size <= min_total_alloc_size
327  * min_superblock_size <= min_block_alloc_size *
328  * max_block_per_superblock
329  */
330 
331  Kokkos::Impl::memory_pool_bounds_verification(
332  min_block_alloc_size, max_block_alloc_size, min_superblock_size,
333  max_superblock_size, max_block_per_superblock, min_total_alloc_size);
334 
335  //--------------------------------------------------
336  // Block and superblock size is power of two:
337  // Maximum value is 'max_superblock_size'
338 
339  m_min_block_size_lg2 =
340  Kokkos::Impl::integral_power_of_two_that_contains(min_block_alloc_size);
341 
342  m_max_block_size_lg2 =
343  Kokkos::Impl::integral_power_of_two_that_contains(max_block_alloc_size);
344 
345  m_sb_size_lg2 =
346  Kokkos::Impl::integral_power_of_two_that_contains(min_superblock_size);
347 
348  {
349  // number of superblocks is multiple of superblock size that
350  // can hold min_total_alloc_size.
351 
352  const uint64_t sb_size_mask = (1LU << m_sb_size_lg2) - 1;
353 
354  m_sb_count = (min_total_alloc_size + sb_size_mask) >> m_sb_size_lg2;
355  }
356 
357  {
358  // Any superblock can be assigned to the smallest size block
359  // Size the block bitset to maximum number of blocks
360 
361  const uint32_t max_block_count_lg2 = m_sb_size_lg2 - m_min_block_size_lg2;
362 
363  m_sb_state_size =
364  (CB::buffer_bound_lg2(max_block_count_lg2) + int_align_mask) &
365  ~int_align_mask;
366  }
367 
368  // Array of all superblock states
369 
370  const size_t all_sb_state_size =
371  (m_sb_count * m_sb_state_size + int_align_mask) & ~int_align_mask;
372 
373  // Number of block sizes
374 
375  const int32_t number_block_sizes =
376  1 + m_max_block_size_lg2 - m_min_block_size_lg2;
377 
378  // Array length for possible block sizes
379  // Hint array is one uint32_t per block size
380 
381  const int32_t block_size_array_size =
382  (number_block_sizes + int_align_mask) & ~int_align_mask;
383 
384  m_hint_offset = all_sb_state_size;
385  m_data_offset = m_hint_offset + block_size_array_size * HINT_PER_BLOCK_SIZE;
386 
387  // Allocation:
388 
389  const size_t header_size = m_data_offset * sizeof(uint32_t);
390  const size_t alloc_size =
391  header_size + (size_t(m_sb_count) << m_sb_size_lg2);
392 
393  Record *rec = Record::allocate(memspace, "Kokkos::MemoryPool", alloc_size);
394 
395  m_tracker.assign_allocated_record_to_uninitialized(rec);
396 
397  m_sb_state_array = (uint32_t *)rec->data();
398 
399  Kokkos::HostSpace host;
400 
401  uint32_t *const sb_state_array =
402  accessible ? m_sb_state_array : (uint32_t *)host.allocate(header_size);
403 
404  for (int32_t i = 0; i < m_data_offset; ++i) sb_state_array[i] = 0;
405 
406  // Initial assignment of empty superblocks to block sizes:
407 
408  for (int32_t i = 0; i < number_block_sizes; ++i) {
409  const uint32_t block_size_lg2 = i + m_min_block_size_lg2;
410  const uint32_t block_count_lg2 = m_sb_size_lg2 - block_size_lg2;
411  const uint32_t block_state = block_count_lg2 << state_shift;
412  const uint32_t hint_begin = m_hint_offset + i * HINT_PER_BLOCK_SIZE;
413 
414  // for block size index 'i':
415  // sb_id_hint = sb_state_array[ hint_begin ];
416  // sb_id_begin = sb_state_array[ hint_begin + 1 ];
417 
418  const int32_t jbeg = (i * m_sb_count) / number_block_sizes;
419  const int32_t jend = ((i + 1) * m_sb_count) / number_block_sizes;
420 
421  sb_state_array[hint_begin] = uint32_t(jbeg);
422  sb_state_array[hint_begin + 1] = uint32_t(jbeg);
423 
424  for (int32_t j = jbeg; j < jend; ++j) {
425  sb_state_array[j * m_sb_state_size] = block_state;
426  }
427  }
428 
429  // Write out initialized state:
430 
431  if (!accessible) {
432  Kokkos::Impl::DeepCopy<base_memory_space, Kokkos::HostSpace>(
433  m_sb_state_array, sb_state_array, header_size);
434  Kokkos::fence(
435  "MemoryPool::MemoryPool(): fence after copying state array from "
436  "HostSpace");
437 
438  host.deallocate(sb_state_array, header_size);
439  } else {
440  Kokkos::memory_fence();
441  }
442  }
443 
444  //--------------------------------------------------------------------------
445 
446  private:
447  /* Given a size 'n' get the block size in which it can be allocated.
448  * Restrict lower bound to minimum block size.
449  */
450  KOKKOS_FORCEINLINE_FUNCTION
451  uint32_t get_block_size_lg2(uint32_t n) const noexcept {
452  const unsigned i = Kokkos::Impl::integral_power_of_two_that_contains(n);
453 
454  return i < m_min_block_size_lg2 ? m_min_block_size_lg2 : i;
455  }
456 
457  public:
458  /* Return 0 for invalid block size */
459  KOKKOS_INLINE_FUNCTION
460  uint32_t allocate_block_size(uint64_t alloc_size) const noexcept {
461  return alloc_size <= (1UL << m_max_block_size_lg2)
462  ? (1UL << get_block_size_lg2(uint32_t(alloc_size)))
463  : 0;
464  }
465 
466  //--------------------------------------------------------------------------
476  KOKKOS_FUNCTION
477  void *allocate(size_t alloc_size, int32_t attempt_limit = 1) const noexcept {
478  if (size_t(1LU << m_max_block_size_lg2) < alloc_size) {
479  Kokkos::abort(
480  "Kokkos MemoryPool allocation request exceeded specified maximum "
481  "allocation size");
482  }
483 
484  if (0 == alloc_size) return nullptr;
485 
486  void *p = nullptr;
487 
488  const uint32_t block_size_lg2 = get_block_size_lg2(alloc_size);
489 
490  // Allocation will fit within a superblock
491  // that has block sizes ( 1 << block_size_lg2 )
492 
493  const uint32_t block_count_lg2 = m_sb_size_lg2 - block_size_lg2;
494  const uint32_t block_state = block_count_lg2 << state_shift;
495  const uint32_t block_count = 1u << block_count_lg2;
496 
497  // Superblock hints for this block size:
498  // hint_sb_id_ptr[0] is the dynamically changing hint
499  // hint_sb_id_ptr[1] is the static start point
500 
501  volatile uint32_t *const hint_sb_id_ptr =
502  m_sb_state_array /* memory pool state array */
503  + m_hint_offset /* offset to hint portion of array */
504  + HINT_PER_BLOCK_SIZE /* number of hints per block size */
505  * (block_size_lg2 - m_min_block_size_lg2); /* block size id */
506 
507  const int32_t sb_id_begin = int32_t(hint_sb_id_ptr[1]);
508 
509  // Fast query clock register 'tic' to pseudo-randomize
510  // the guess for which block within a superblock should
511  // be claimed. If not available then a search occurs.
512 #if defined(KOKKOS_ENABLE_SYCL) && !defined(KOKKOS_ARCH_INTEL_GPU)
513  const uint32_t block_id_hint = alloc_size;
514 #else
515  const uint32_t block_id_hint =
516  (uint32_t)(Kokkos::Impl::clock_tic()
517 #ifdef __CUDA_ARCH__ // FIXME_CUDA
518  // Spread out potentially concurrent access
519  // by threads within a warp or thread block.
520  + (threadIdx.x + blockDim.x * threadIdx.y)
521 #endif
522  );
523 #endif
524 
525  // expected state of superblock for allocation
526  uint32_t sb_state = block_state;
527 
528  int32_t sb_id = -1;
529 
530  volatile uint32_t *sb_state_array = nullptr;
531 
532  while (attempt_limit) {
533  int32_t hint_sb_id = -1;
534 
535  if (sb_id < 0) {
536  // No superblock specified, try the hint for this block size
537 
538  sb_id = hint_sb_id = int32_t(*hint_sb_id_ptr);
539 
540  sb_state_array = m_sb_state_array + (sb_id * m_sb_state_size);
541  }
542 
543  // Require:
544  // 0 <= sb_id
545  // sb_state_array == m_sb_state_array + m_sb_state_size * sb_id
546 
547  if (sb_state == (state_header_mask & *sb_state_array)) {
548  // This superblock state is as expected, for the moment.
549  // Attempt to claim a bit. The attempt updates the state
550  // so have already made sure the state header is as expected.
551 
552  const uint32_t count_lg2 = sb_state >> state_shift;
553  const uint32_t mask = (1u << count_lg2) - 1;
554 
555  const Kokkos::pair<int, int> result = CB::acquire_bounded_lg2(
556  sb_state_array, count_lg2, block_id_hint & mask, sb_state);
557 
558  // If result.first < 0 then failed to acquire
559  // due to either full or buffer was wrong state.
560  // Could be wrong state if a deallocation raced the
561  // superblock to empty before the acquire could succeed.
562 
563  if (0 <= result.first) { // acquired a bit
564 
565  const uint32_t size_lg2 = m_sb_size_lg2 - count_lg2;
566 
567  // Set the allocated block pointer
568 
569  p = ((char *)(m_sb_state_array + m_data_offset)) +
570  (uint64_t(sb_id) << m_sb_size_lg2) // superblock memory
571  + (uint64_t(result.first) << size_lg2); // block memory
572 
573  break; // Success
574  }
575  }
576  //------------------------------------------------------------------
577  // Arrive here if failed to acquire a block.
578  // Must find a new superblock.
579 
580  // Start searching at designated index for this block size.
581  // Look for superblock that, in preferential order,
582  // 1) part-full superblock of this block size
583  // 2) empty superblock to claim for this block size
584  // 3) part-full superblock of the next larger block size
585 
586  sb_state = block_state; // Expect to find the desired state
587  sb_id = -1;
588 
589  bool update_hint = false;
590  int32_t sb_id_empty = -1;
591  int32_t sb_id_large = -1;
592  uint32_t sb_state_large = 0;
593 
594  sb_state_array = m_sb_state_array + sb_id_begin * m_sb_state_size;
595 
596  for (int32_t i = 0, id = sb_id_begin; i < m_sb_count; ++i) {
597  // Query state of the candidate superblock.
598  // Note that the state may change at any moment
599  // as concurrent allocations and deallocations occur.
600 
601  const uint32_t full_state = *sb_state_array;
602  const uint32_t used = full_state & state_used_mask;
603  const uint32_t state = full_state & state_header_mask;
604 
605  if (state == block_state) {
606  // Superblock is assigned to this block size
607 
608  if (used < block_count) {
609  // There is room to allocate one block
610 
611  sb_id = id;
612 
613  // Is there room to allocate more than one block?
614 
615  update_hint = used + 1 < block_count;
616 
617  break;
618  }
619  } else if (0 == used) {
620  // Superblock is empty
621 
622  if (-1 == sb_id_empty) {
623  // Superblock is not assigned to this block size
624  // and is the first empty superblock encountered.
625  // Save this id to use if a partfull superblock is not found.
626 
627  sb_id_empty = id;
628  }
629  } else if ((-1 == sb_id_empty /* have not found an empty */) &&
630  (-1 == sb_id_large /* have not found a larger */) &&
631  (state < block_state /* a larger block */) &&
632  // is not full:
633  (used < (1u << (state >> state_shift)))) {
634  // First superblock encountered that is
635  // larger than this block size and
636  // has room for an allocation.
637  // Save this id to use of partfull or empty superblock not found
638  sb_id_large = id;
639  sb_state_large = state;
640  }
641 
642  // Iterate around the superblock array:
643 
644  if (++id < m_sb_count) {
645  sb_state_array += m_sb_state_size;
646  } else {
647  id = 0;
648  sb_state_array = m_sb_state_array;
649  }
650  }
651 
652  // printf(" search m_sb_count(%d) sb_id(%d) sb_id_empty(%d)
653  // sb_id_large(%d)\n" , m_sb_count , sb_id , sb_id_empty , sb_id_large);
654 
655  if (sb_id < 0) {
656  // Did not find a partfull superblock for this block size.
657 
658  if (0 <= sb_id_empty) {
659  // Found first empty superblock following designated superblock
660  // Attempt to claim it for this block size.
661  // If the claim fails assume that another thread claimed it
662  // for this block size and try to use it anyway,
663  // but do not update hint.
664 
665  sb_id = sb_id_empty;
666 
667  sb_state_array = m_sb_state_array + (sb_id * m_sb_state_size);
668 
669  // If successfully changed assignment of empty superblock 'sb_id'
670  // to this block_size then update the hint.
671 
672  const uint32_t state_empty = state_header_mask & *sb_state_array;
673 
674  // If this thread claims the empty block then update the hint
675  update_hint =
676  state_empty == Kokkos::atomic_compare_exchange(
677  sb_state_array, state_empty, block_state);
678  } else if (0 <= sb_id_large) {
679  // Found a larger superblock with space available
680 
681  sb_id = sb_id_large;
682  sb_state = sb_state_large;
683 
684  sb_state_array = m_sb_state_array + (sb_id * m_sb_state_size);
685  } else {
686  // Did not find a potentially usable superblock
687  --attempt_limit;
688  }
689  }
690 
691  if (update_hint) {
692  Kokkos::atomic_compare_exchange(hint_sb_id_ptr, uint32_t(hint_sb_id),
693  uint32_t(sb_id));
694  }
695  } // end allocation attempt loop
696  //--------------------------------------------------------------------
697 
698  return p;
699  }
700  // end allocate
701  //--------------------------------------------------------------------------
702 
709  KOKKOS_INLINE_FUNCTION
710  void deallocate(void *p, size_t /* alloc_size */) const noexcept {
711  if (nullptr == p) return;
712 
713  // Determine which superblock and block
714  const ptrdiff_t d =
715  static_cast<char *>(p) -
716  reinterpret_cast<char *>(m_sb_state_array + m_data_offset);
717 
718  // Verify contained within the memory pool's superblocks:
719  const int ok_contains =
720  (0 <= d) && (size_t(d) < (size_t(m_sb_count) << m_sb_size_lg2));
721 
722  int ok_block_aligned = 0;
723  int ok_dealloc_once = 0;
724 
725  if (ok_contains) {
726  const int sb_id = d >> m_sb_size_lg2;
727 
728  // State array for the superblock.
729  volatile uint32_t *const sb_state_array =
730  m_sb_state_array + (sb_id * m_sb_state_size);
731 
732  const uint32_t block_state = (*sb_state_array) & state_header_mask;
733  const uint32_t block_size_lg2 =
734  m_sb_size_lg2 - (block_state >> state_shift);
735 
736  ok_block_aligned = 0 == (d & ((1UL << block_size_lg2) - 1));
737 
738  if (ok_block_aligned) {
739  // Map address to block's bit
740  // mask into superblock and then shift down for block index
741 
742  const uint32_t bit =
743  (d & (ptrdiff_t(1LU << m_sb_size_lg2) - 1)) >> block_size_lg2;
744 
745  const int result = CB::release(sb_state_array, bit, block_state);
746 
747  ok_dealloc_once = 0 <= result;
748  }
749  }
750 
751  if (!ok_contains || !ok_block_aligned || !ok_dealloc_once) {
752  Kokkos::abort("Kokkos MemoryPool::deallocate given erroneous pointer");
753  }
754  }
755  // end deallocate
756  //--------------------------------------------------------------------------
757 
758  KOKKOS_INLINE_FUNCTION
759  int number_of_superblocks() const noexcept { return m_sb_count; }
760 
761  KOKKOS_INLINE_FUNCTION
762  void superblock_state(int sb_id, int &block_size, int &block_count_capacity,
763  int &block_count_used) const noexcept {
764  block_size = 0;
765  block_count_capacity = 0;
766  block_count_used = 0;
767 
768  bool can_access_state_array = []() {
769  KOKKOS_IF_ON_HOST(
770  (return SpaceAccessibility<DefaultHostExecutionSpace,
771  base_memory_space>::accessible;))
772  KOKKOS_IF_ON_DEVICE(
773  (return SpaceAccessibility<DefaultExecutionSpace,
774  base_memory_space>::accessible;))
775  }();
776 
777  if (can_access_state_array) {
778  // Can access the state array
779 
780  const uint32_t state =
781  ((uint32_t volatile *)m_sb_state_array)[sb_id * m_sb_state_size];
782 
783  const uint32_t block_count_lg2 = state >> state_shift;
784  const uint32_t block_used = state & state_used_mask;
785 
786  block_size = 1LU << (m_sb_size_lg2 - block_count_lg2);
787  block_count_capacity = 1LU << block_count_lg2;
788  block_count_used = block_used;
789  }
790  }
791 };
792 
793 } // namespace Kokkos
794 
795 #endif /* #ifndef KOKKOS_MEMORYPOOL_HPP */
void * allocate(const ExecutionSpace &, const size_t arg_alloc_size) const
Allocate untracked memory in the space.
Replacement for std::pair that works on CUDA devices.
Definition: Kokkos_Pair.hpp:44
void deallocate(void *const arg_alloc_ptr, const size_t arg_alloc_size) const
Deallocate untracked memory in the space.
first_type first
The first element of the pair.
Definition: Kokkos_Pair.hpp:51
Memory management for host memory.
Declaration of parallel operators.
Atomic functions.
Access relationship between DstMemorySpace and SrcMemorySpace.