Kokkos Core Kernels Package  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Kokkos_MemoryPool.hpp
1 /*
2 //@HEADER
3 // ************************************************************************
4 //
5 // Kokkos v. 3.0
6 // Copyright (2020) National Technology & Engineering
7 // Solutions of Sandia, LLC (NTESS).
8 //
9 // Under the terms of Contract DE-NA0003525 with NTESS,
10 // the U.S. Government retains certain rights in this software.
11 //
12 // Redistribution and use in source and binary forms, with or without
13 // modification, are permitted provided that the following conditions are
14 // met:
15 //
16 // 1. Redistributions of source code must retain the above copyright
17 // notice, this list of conditions and the following disclaimer.
18 //
19 // 2. Redistributions in binary form must reproduce the above copyright
20 // notice, this list of conditions and the following disclaimer in the
21 // documentation and/or other materials provided with the distribution.
22 //
23 // 3. Neither the name of the Corporation nor the names of the
24 // contributors may be used to endorse or promote products derived from
25 // this software without specific prior written permission.
26 //
27 // THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
28 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
31 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
33 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
34 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 //
39 // Questions? Contact Christian R. Trott (crtrott@sandia.gov)
40 //
41 // ************************************************************************
42 //@HEADER
43 */
44 
45 #ifndef KOKKOS_MEMORYPOOL_HPP
46 #define KOKKOS_MEMORYPOOL_HPP
47 
48 #include <Kokkos_Core_fwd.hpp>
49 #include <Kokkos_Parallel.hpp>
50 #include <Kokkos_Atomic.hpp>
51 #include <impl/Kokkos_ConcurrentBitset.hpp>
52 #include <impl/Kokkos_Error.hpp>
53 #include <impl/Kokkos_SharedAlloc.hpp>
54 
55 namespace Kokkos {
56 namespace Impl {
57 /* Report violation of size constraints:
58  * min_block_alloc_size <= max_block_alloc_size
59  * max_block_alloc_size <= min_superblock_size
60  * min_superblock_size <= max_superblock_size
61  * min_superblock_size <= min_total_alloc_size
62  * min_superblock_size <= min_block_alloc_size *
63  * max_block_per_superblock
64  */
65 void memory_pool_bounds_verification(size_t min_block_alloc_size,
66  size_t max_block_alloc_size,
67  size_t min_superblock_size,
68  size_t max_superblock_size,
69  size_t max_block_per_superblock,
70  size_t min_total_alloc_size);
71 } // namespace Impl
72 } // namespace Kokkos
73 
74 namespace Kokkos {
75 
76 template <typename DeviceType>
77 class MemoryPool {
78  private:
79  typedef typename Kokkos::Impl::concurrent_bitset CB;
80 
81  enum : uint32_t { bits_per_int_lg2 = CB::bits_per_int_lg2 };
82  enum : uint32_t { state_shift = CB::state_shift };
83  enum : uint32_t { state_used_mask = CB::state_used_mask };
84  enum : uint32_t { state_header_mask = CB::state_header_mask };
85  enum : uint32_t { max_bit_count_lg2 = CB::max_bit_count_lg2 };
86  enum : uint32_t { max_bit_count = CB::max_bit_count };
87 
88  enum : uint32_t { HINT_PER_BLOCK_SIZE = 2 };
89 
90  /* Each superblock has a concurrent bitset state
91  * which is an array of uint32_t integers.
92  * [ { block_count_lg2 : state_shift bits
93  * , used_block_count : ( 32 - state_shift ) bits
94  * }
95  * , { block allocation bit set }* ]
96  *
97  * As superblocks are assigned (allocated) to a block size
98  * and released (deallocated) back to empty the superblock state
99  * is concurrently updated.
100  */
101 
102  /* Mapping between block_size <-> block_state
103  *
104  * block_state = ( m_sb_size_lg2 - block_size_lg2 ) << state_shift
105  * block_size = m_sb_size_lg2 - ( block_state >> state_shift )
106  *
107  * Thus A_block_size < B_block_size <=> A_block_state > B_block_state
108  */
109 
110  typedef typename DeviceType::memory_space base_memory_space;
111 
112  enum {
114  base_memory_space>::accessible
115  };
116 
117  typedef Kokkos::Impl::SharedAllocationTracker Tracker;
118  typedef Kokkos::Impl::SharedAllocationRecord<base_memory_space> Record;
119 
120  Tracker m_tracker;
121  uint32_t *m_sb_state_array;
122  uint32_t m_sb_state_size;
123  uint32_t m_sb_size_lg2;
124  uint32_t m_max_block_size_lg2;
125  uint32_t m_min_block_size_lg2;
126  int32_t m_sb_count;
127  int32_t m_hint_offset; // Offset to K * #block_size array of hints
128  int32_t m_data_offset; // Offset to 0th superblock data
129  int32_t m_unused_padding;
130 
131  public:
132  using memory_space = typename DeviceType::memory_space;
133 
135  enum : uint32_t { max_superblock_size = 1LU << 31 /* 2 gigabytes */ };
136  enum : uint32_t { max_block_per_superblock = max_bit_count };
137 
138  //--------------------------------------------------------------------------
139 
140  KOKKOS_INLINE_FUNCTION
141  bool operator==(MemoryPool const &other) const {
142  return m_sb_state_array == other.m_sb_state_array;
143  }
144 
145  KOKKOS_INLINE_FUNCTION
146  size_t capacity() const noexcept {
147  return size_t(m_sb_count) << m_sb_size_lg2;
148  }
149 
150  KOKKOS_INLINE_FUNCTION
151  size_t min_block_size() const noexcept {
152  return (1LU << m_min_block_size_lg2);
153  }
154 
155  KOKKOS_INLINE_FUNCTION
156  size_t max_block_size() const noexcept {
157  return (1LU << m_max_block_size_lg2);
158  }
159 
160  struct usage_statistics {
161  size_t capacity_bytes;
162  size_t superblock_bytes;
163  size_t max_block_bytes;
164  size_t min_block_bytes;
165  size_t capacity_superblocks;
166  size_t consumed_superblocks;
167  size_t consumed_blocks;
168  size_t consumed_bytes;
169  size_t reserved_blocks;
170  size_t reserved_bytes;
171  };
172 
173  void get_usage_statistics(usage_statistics &stats) const {
174  Kokkos::HostSpace host;
175 
176  const size_t alloc_size = m_hint_offset * sizeof(uint32_t);
177 
178  uint32_t *const sb_state_array =
179  accessible ? m_sb_state_array : (uint32_t *)host.allocate(alloc_size);
180 
181  if (!accessible) {
182  Kokkos::Impl::DeepCopy<Kokkos::HostSpace, base_memory_space>(
183  sb_state_array, m_sb_state_array, alloc_size);
184  }
185 
186  stats.superblock_bytes = (1LU << m_sb_size_lg2);
187  stats.max_block_bytes = (1LU << m_max_block_size_lg2);
188  stats.min_block_bytes = (1LU << m_min_block_size_lg2);
189  stats.capacity_bytes = stats.superblock_bytes * m_sb_count;
190  stats.capacity_superblocks = m_sb_count;
191  stats.consumed_superblocks = 0;
192  stats.consumed_blocks = 0;
193  stats.consumed_bytes = 0;
194  stats.reserved_blocks = 0;
195  stats.reserved_bytes = 0;
196 
197  const uint32_t *sb_state_ptr = sb_state_array;
198 
199  for (int32_t i = 0; i < m_sb_count; ++i, sb_state_ptr += m_sb_state_size) {
200  const uint32_t block_count_lg2 = (*sb_state_ptr) >> state_shift;
201 
202  if (block_count_lg2) {
203  const uint32_t block_count = 1u << block_count_lg2;
204  const uint32_t block_size_lg2 = m_sb_size_lg2 - block_count_lg2;
205  const uint32_t block_size = 1u << block_size_lg2;
206  const uint32_t block_used = (*sb_state_ptr) & state_used_mask;
207 
208  stats.consumed_superblocks++;
209  stats.consumed_blocks += block_used;
210  stats.consumed_bytes += block_used * block_size;
211  stats.reserved_blocks += block_count - block_used;
212  stats.reserved_bytes += (block_count - block_used) * block_size;
213  }
214  }
215 
216  if (!accessible) {
217  host.deallocate(sb_state_array, alloc_size);
218  }
219  }
220 
221  void print_state(std::ostream &s) const {
222  Kokkos::HostSpace host;
223 
224  const size_t alloc_size = m_hint_offset * sizeof(uint32_t);
225 
226  uint32_t *const sb_state_array =
227  accessible ? m_sb_state_array : (uint32_t *)host.allocate(alloc_size);
228 
229  if (!accessible) {
230  Kokkos::Impl::DeepCopy<Kokkos::HostSpace, base_memory_space>(
231  sb_state_array, m_sb_state_array, alloc_size);
232  }
233 
234  const uint32_t *sb_state_ptr = sb_state_array;
235 
236  s << "pool_size(" << (size_t(m_sb_count) << m_sb_size_lg2) << ")"
237  << " superblock_size(" << (1LU << m_sb_size_lg2) << ")" << std::endl;
238 
239  for (int32_t i = 0; i < m_sb_count; ++i, sb_state_ptr += m_sb_state_size) {
240  if (*sb_state_ptr) {
241  const uint32_t block_count_lg2 = (*sb_state_ptr) >> state_shift;
242  const uint32_t block_size_lg2 = m_sb_size_lg2 - block_count_lg2;
243  const uint32_t block_count = 1u << block_count_lg2;
244  const uint32_t block_used = (*sb_state_ptr) & state_used_mask;
245 
246  s << "Superblock[ " << i << " / " << m_sb_count << " ] {"
247  << " block_size(" << (1 << block_size_lg2) << ")"
248  << " block_count( " << block_used << " / " << block_count << " )"
249  << std::endl;
250  }
251  }
252 
253  if (!accessible) {
254  host.deallocate(sb_state_array, alloc_size);
255  }
256  }
257 
258  //--------------------------------------------------------------------------
259 
260  KOKKOS_DEFAULTED_FUNCTION MemoryPool(MemoryPool &&) = default;
261  KOKKOS_DEFAULTED_FUNCTION MemoryPool(const MemoryPool &) = default;
262  KOKKOS_DEFAULTED_FUNCTION MemoryPool &operator=(MemoryPool &&) = default;
263  KOKKOS_DEFAULTED_FUNCTION MemoryPool &operator=(const MemoryPool &) = default;
264 
265  KOKKOS_INLINE_FUNCTION MemoryPool()
266  : m_tracker(),
267  m_sb_state_array(nullptr),
268  m_sb_state_size(0),
269  m_sb_size_lg2(0),
270  m_max_block_size_lg2(0),
271  m_min_block_size_lg2(0),
272  m_sb_count(0),
273  m_hint_offset(0),
274  m_data_offset(0),
275  m_unused_padding(0) {}
276 
291  MemoryPool(const base_memory_space &memspace,
292  const size_t min_total_alloc_size, size_t min_block_alloc_size = 0,
293  size_t max_block_alloc_size = 0, size_t min_superblock_size = 0)
294  : m_tracker(),
295  m_sb_state_array(nullptr),
296  m_sb_state_size(0),
297  m_sb_size_lg2(0),
298  m_max_block_size_lg2(0),
299  m_min_block_size_lg2(0),
300  m_sb_count(0),
301  m_hint_offset(0),
302  m_data_offset(0),
303  m_unused_padding(0) {
304  const uint32_t int_align_lg2 = 3; /* align as int[8] */
305  const uint32_t int_align_mask = (1u << int_align_lg2) - 1;
306  const uint32_t default_min_block_size = 1u << 6; /* 64 bytes */
307  const uint32_t default_max_block_size = 1u << 12; /* 4k bytes */
308  const uint32_t default_min_superblock_size = 1u << 20; /* 1M bytes */
309 
310  //--------------------------------------------------
311  // Default block and superblock sizes:
312 
313  if (0 == min_block_alloc_size) {
314  // Default all sizes:
315 
316  min_superblock_size =
317  std::min(size_t(default_min_superblock_size), min_total_alloc_size);
318 
319  min_block_alloc_size =
320  std::min(size_t(default_min_block_size), min_superblock_size);
321 
322  max_block_alloc_size =
323  std::min(size_t(default_max_block_size), min_superblock_size);
324  } else if (0 == min_superblock_size) {
325  // Choose superblock size as minimum of:
326  // max_block_per_superblock * min_block_size
327  // max_superblock_size
328  // min_total_alloc_size
329 
330  const size_t max_superblock =
331  min_block_alloc_size * max_block_per_superblock;
332 
333  min_superblock_size =
334  std::min(max_superblock,
335  std::min(size_t(max_superblock_size), min_total_alloc_size));
336  }
337 
338  if (0 == max_block_alloc_size) {
339  max_block_alloc_size = min_superblock_size;
340  }
341 
342  //--------------------------------------------------
343 
344  /* Enforce size constraints:
345  * min_block_alloc_size <= max_block_alloc_size
346  * max_block_alloc_size <= min_superblock_size
347  * min_superblock_size <= max_superblock_size
348  * min_superblock_size <= min_total_alloc_size
349  * min_superblock_size <= min_block_alloc_size *
350  * max_block_per_superblock
351  */
352 
353  Kokkos::Impl::memory_pool_bounds_verification(
354  min_block_alloc_size, max_block_alloc_size, min_superblock_size,
355  max_superblock_size, max_block_per_superblock, min_total_alloc_size);
356 
357  //--------------------------------------------------
358  // Block and superblock size is power of two:
359  // Maximum value is 'max_superblock_size'
360 
361  m_min_block_size_lg2 =
362  Kokkos::Impl::integral_power_of_two_that_contains(min_block_alloc_size);
363 
364  m_max_block_size_lg2 =
365  Kokkos::Impl::integral_power_of_two_that_contains(max_block_alloc_size);
366 
367  m_sb_size_lg2 =
368  Kokkos::Impl::integral_power_of_two_that_contains(min_superblock_size);
369 
370  {
371  // number of superblocks is multiple of superblock size that
372  // can hold min_total_alloc_size.
373 
374  const uint64_t sb_size_mask = (1LU << m_sb_size_lg2) - 1;
375 
376  m_sb_count = (min_total_alloc_size + sb_size_mask) >> m_sb_size_lg2;
377  }
378 
379  {
380  // Any superblock can be assigned to the smallest size block
381  // Size the block bitset to maximum number of blocks
382 
383  const uint32_t max_block_count_lg2 = m_sb_size_lg2 - m_min_block_size_lg2;
384 
385  m_sb_state_size =
386  (CB::buffer_bound_lg2(max_block_count_lg2) + int_align_mask) &
387  ~int_align_mask;
388  }
389 
390  // Array of all superblock states
391 
392  const size_t all_sb_state_size =
393  (m_sb_count * m_sb_state_size + int_align_mask) & ~int_align_mask;
394 
395  // Number of block sizes
396 
397  const int32_t number_block_sizes =
398  1 + m_max_block_size_lg2 - m_min_block_size_lg2;
399 
400  // Array length for possible block sizes
401  // Hint array is one uint32_t per block size
402 
403  const int32_t block_size_array_size =
404  (number_block_sizes + int_align_mask) & ~int_align_mask;
405 
406  m_hint_offset = all_sb_state_size;
407  m_data_offset = m_hint_offset + block_size_array_size * HINT_PER_BLOCK_SIZE;
408 
409  // Allocation:
410 
411  const size_t header_size = m_data_offset * sizeof(uint32_t);
412  const size_t alloc_size =
413  header_size + (size_t(m_sb_count) << m_sb_size_lg2);
414 
415  Record *rec = Record::allocate(memspace, "MemoryPool", alloc_size);
416 
417  m_tracker.assign_allocated_record_to_uninitialized(rec);
418 
419  m_sb_state_array = (uint32_t *)rec->data();
420 
421  Kokkos::HostSpace host;
422 
423  uint32_t *const sb_state_array =
424  accessible ? m_sb_state_array : (uint32_t *)host.allocate(header_size);
425 
426  for (int32_t i = 0; i < m_data_offset; ++i) sb_state_array[i] = 0;
427 
428  // Initial assignment of empty superblocks to block sizes:
429 
430  for (int32_t i = 0; i < number_block_sizes; ++i) {
431  const uint32_t block_size_lg2 = i + m_min_block_size_lg2;
432  const uint32_t block_count_lg2 = m_sb_size_lg2 - block_size_lg2;
433  const uint32_t block_state = block_count_lg2 << state_shift;
434  const uint32_t hint_begin = m_hint_offset + i * HINT_PER_BLOCK_SIZE;
435 
436  // for block size index 'i':
437  // sb_id_hint = sb_state_array[ hint_begin ];
438  // sb_id_begin = sb_state_array[ hint_begin + 1 ];
439 
440  const int32_t jbeg = (i * m_sb_count) / number_block_sizes;
441  const int32_t jend = ((i + 1) * m_sb_count) / number_block_sizes;
442 
443  sb_state_array[hint_begin] = uint32_t(jbeg);
444  sb_state_array[hint_begin + 1] = uint32_t(jbeg);
445 
446  for (int32_t j = jbeg; j < jend; ++j) {
447  sb_state_array[j * m_sb_state_size] = block_state;
448  }
449  }
450 
451  // Write out initialized state:
452 
453  if (!accessible) {
454  Kokkos::Impl::DeepCopy<base_memory_space, Kokkos::HostSpace>(
455  m_sb_state_array, sb_state_array, header_size);
456 
457  host.deallocate(sb_state_array, header_size);
458  } else {
459  Kokkos::memory_fence();
460  }
461  }
462 
463  //--------------------------------------------------------------------------
464 
465  private:
466  /* Given a size 'n' get the block size in which it can be allocated.
467  * Restrict lower bound to minimum block size.
468  */
469  KOKKOS_FORCEINLINE_FUNCTION
470  uint32_t get_block_size_lg2(uint32_t n) const noexcept {
471  const unsigned i = Kokkos::Impl::integral_power_of_two_that_contains(n);
472 
473  return i < m_min_block_size_lg2 ? m_min_block_size_lg2 : i;
474  }
475 
476  public:
477  /* Return 0 for invalid block size */
478  KOKKOS_INLINE_FUNCTION
479  uint32_t allocate_block_size(uint64_t alloc_size) const noexcept {
480  return alloc_size <= (1UL << m_max_block_size_lg2)
481  ? (1UL << get_block_size_lg2(uint32_t(alloc_size)))
482  : 0;
483  }
484 
485  //--------------------------------------------------------------------------
495  KOKKOS_FUNCTION
496  void *allocate(size_t alloc_size, int32_t attempt_limit = 1) const noexcept {
497  if (size_t(1LU << m_max_block_size_lg2) < alloc_size) {
498  Kokkos::abort(
499  "Kokkos MemoryPool allocation request exceeded specified maximum "
500  "allocation size");
501  }
502 
503  if (0 == alloc_size) return nullptr;
504 
505  void *p = nullptr;
506 
507  const uint32_t block_size_lg2 = get_block_size_lg2(alloc_size);
508 
509  // Allocation will fit within a superblock
510  // that has block sizes ( 1 << block_size_lg2 )
511 
512  const uint32_t block_count_lg2 = m_sb_size_lg2 - block_size_lg2;
513  const uint32_t block_state = block_count_lg2 << state_shift;
514  const uint32_t block_count = 1u << block_count_lg2;
515 
516  // Superblock hints for this block size:
517  // hint_sb_id_ptr[0] is the dynamically changing hint
518  // hint_sb_id_ptr[1] is the static start point
519 
520  volatile uint32_t *const hint_sb_id_ptr =
521  m_sb_state_array /* memory pool state array */
522  + m_hint_offset /* offset to hint portion of array */
523  + HINT_PER_BLOCK_SIZE /* number of hints per block size */
524  * (block_size_lg2 - m_min_block_size_lg2); /* block size id */
525 
526  const int32_t sb_id_begin = int32_t(hint_sb_id_ptr[1]);
527 
528  // Fast query clock register 'tic' to pseudo-randomize
529  // the guess for which block within a superblock should
530  // be claimed. If not available then a search occurs.
531 
532  const uint32_t block_id_hint =
533  (uint32_t)(Kokkos::Impl::clock_tic()
534 #if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA)
535  // Spread out potentially concurrent access
536  // by threads within a warp or thread block.
537  + (threadIdx.x + blockDim.x * threadIdx.y)
538 #endif
539  );
540 
541  // expected state of superblock for allocation
542  uint32_t sb_state = block_state;
543 
544  int32_t sb_id = -1;
545 
546  volatile uint32_t *sb_state_array = nullptr;
547 
548  while (attempt_limit) {
549  int32_t hint_sb_id = -1;
550 
551  if (sb_id < 0) {
552  // No superblock specified, try the hint for this block size
553 
554  sb_id = hint_sb_id = int32_t(*hint_sb_id_ptr);
555 
556  sb_state_array = m_sb_state_array + (sb_id * m_sb_state_size);
557  }
558 
559  // Require:
560  // 0 <= sb_id
561  // sb_state_array == m_sb_state_array + m_sb_state_size * sb_id
562 
563  if (sb_state == (state_header_mask & *sb_state_array)) {
564  // This superblock state is as expected, for the moment.
565  // Attempt to claim a bit. The attempt updates the state
566  // so have already made sure the state header is as expected.
567 
568  const uint32_t count_lg2 = sb_state >> state_shift;
569  const uint32_t mask = (1u << count_lg2) - 1;
570 
571  const Kokkos::pair<int, int> result = CB::acquire_bounded_lg2(
572  sb_state_array, count_lg2, block_id_hint & mask, sb_state);
573 
574  // If result.first < 0 then failed to acquire
575  // due to either full or buffer was wrong state.
576  // Could be wrong state if a deallocation raced the
577  // superblock to empty before the acquire could succeed.
578 
579  if (0 <= result.first) { // acquired a bit
580 
581  const uint32_t size_lg2 = m_sb_size_lg2 - count_lg2;
582 
583  // Set the allocated block pointer
584 
585  p = ((char *)(m_sb_state_array + m_data_offset)) +
586  (uint64_t(sb_id) << m_sb_size_lg2) // superblock memory
587  + (uint64_t(result.first) << size_lg2); // block memory
588 
589 #if 0
590  printf( " MemoryPool(0x%lx) pointer(0x%lx) allocate(%lu) sb_id(%d) sb_state(0x%x) block_size(%d) block_capacity(%d) block_id(%d) block_claimed(%d)\n"
591  , (uintptr_t)m_sb_state_array
592  , (uintptr_t)p
593  , alloc_size
594  , sb_id
595  , sb_state
596  , (1u << size_lg2)
597  , (1u << count_lg2)
598  , result.first
599  , result.second );
600 #endif
601 
602  break; // Success
603  }
604  }
605  //------------------------------------------------------------------
606  // Arrive here if failed to acquire a block.
607  // Must find a new superblock.
608 
609  // Start searching at designated index for this block size.
610  // Look for superblock that, in preferential order,
611  // 1) part-full superblock of this block size
612  // 2) empty superblock to claim for this block size
613  // 3) part-full superblock of the next larger block size
614 
615  sb_state = block_state; // Expect to find the desired state
616  sb_id = -1;
617 
618  bool update_hint = false;
619  int32_t sb_id_empty = -1;
620  int32_t sb_id_large = -1;
621  uint32_t sb_state_large = 0;
622 
623  sb_state_array = m_sb_state_array + sb_id_begin * m_sb_state_size;
624 
625  for (int32_t i = 0, id = sb_id_begin; i < m_sb_count; ++i) {
626  // Query state of the candidate superblock.
627  // Note that the state may change at any moment
628  // as concurrent allocations and deallocations occur.
629 
630  const uint32_t full_state = *sb_state_array;
631  const uint32_t used = full_state & state_used_mask;
632  const uint32_t state = full_state & state_header_mask;
633 
634  if (state == block_state) {
635  // Superblock is assigned to this block size
636 
637  if (used < block_count) {
638  // There is room to allocate one block
639 
640  sb_id = id;
641 
642  // Is there room to allocate more than one block?
643 
644  update_hint = used + 1 < block_count;
645 
646  break;
647  }
648  } else if (0 == used) {
649  // Superblock is empty
650 
651  if (-1 == sb_id_empty) {
652  // Superblock is not assigned to this block size
653  // and is the first empty superblock encountered.
654  // Save this id to use if a partfull superblock is not found.
655 
656  sb_id_empty = id;
657  }
658  } else if ((-1 == sb_id_empty /* have not found an empty */) &&
659  (-1 == sb_id_large /* have not found a larger */) &&
660  (state < block_state /* a larger block */) &&
661  // is not full:
662  (used < (1u << (state >> state_shift)))) {
663  // First superblock encountered that is
664  // larger than this block size and
665  // has room for an allocation.
666  // Save this id to use of partfull or empty superblock not found
667  sb_id_large = id;
668  sb_state_large = state;
669  }
670 
671  // Iterate around the superblock array:
672 
673  if (++id < m_sb_count) {
674  sb_state_array += m_sb_state_size;
675  } else {
676  id = 0;
677  sb_state_array = m_sb_state_array;
678  }
679  }
680 
681  // printf(" search m_sb_count(%d) sb_id(%d) sb_id_empty(%d)
682  // sb_id_large(%d)\n" , m_sb_count , sb_id , sb_id_empty , sb_id_large);
683 
684  if (sb_id < 0) {
685  // Did not find a partfull superblock for this block size.
686 
687  if (0 <= sb_id_empty) {
688  // Found first empty superblock following designated superblock
689  // Attempt to claim it for this block size.
690  // If the claim fails assume that another thread claimed it
691  // for this block size and try to use it anyway,
692  // but do not update hint.
693 
694  sb_id = sb_id_empty;
695 
696  sb_state_array = m_sb_state_array + (sb_id * m_sb_state_size);
697 
698  // If successfully changed assignment of empty superblock 'sb_id'
699  // to this block_size then update the hint.
700 
701  const uint32_t state_empty = state_header_mask & *sb_state_array;
702 
703  // If this thread claims the empty block then update the hint
704  update_hint =
705  state_empty == Kokkos::atomic_compare_exchange(
706  sb_state_array, state_empty, block_state);
707  } else if (0 <= sb_id_large) {
708  // Found a larger superblock with space available
709 
710  sb_id = sb_id_large;
711  sb_state = sb_state_large;
712 
713  sb_state_array = m_sb_state_array + (sb_id * m_sb_state_size);
714  } else {
715  // Did not find a potentially usable superblock
716  --attempt_limit;
717  }
718  }
719 
720  if (update_hint) {
721  Kokkos::atomic_compare_exchange(hint_sb_id_ptr, uint32_t(hint_sb_id),
722  uint32_t(sb_id));
723  }
724  } // end allocation attempt loop
725  //--------------------------------------------------------------------
726 
727  return p;
728  }
729  // end allocate
730  //--------------------------------------------------------------------------
731 
738  KOKKOS_INLINE_FUNCTION
739  void deallocate(void *p, size_t /* alloc_size */) const noexcept {
740  if (nullptr == p) return;
741 
742  // Determine which superblock and block
743  const ptrdiff_t d =
744  ((char *)p) - ((char *)(m_sb_state_array + m_data_offset));
745 
746  // Verify contained within the memory pool's superblocks:
747  const int ok_contains =
748  (0 <= d) && (size_t(d) < (size_t(m_sb_count) << m_sb_size_lg2));
749 
750  int ok_block_aligned = 0;
751  int ok_dealloc_once = 0;
752 
753  if (ok_contains) {
754  const int sb_id = d >> m_sb_size_lg2;
755 
756  // State array for the superblock.
757  volatile uint32_t *const sb_state_array =
758  m_sb_state_array + (sb_id * m_sb_state_size);
759 
760  const uint32_t block_state = (*sb_state_array) & state_header_mask;
761  const uint32_t block_size_lg2 =
762  m_sb_size_lg2 - (block_state >> state_shift);
763 
764  ok_block_aligned = 0 == (d & ((1UL << block_size_lg2) - 1));
765 
766  if (ok_block_aligned) {
767  // Map address to block's bit
768  // mask into superblock and then shift down for block index
769 
770  const uint32_t bit =
771  (d & (ptrdiff_t(1LU << m_sb_size_lg2) - 1)) >> block_size_lg2;
772 
773  const int result = CB::release(sb_state_array, bit, block_state);
774 
775  ok_dealloc_once = 0 <= result;
776 
777 #if 0
778  printf( " MemoryPool(0x%lx) pointer(0x%lx) deallocate sb_id(%d) block_size(%d) block_capacity(%d) block_id(%d) block_claimed(%d)\n"
779  , (uintptr_t)m_sb_state_array
780  , (uintptr_t)p
781  , sb_id
782  , (1u << block_size_lg2)
783  , (1u << (m_sb_size_lg2 - block_size_lg2))
784  , bit
785  , result );
786 #endif
787  }
788  }
789 
790  if (!ok_contains || !ok_block_aligned || !ok_dealloc_once) {
791 #if 0
792  printf( " MemoryPool(0x%lx) pointer(0x%lx) deallocate ok_contains(%d) ok_block_aligned(%d) ok_dealloc_once(%d)\n"
793  , (uintptr_t)m_sb_state_array
794  , (uintptr_t)p
795  , int(ok_contains)
796  , int(ok_block_aligned)
797  , int(ok_dealloc_once) );
798 #endif
799  Kokkos::abort("Kokkos MemoryPool::deallocate given erroneous pointer");
800  }
801  }
802  // end deallocate
803  //--------------------------------------------------------------------------
804 
805  KOKKOS_INLINE_FUNCTION
806  int number_of_superblocks() const noexcept { return m_sb_count; }
807 
808  KOKKOS_INLINE_FUNCTION
809  void superblock_state(int sb_id, int &block_size, int &block_count_capacity,
810  int &block_count_used) const noexcept {
811  block_size = 0;
812  block_count_capacity = 0;
813  block_count_used = 0;
814 
816  Kokkos::Impl::ActiveExecutionMemorySpace,
817  base_memory_space>::accessible) {
818  // Can access the state array
819 
820  const uint32_t state =
821  ((uint32_t volatile *)m_sb_state_array)[sb_id * m_sb_state_size];
822 
823  const uint32_t block_count_lg2 = state >> state_shift;
824  const uint32_t block_used = state & state_used_mask;
825 
826  block_size = 1LU << (m_sb_size_lg2 - block_count_lg2);
827  block_count_capacity = 1LU << block_count_lg2;
828  block_count_used = block_used;
829  }
830  }
831 };
832 
833 } // namespace Kokkos
834 
835 #endif /* #ifndef KOKKOS_MEMORYPOOL_HPP */
void * allocate(const size_t arg_alloc_size) const
Allocate untracked memory in the space.
KOKKOS_INLINE_FUNCTION bool operator==(complex< RealType1 > const &x, complex< RealType2 > const &y) noexcept
Binary == operator for complex complex.
Replacement for std::pair that works on CUDA devices.
Definition: Kokkos_Pair.hpp:65
void deallocate(void *const arg_alloc_ptr, const size_t arg_alloc_size) const
Deallocate untracked memory in the space.
first_type first
The first element of the pair.
Definition: Kokkos_Pair.hpp:72
Memory management for host memory.
Declaration of parallel operators.
Atomic functions.
second_type second
The second element of the pair.
Definition: Kokkos_Pair.hpp:74
Access relationship between DstMemorySpace and SrcMemorySpace.