Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_radixSort.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_DETAILS_RADIXSORT_HPP
11 #define TPETRA_DETAILS_RADIXSORT_HPP
12 
13 #include "TpetraCore_config.h"
14 #include "Kokkos_Macros.hpp"
15 
16 namespace Tpetra {
17 namespace Details {
18 
30 template <typename KeyType, typename ValueType, typename IndexType>
31 KOKKOS_INLINE_FUNCTION void
32 radixSortKeysAndValues(KeyType* keys, KeyType* keysAux, ValueType* values, ValueType* valuesAux, IndexType n, IndexType upperBound) {
33  if (n <= 1)
34  return;
35  KeyType mask = 0xF;
36  bool inAux = false;
37  // maskPos counts the low bit index of mask (0, 4, 8, ...)
38  IndexType maskPos = 0;
39  // Count number of bits required to sort (8 * sizeof(KeyType) - lzcnt(maxKey - minKey))
40  IndexType sortBits = 0;
41  while (upperBound) {
42  upperBound >>= 1;
43  sortBits++;
44  }
45  for (IndexType s = 0; s < (sortBits + 3) / 4; s++) {
46  // Count the number of elements in each bucket
47  IndexType count[16] = {0};
48  IndexType offset[17] = {0};
49  if (!inAux) {
50  for (IndexType i = 0; i < n; i++) {
51  count[(keys[i] & mask) >> maskPos]++;
52  }
53  } else {
54  for (IndexType i = 0; i < n; i++) {
55  count[(keysAux[i] & mask) >> maskPos]++;
56  }
57  }
58  // get offset as the prefix sum for count
59  for (IndexType i = 0; i < 16; i++) {
60  offset[i + 1] = offset[i] + count[i];
61  }
62  // now for each element in [lo, hi), move it to its offset in the other buffer
63  // this branch should be ok because whichBuf is the same on all threads
64  if (!inAux) {
65  // copy from *Over to *Aux
66  for (IndexType i = 0; i < n; i++) {
67  IndexType bucket = (keys[i] & mask) >> maskPos;
68  keysAux[offset[bucket + 1] - count[bucket]] = keys[i];
69  valuesAux[offset[bucket + 1] - count[bucket]] = values[i];
70  count[bucket]--;
71  }
72  } else {
73  // copy from *Aux to *Over
74  for (IndexType i = 0; i < n; i++) {
75  IndexType bucket = (keysAux[i] & mask) >> maskPos;
76  keys[offset[bucket + 1] - count[bucket]] = keysAux[i];
77  values[offset[bucket + 1] - count[bucket]] = valuesAux[i];
78  count[bucket]--;
79  }
80  }
81  inAux = !inAux;
82  mask = mask << 4;
83  maskPos += 4;
84  }
85  if (inAux) {
86  // need to deep copy from aux arrays to main
87  for (IndexType i = 0; i < n; i++) {
88  keys[i] = keysAux[i];
89  values[i] = valuesAux[i];
90  }
91  }
92 }
93 
94 } // namespace Details
95 } // namespace Tpetra
96 
97 #endif // include guard
KOKKOS_INLINE_FUNCTION void radixSortKeysAndValues(KeyType *keys, KeyType *keysAux, ValueType *values, ValueType *valuesAux, IndexType n, IndexType upperBound)
Radix sort the input array keys, and permute values identically to the keys.