Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_radixSort.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_DETAILS_RADIXSORT_HPP
11 #define TPETRA_DETAILS_RADIXSORT_HPP
12 
13 #include "TpetraCore_config.h"
14 #include "Kokkos_Macros.hpp"
15 
16 namespace Tpetra
17 {
18 namespace Details
19 {
20 
32 template<typename KeyType, typename ValueType, typename IndexType>
33 KOKKOS_INLINE_FUNCTION void
34 radixSortKeysAndValues(KeyType* keys, KeyType* keysAux, ValueType* values, ValueType* valuesAux, IndexType n, IndexType upperBound)
35 {
36  if(n <= 1)
37  return;
38  KeyType mask = 0xF;
39  bool inAux = false;
40  //maskPos counts the low bit index of mask (0, 4, 8, ...)
41  IndexType maskPos = 0;
42  //Count number of bits required to sort (8 * sizeof(KeyType) - lzcnt(maxKey - minKey))
43  IndexType sortBits = 0;
44  while(upperBound)
45  {
46  upperBound >>= 1;
47  sortBits++;
48  }
49  for(IndexType s = 0; s < (sortBits + 3) / 4; s++)
50  {
51  //Count the number of elements in each bucket
52  IndexType count[16] = {0};
53  IndexType offset[17] = {0};
54  if(!inAux)
55  {
56  for(IndexType i = 0; i < n; i++)
57  {
58  count[(keys[i] & mask) >> maskPos]++;
59  }
60  }
61  else
62  {
63  for(IndexType i = 0; i < n; i++)
64  {
65  count[(keysAux[i] & mask) >> maskPos]++;
66  }
67  }
68  //get offset as the prefix sum for count
69  for(IndexType i = 0; i < 16; i++)
70  {
71  offset[i + 1] = offset[i] + count[i];
72  }
73  //now for each element in [lo, hi), move it to its offset in the other buffer
74  //this branch should be ok because whichBuf is the same on all threads
75  if(!inAux)
76  {
77  //copy from *Over to *Aux
78  for(IndexType i = 0; i < n; i++)
79  {
80  IndexType bucket = (keys[i] & mask) >> maskPos;
81  keysAux[offset[bucket + 1] - count[bucket]] = keys[i];
82  valuesAux[offset[bucket + 1] - count[bucket]] = values[i];
83  count[bucket]--;
84  }
85  }
86  else
87  {
88  //copy from *Aux to *Over
89  for(IndexType i = 0; i < n; i++)
90  {
91  IndexType bucket = (keysAux[i] & mask) >> maskPos;
92  keys[offset[bucket + 1] - count[bucket]] = keysAux[i];
93  values[offset[bucket + 1] - count[bucket]] = valuesAux[i];
94  count[bucket]--;
95  }
96  }
97  inAux = !inAux;
98  mask = mask << 4;
99  maskPos += 4;
100  }
101  if(inAux)
102  {
103  //need to deep copy from aux arrays to main
104  for(IndexType i = 0; i < n; i++)
105  {
106  keys[i] = keysAux[i];
107  values[i] = valuesAux[i];
108  }
109  }
110 }
111 
112 } //namespace Details
113 } //namespace Tpetra
114 
115 #endif //include guard
116 
KOKKOS_INLINE_FUNCTION void radixSortKeysAndValues(KeyType *keys, KeyType *keysAux, ValueType *values, ValueType *valuesAux, IndexType n, IndexType upperBound)
Radix sort the input array keys, and permute values identically to the keys.