Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
Tpetra_Details_radixSort.hpp
1 /*
2 // @HEADER
3 // ***********************************************************************
4 //
5 // Tpetra: Templated Linear Algebra Services Package
6 // Copyright (2008) Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39 //
40 // ************************************************************************
41 // @HEADER
42 */
43 
44 #ifndef TPETRA_DETAILS_RADIXSORT_HPP
45 #define TPETRA_DETAILS_RADIXSORT_HPP
46 
47 #include "TpetraCore_config.h"
48 #include "Kokkos_Macros.hpp"
49 
50 namespace Tpetra
51 {
52 namespace Details
53 {
54 
66 template<typename KeyType, typename ValueType, typename IndexType>
67 KOKKOS_INLINE_FUNCTION void
68 radixSortKeysAndValues(KeyType* keys, KeyType* keysAux, ValueType* values, ValueType* valuesAux, IndexType n, IndexType upperBound)
69 {
70  if(n <= 1)
71  return;
72  KeyType mask = 0xF;
73  bool inAux = false;
74  //maskPos counts the low bit index of mask (0, 4, 8, ...)
75  IndexType maskPos = 0;
76  //Count number of bits required to sort (8 * sizeof(KeyType) - lzcnt(maxKey - minKey))
77  IndexType sortBits = 0;
78  while(upperBound)
79  {
80  upperBound >>= 1;
81  sortBits++;
82  }
83  for(IndexType s = 0; s < (sortBits + 3) / 4; s++)
84  {
85  //Count the number of elements in each bucket
86  IndexType count[16] = {0};
87  IndexType offset[17] = {0};
88  if(!inAux)
89  {
90  for(IndexType i = 0; i < n; i++)
91  {
92  count[(keys[i] & mask) >> maskPos]++;
93  }
94  }
95  else
96  {
97  for(IndexType i = 0; i < n; i++)
98  {
99  count[(keysAux[i] & mask) >> maskPos]++;
100  }
101  }
102  //get offset as the prefix sum for count
103  for(IndexType i = 0; i < 16; i++)
104  {
105  offset[i + 1] = offset[i] + count[i];
106  }
107  //now for each element in [lo, hi), move it to its offset in the other buffer
108  //this branch should be ok because whichBuf is the same on all threads
109  if(!inAux)
110  {
111  //copy from *Over to *Aux
112  for(IndexType i = 0; i < n; i++)
113  {
114  IndexType bucket = (keys[i] & mask) >> maskPos;
115  keysAux[offset[bucket + 1] - count[bucket]] = keys[i];
116  valuesAux[offset[bucket + 1] - count[bucket]] = values[i];
117  count[bucket]--;
118  }
119  }
120  else
121  {
122  //copy from *Aux to *Over
123  for(IndexType i = 0; i < n; i++)
124  {
125  IndexType bucket = (keysAux[i] & mask) >> maskPos;
126  keys[offset[bucket + 1] - count[bucket]] = keysAux[i];
127  values[offset[bucket + 1] - count[bucket]] = valuesAux[i];
128  count[bucket]--;
129  }
130  }
131  inAux = !inAux;
132  mask = mask << 4;
133  maskPos += 4;
134  }
135  if(inAux)
136  {
137  //need to deep copy from aux arrays to main
138  for(IndexType i = 0; i < n; i++)
139  {
140  keys[i] = keysAux[i];
141  values[i] = valuesAux[i];
142  }
143  }
144 }
145 
146 } //namespace Details
147 } //namespace Tpetra
148 
149 #endif //include guard
150 
KOKKOS_INLINE_FUNCTION void radixSortKeysAndValues(KeyType *keys, KeyType *keysAux, ValueType *values, ValueType *valuesAux, IndexType n, IndexType upperBound)
Radix sort the input array keys, and permute values identically to the keys.