Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_shortSort.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_DETAILS_SHORTSORT_HPP
11 #define TPETRA_DETAILS_SHORTSORT_HPP
12 
20 
21 #include "TpetraCore_config.h"
22 #include "Kokkos_Macros.hpp"
23 #include <type_traits>
24 
25 namespace Tpetra {
26 namespace Details {
27 
28 // Make sure that the macro defined below wasn't defined somewhere else.
29 #ifdef TPETRA_DETAILS_SWAP_KEYSANDVALUES
30 #error "The TPETRA_DETAILS_SWAP_KEYSANDVALUES macro is already defined."
31 #endif // TPETRA_DETAILS_SWAP_KEYSANDVALUES
32 
53 #define TPETRA_DETAILS_SWAP_KEYSANDVALUES(i, j) \
54  if (keys[i] > keys[j]) { \
55  const KeyType tmpKey(keys[i]); \
56  keys[i] = keys[j]; \
57  keys[j] = tmpKey; \
58  const ValueType tmpVal(values[i]); \
59  values[i] = values[j]; \
60  values[j] = tmpVal; \
61  }
62 
63 // Make sure that the macro defined below wasn't defined somewhere else.
64 #ifdef TPETRA_DETAILS_SWAP_KEYS
65 #error "The TPETRA_DETAILS_SWAP_KEYS macro is already defined."
66 #endif // TPETRA_DETAILS_SWAP_KEYSANDVALUES
67 
85 #define TPETRA_DETAILS_SWAP_KEYS(i, j) \
86  if (keys[i] > keys[j]) { \
87  const KeyType tmpKey(keys[i]); \
88  keys[i] = keys[j]; \
89  keys[j] = tmpKey; \
90  }
91 
102 template <class KeyType, class ValueType>
103 KOKKOS_FUNCTION void
104 shortSortKeysAndValues_2(KeyType keys[2], ValueType values[2]) {
105  // Since this function takes a constant number of entries, I use a
106  // sorting network here. For 2 entries, the sorting network is
107  // nearly trivial.
109 }
110 
117 template <class KeyType>
118 KOKKOS_FUNCTION void
119 shortSortKeys_2(KeyType keys[2]) {
120  // Since this function takes a constant number of entries, I use a
121  // sorting network here. For 2 entries, the sorting network is
122  // nearly trivial.
124 }
125 
136 template <class KeyType, class ValueType>
137 KOKKOS_FUNCTION void
138 shortSortKeysAndValues_3(KeyType keys[3], ValueType values[3]) {
139  // Since this function takes a constant number of entries, I use a
140  // sorting network here. To make the network, I used the generator
141  // at
142  //
143  // http://pages.ripco.net/~jgamble/nw.html
144  //
145  // "Best" algorithm in this case is the Bose-Nelson Algorithm.
149 }
150 
157 template <class KeyType>
158 KOKKOS_FUNCTION void
159 shortSortKeys_3(KeyType keys[3]) {
160  // Since this function takes a constant number of entries, I use a
161  // sorting network here. To make the network, I used the generator
162  // at
163  //
164  // http://pages.ripco.net/~jgamble/nw.html
165  //
166  // "Best" algorithm in this case is the Bose-Nelson Algorithm.
170 }
171 
182 template <class KeyType, class ValueType>
183 KOKKOS_FUNCTION void
184 shortSortKeysAndValues_4(KeyType keys[4], ValueType values[4]) {
185  // Since this function takes a constant number of entries, I use a
186  // sorting network here. To make the network, I used the generator
187  // at
188  //
189  // http://pages.ripco.net/~jgamble/nw.html
190  //
191  // "Best" algorithm in this case is the Bose-Nelson Algorithm.
197 }
198 
205 template <class KeyType>
206 KOKKOS_FUNCTION void
207 shortSortKeys_4(KeyType keys[4]) {
208  // Since this function takes a constant number of entries, I use a
209  // sorting network here. To make the network, I used the generator
210  // at
211  //
212  // http://pages.ripco.net/~jgamble/nw.html
213  //
214  // "Best" algorithm in this case is the Bose-Nelson Algorithm.
220 }
221 
232 template <class KeyType, class ValueType>
233 KOKKOS_FUNCTION void
234 shortSortKeysAndValues_8(KeyType keys[8], ValueType values[8]) {
235  // Since this function takes a constant number of entries, I use a
236  // sorting network here. To make the network, I used the generator
237  // at
238  //
239  // http://pages.ripco.net/~jgamble/nw.html
240  //
241  // "Best" algorithm in this case is the Bose-Nelson Algorithm.
261 }
262 
269 template <class KeyType>
270 KOKKOS_FUNCTION void
271 shortSortKeys_8(KeyType keys[8]) {
272  // Since this function takes a constant number of entries, I use a
273  // sorting network here. To make the network, I used the generator
274  // at
275  //
276  // http://pages.ripco.net/~jgamble/nw.html
277  //
278  // "Best" algorithm in this case is the Bose-Nelson Algorithm.
298 }
299 
305 template <class KeyType, class ValueType, class IndexType>
306 KOKKOS_FUNCTION void
307 shellSortKeysAndValues(KeyType keys[],
308  ValueType values[],
309  const IndexType n) {
310  static_assert(std::is_integral<IndexType>::value,
311  "IndexType must be a signed integer type.");
312  static_assert(std::is_signed<IndexType>::value,
313  "IndexType must be a signed integer type. "
314  "This implementation does a count-down loop, "
315  "and may thus loop forever "
316  "if one attempts to use it with unsigned IndexType.");
317  constexpr IndexType ZERO = 0;
318  IndexType midpoint = n / static_cast<IndexType>(2);
319 
320  while (midpoint > ZERO) {
321  // Avoid names like "max" in case they collide with macros.
322  const IndexType theMax = n - midpoint;
323  for (IndexType j = 0; j < theMax; ++j) {
324  // IndexType is signed, so it's legit to compare >= 0.
325  for (IndexType k = j; k >= 0; k -= midpoint) {
326  if (keys[k + midpoint] >= keys[k]) {
327  break;
328  }
329  const KeyType tmpKey = keys[k + midpoint];
330  keys[k + midpoint] = keys[k];
331  keys[k] = tmpKey;
332  const ValueType tmpVal = values[k + midpoint];
333  values[k + midpoint] = values[k];
334  values[k] = tmpVal;
335  }
336  }
337  midpoint = midpoint / 2;
338  }
339 }
340 
345 template <class KeyType, class IndexType>
346 KOKKOS_FUNCTION void
347 shellSortKeys(KeyType keys[], const IndexType n) {
348  static_assert(std::is_integral<IndexType>::value,
349  "IndexType must be a signed integer type.");
350  static_assert(std::is_signed<IndexType>::value,
351  "IndexType must be a signed integer type. "
352  "This implementation does a count-down loop, "
353  "and may thus loop forever "
354  "if one attempts to use it with unsigned IndexType.");
355  constexpr IndexType ZERO = 0;
356  IndexType midpoint = n / static_cast<IndexType>(2);
357 
358  while (midpoint > ZERO) {
359  // Avoid names like "max" in case they collide with macros.
360  const IndexType theMax = n - midpoint;
361  for (int j = 0; j < theMax; ++j) {
362  // IndexType must be signed, so it's legit to compare >= 0.
363  for (int k = j; k >= 0; k -= midpoint) {
364  if (keys[k + midpoint] >= keys[k]) {
365  break;
366  }
367  const KeyType tmpKey = keys[k + midpoint];
368  keys[k + midpoint] = keys[k];
369  keys[k] = tmpKey;
370  }
371  }
372  midpoint = midpoint / 2;
373  }
374 }
375 
376 template <typename KeyType, typename ValueType, typename IndexType>
377 KOKKOS_FUNCTION void
378 shellSortKeysAndValues2(KeyType* keys, ValueType* values, IndexType n) {
379  IndexType ngaps = 10;
380  // Use Ciura's gap choices
381  IndexType gaps[] = {3548, 1577, 701, 301, 132, 57, 23, 10, 4, 1};
382  for (IndexType gapIndex = 0; gapIndex < ngaps; gapIndex++) {
383  auto gap = gaps[gapIndex];
384  if (n < gap)
385  continue;
386  // insertion sort the array {keys[0*gap], keys[1*gap], keys[2*gap], ...}
387  for (IndexType gapOffset = 0; gapOffset < gap; gapOffset++) {
388  for (IndexType i = gap + gapOffset; i < n; i += gap) {
389  // avoid extra swaps: scan for the final position of keys[i]
390  if (keys[i - gap] > keys[i]) {
391  IndexType finalPos = i - gap;
392  while (finalPos - gap >= 0 && keys[finalPos - gap] > keys[i]) {
393  finalPos -= gap;
394  }
395  // save keys/values [i], then shift up all keys/values between finalPos and i-gap (inclusive)
396  auto tempKey = keys[i];
397  auto tempVal = values[i];
398  for (IndexType j = i - gap; j >= finalPos; j -= gap) {
399  keys[j + gap] = keys[j];
400  values[j + gap] = values[j];
401  }
402  keys[finalPos] = tempKey;
403  values[finalPos] = tempVal;
404  }
405  }
406  }
407  }
408 #undef SHELL_SWAP
409 }
410 
411 } // namespace Details
412 } // namespace Tpetra
413 
414 #endif // TPETRA_DETAILS_SHORTSORT_HPP
KOKKOS_FUNCTION void shortSortKeys_8(KeyType keys[8])
Sort length-8 array of keys.
KOKKOS_FUNCTION void shortSortKeys_3(KeyType keys[3])
Sort length-3 array of keys.
KOKKOS_FUNCTION void shortSortKeysAndValues_4(KeyType keys[4], ValueType values[4])
Sort keys and values jointly, by keys, for arrays of length 4.
KOKKOS_FUNCTION void shortSortKeys_4(KeyType keys[4])
Sort length-4 array of keys.
KOKKOS_FUNCTION void shortSortKeysAndValues_2(KeyType keys[2], ValueType values[2])
Sort keys and values jointly, by keys, for arrays of length 2.
KOKKOS_FUNCTION void shellSortKeys(KeyType keys[], const IndexType n)
Shellsort (yes, it&#39;s one word) the input array keys.
KOKKOS_FUNCTION void shortSortKeys_2(KeyType keys[2])
Sort length-2 array of keys.
KOKKOS_FUNCTION void shortSortKeysAndValues_3(KeyType keys[3], ValueType values[3])
Sort keys and values jointly, by keys, for arrays of length 3.
KOKKOS_FUNCTION void shellSortKeysAndValues(KeyType keys[], ValueType values[], const IndexType n)
Shellsort (yes, it&#39;s one word) the input array keys, and apply the resulting permutation to the input...
#define TPETRA_DETAILS_SWAP_KEYS(i, j)
Macro that swaps the i and j entries of keys, if keys[i] &gt; keysj.
Replace old values with zero.
KOKKOS_FUNCTION void shortSortKeysAndValues_8(KeyType keys[8], ValueType values[8])
Sort keys and values jointly, by keys, for arrays of length 8.
#define TPETRA_DETAILS_SWAP_KEYSANDVALUES(i, j)
Macro that swaps the i and j entries of keys and values, if keys[i] &gt; keysj.