Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_shortSort.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_DETAILS_SHORTSORT_HPP
11 #define TPETRA_DETAILS_SHORTSORT_HPP
12 
20 
21 #include "TpetraCore_config.h"
22 #include "Kokkos_Macros.hpp"
23 #include <type_traits>
24 
25 namespace Tpetra {
26 namespace Details {
27 
28 // Make sure that the macro defined below wasn't defined somewhere else.
29 #ifdef TPETRA_DETAILS_SWAP_KEYSANDVALUES
30 # error "The TPETRA_DETAILS_SWAP_KEYSANDVALUES macro is already defined."
31 #endif // TPETRA_DETAILS_SWAP_KEYSANDVALUES
32 
53 #define TPETRA_DETAILS_SWAP_KEYSANDVALUES( i, j ) \
54  if (keys[i] > keys[j]) { \
55  const KeyType tmpKey (keys[i]); \
56  keys[i] = keys[j]; \
57  keys[j] = tmpKey; \
58  const ValueType tmpVal (values[i]); \
59  values[i] = values[j]; \
60  values[j] = tmpVal; \
61  }
62 
63 // Make sure that the macro defined below wasn't defined somewhere else.
64 #ifdef TPETRA_DETAILS_SWAP_KEYS
65 # error "The TPETRA_DETAILS_SWAP_KEYS macro is already defined."
66 #endif // TPETRA_DETAILS_SWAP_KEYSANDVALUES
67 
85 #define TPETRA_DETAILS_SWAP_KEYS( i, j ) \
86  if (keys[i] > keys[j]) { \
87  const KeyType tmpKey (keys[i]); \
88  keys[i] = keys[j]; \
89  keys[j] = tmpKey; \
90  }
91 
102 template<class KeyType, class ValueType>
103 KOKKOS_FUNCTION void
104 shortSortKeysAndValues_2 (KeyType keys[2], ValueType values[2])
105 {
106  // Since this function takes a constant number of entries, I use a
107  // sorting network here. For 2 entries, the sorting network is
108  // nearly trivial.
110 }
111 
118 template<class KeyType>
119 KOKKOS_FUNCTION void
120 shortSortKeys_2 (KeyType keys[2])
121 {
122  // Since this function takes a constant number of entries, I use a
123  // sorting network here. For 2 entries, the sorting network is
124  // nearly trivial.
126 }
127 
138 template<class KeyType, class ValueType>
139 KOKKOS_FUNCTION void
140 shortSortKeysAndValues_3 (KeyType keys[3], ValueType values[3])
141 {
142  // Since this function takes a constant number of entries, I use a
143  // sorting network here. To make the network, I used the generator
144  // at
145  //
146  // http://pages.ripco.net/~jgamble/nw.html
147  //
148  // "Best" algorithm in this case is the Bose-Nelson Algorithm.
152 }
153 
160 template<class KeyType>
161 KOKKOS_FUNCTION void
162 shortSortKeys_3 (KeyType keys[3])
163 {
164  // Since this function takes a constant number of entries, I use a
165  // sorting network here. To make the network, I used the generator
166  // at
167  //
168  // http://pages.ripco.net/~jgamble/nw.html
169  //
170  // "Best" algorithm in this case is the Bose-Nelson Algorithm.
174 }
175 
186 template<class KeyType, class ValueType>
187 KOKKOS_FUNCTION void
188 shortSortKeysAndValues_4 (KeyType keys[4], ValueType values[4])
189 {
190  // Since this function takes a constant number of entries, I use a
191  // sorting network here. To make the network, I used the generator
192  // at
193  //
194  // http://pages.ripco.net/~jgamble/nw.html
195  //
196  // "Best" algorithm in this case is the Bose-Nelson Algorithm.
202 }
203 
210 template<class KeyType>
211 KOKKOS_FUNCTION void
212 shortSortKeys_4 (KeyType keys[4])
213 {
214  // Since this function takes a constant number of entries, I use a
215  // sorting network here. To make the network, I used the generator
216  // at
217  //
218  // http://pages.ripco.net/~jgamble/nw.html
219  //
220  // "Best" algorithm in this case is the Bose-Nelson Algorithm.
226 }
227 
238 template<class KeyType, class ValueType>
239 KOKKOS_FUNCTION void
240 shortSortKeysAndValues_8 (KeyType keys[8], ValueType values[8])
241 {
242  // Since this function takes a constant number of entries, I use a
243  // sorting network here. To make the network, I used the generator
244  // at
245  //
246  // http://pages.ripco.net/~jgamble/nw.html
247  //
248  // "Best" algorithm in this case is the Bose-Nelson Algorithm.
268 }
269 
276 template<class KeyType>
277 KOKKOS_FUNCTION void
278 shortSortKeys_8 (KeyType keys[8])
279 {
280  // Since this function takes a constant number of entries, I use a
281  // sorting network here. To make the network, I used the generator
282  // at
283  //
284  // http://pages.ripco.net/~jgamble/nw.html
285  //
286  // "Best" algorithm in this case is the Bose-Nelson Algorithm.
306 }
307 
313 template<class KeyType, class ValueType, class IndexType>
314 KOKKOS_FUNCTION void
315 shellSortKeysAndValues (KeyType keys[],
316  ValueType values[],
317  const IndexType n)
318 {
319  static_assert (std::is_integral<IndexType>::value,
320  "IndexType must be a signed integer type.");
321  static_assert (std::is_signed<IndexType>::value,
322  "IndexType must be a signed integer type. "
323  "This implementation does a count-down loop, "
324  "and may thus loop forever "
325  "if one attempts to use it with unsigned IndexType.");
326  constexpr IndexType ZERO = 0;
327  IndexType midpoint = n / static_cast<IndexType> (2);
328 
329  while (midpoint > ZERO) {
330  // Avoid names like "max" in case they collide with macros.
331  const IndexType theMax = n - midpoint;
332  for (IndexType j = 0; j < theMax; ++j) {
333  // IndexType is signed, so it's legit to compare >= 0.
334  for (IndexType k = j; k >= 0; k -= midpoint) {
335  if (keys[k + midpoint] >= keys[k]) {
336  break;
337  }
338  const KeyType tmpKey = keys[k + midpoint];
339  keys[k + midpoint] = keys[k];
340  keys[k] = tmpKey;
341  const ValueType tmpVal = values[k + midpoint];
342  values[k + midpoint] = values[k];
343  values[k] = tmpVal;
344  }
345  }
346  midpoint = midpoint / 2;
347  }
348 }
349 
354 template<class KeyType, class IndexType>
355 KOKKOS_FUNCTION void
356 shellSortKeys (KeyType keys[], const IndexType n)
357 {
358  static_assert (std::is_integral<IndexType>::value,
359  "IndexType must be a signed integer type.");
360  static_assert (std::is_signed<IndexType>::value,
361  "IndexType must be a signed integer type. "
362  "This implementation does a count-down loop, "
363  "and may thus loop forever "
364  "if one attempts to use it with unsigned IndexType.");
365  constexpr IndexType ZERO = 0;
366  IndexType midpoint = n / static_cast<IndexType> (2);
367 
368  while (midpoint > ZERO) {
369  // Avoid names like "max" in case they collide with macros.
370  const IndexType theMax = n - midpoint;
371  for (int j = 0; j < theMax; ++j) {
372  // IndexType must be signed, so it's legit to compare >= 0.
373  for (int k = j; k >= 0; k -= midpoint) {
374  if (keys[k + midpoint] >= keys[k]) {
375  break;
376  }
377  const KeyType tmpKey = keys[k + midpoint];
378  keys[k + midpoint] = keys[k];
379  keys[k] = tmpKey;
380  }
381  }
382  midpoint = midpoint / 2;
383  }
384 }
385 
386 template<typename KeyType, typename ValueType, typename IndexType>
387 KOKKOS_FUNCTION void
388 shellSortKeysAndValues2(KeyType* keys, ValueType* values, IndexType n)
389 {
390  IndexType ngaps = 10;
391  //Use Ciura's gap choices
392  IndexType gaps[] = {3548, 1577, 701, 301, 132, 57, 23, 10, 4, 1};
393  for(IndexType gapIndex = 0; gapIndex < ngaps; gapIndex++)
394  {
395  auto gap = gaps[gapIndex];
396  if(n < gap)
397  continue;
398  //insertion sort the array {keys[0*gap], keys[1*gap], keys[2*gap], ...}
399  for(IndexType gapOffset = 0; gapOffset < gap; gapOffset++)
400  {
401  for(IndexType i = gap + gapOffset; i < n; i += gap)
402  {
403  //avoid extra swaps: scan for the final position of keys[i]
404  if(keys[i - gap] > keys[i])
405  {
406  IndexType finalPos = i - gap;
407  while(finalPos - gap >= 0 && keys[finalPos - gap] > keys[i])
408  {
409  finalPos -= gap;
410  }
411  //save keys/values [i], then shift up all keys/values between finalPos and i-gap (inclusive)
412  auto tempKey = keys[i];
413  auto tempVal = values[i];
414  for(IndexType j = i - gap; j >= finalPos; j -= gap)
415  {
416  keys[j + gap] = keys[j];
417  values[j + gap] = values[j];
418  }
419  keys[finalPos] = tempKey;
420  values[finalPos] = tempVal;
421  }
422  }
423  }
424  }
425 #undef SHELL_SWAP
426 }
427 
428 } // namespace Details
429 } // namespace Tpetra
430 
431 #endif // TPETRA_DETAILS_SHORTSORT_HPP
KOKKOS_FUNCTION void shortSortKeys_8(KeyType keys[8])
Sort length-8 array of keys.
KOKKOS_FUNCTION void shortSortKeys_3(KeyType keys[3])
Sort length-3 array of keys.
KOKKOS_FUNCTION void shortSortKeysAndValues_4(KeyType keys[4], ValueType values[4])
Sort keys and values jointly, by keys, for arrays of length 4.
KOKKOS_FUNCTION void shortSortKeys_4(KeyType keys[4])
Sort length-4 array of keys.
KOKKOS_FUNCTION void shortSortKeysAndValues_2(KeyType keys[2], ValueType values[2])
Sort keys and values jointly, by keys, for arrays of length 2.
KOKKOS_FUNCTION void shellSortKeys(KeyType keys[], const IndexType n)
Shellsort (yes, it&#39;s one word) the input array keys.
KOKKOS_FUNCTION void shortSortKeys_2(KeyType keys[2])
Sort length-2 array of keys.
KOKKOS_FUNCTION void shortSortKeysAndValues_3(KeyType keys[3], ValueType values[3])
Sort keys and values jointly, by keys, for arrays of length 3.
KOKKOS_FUNCTION void shellSortKeysAndValues(KeyType keys[], ValueType values[], const IndexType n)
Shellsort (yes, it&#39;s one word) the input array keys, and apply the resulting permutation to the input...
#define TPETRA_DETAILS_SWAP_KEYS(i, j)
Macro that swaps the i and j entries of keys, if keys[i] &gt; keysj.
Replace old values with zero.
KOKKOS_FUNCTION void shortSortKeysAndValues_8(KeyType keys[8], ValueType values[8])
Sort keys and values jointly, by keys, for arrays of length 8.
#define TPETRA_DETAILS_SWAP_KEYSANDVALUES(i, j)
Macro that swaps the i and j entries of keys and values, if keys[i] &gt; keysj.