Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
Tpetra_Details_shortSort.hpp
Go to the documentation of this file.
1 /*
2 // @HEADER
3 // ***********************************************************************
4 //
5 // Tpetra: Templated Linear Algebra Services Package
6 // Copyright (2008) Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39 //
40 // ************************************************************************
41 // @HEADER
42 */
43 
44 #ifndef TPETRA_DETAILS_SHORTSORT_HPP
45 #define TPETRA_DETAILS_SHORTSORT_HPP
46 
54 
55 #include "TpetraCore_config.h"
56 #include "Kokkos_Macros.hpp"
57 #include <type_traits>
58 
59 namespace Tpetra {
60 namespace Details {
61 
62 // Make sure that the macro defined below wasn't defined somewhere else.
63 #ifdef TPETRA_DETAILS_SWAP_KEYSANDVALUES
64 # error "The TPETRA_DETAILS_SWAP_KEYSANDVALUES macro is already defined."
65 #endif // TPETRA_DETAILS_SWAP_KEYSANDVALUES
66 
87 #define TPETRA_DETAILS_SWAP_KEYSANDVALUES( i, j ) \
88  if (keys[i] > keys[j]) { \
89  const KeyType tmpKey (keys[i]); \
90  keys[i] = keys[j]; \
91  keys[j] = tmpKey; \
92  const ValueType tmpVal (values[i]); \
93  values[i] = values[j]; \
94  values[j] = tmpVal; \
95  }
96 
97 // Make sure that the macro defined below wasn't defined somewhere else.
98 #ifdef TPETRA_DETAILS_SWAP_KEYS
99 # error "The TPETRA_DETAILS_SWAP_KEYS macro is already defined."
100 #endif // TPETRA_DETAILS_SWAP_KEYSANDVALUES
101 
119 #define TPETRA_DETAILS_SWAP_KEYS( i, j ) \
120  if (keys[i] > keys[j]) { \
121  const KeyType tmpKey (keys[i]); \
122  keys[i] = keys[j]; \
123  keys[j] = tmpKey; \
124  }
125 
136 template<class KeyType, class ValueType>
137 KOKKOS_FUNCTION void
138 shortSortKeysAndValues_2 (KeyType keys[2], ValueType values[2])
139 {
140  // Since this function takes a constant number of entries, I use a
141  // sorting network here. For 2 entries, the sorting network is
142  // nearly trivial.
144 }
145 
152 template<class KeyType>
153 KOKKOS_FUNCTION void
154 shortSortKeys_2 (KeyType keys[2])
155 {
156  // Since this function takes a constant number of entries, I use a
157  // sorting network here. For 2 entries, the sorting network is
158  // nearly trivial.
160 }
161 
172 template<class KeyType, class ValueType>
173 KOKKOS_FUNCTION void
174 shortSortKeysAndValues_3 (KeyType keys[3], ValueType values[3])
175 {
176  // Since this function takes a constant number of entries, I use a
177  // sorting network here. To make the network, I used the generator
178  // at
179  //
180  // http://pages.ripco.net/~jgamble/nw.html
181  //
182  // "Best" algorithm in this case is the Bose-Nelson Algorithm.
186 }
187 
194 template<class KeyType>
195 KOKKOS_FUNCTION void
196 shortSortKeys_3 (KeyType keys[3])
197 {
198  // Since this function takes a constant number of entries, I use a
199  // sorting network here. To make the network, I used the generator
200  // at
201  //
202  // http://pages.ripco.net/~jgamble/nw.html
203  //
204  // "Best" algorithm in this case is the Bose-Nelson Algorithm.
208 }
209 
220 template<class KeyType, class ValueType>
221 KOKKOS_FUNCTION void
222 shortSortKeysAndValues_4 (KeyType keys[4], ValueType values[4])
223 {
224  // Since this function takes a constant number of entries, I use a
225  // sorting network here. To make the network, I used the generator
226  // at
227  //
228  // http://pages.ripco.net/~jgamble/nw.html
229  //
230  // "Best" algorithm in this case is the Bose-Nelson Algorithm.
236 }
237 
244 template<class KeyType>
245 KOKKOS_FUNCTION void
246 shortSortKeys_4 (KeyType keys[4])
247 {
248  // Since this function takes a constant number of entries, I use a
249  // sorting network here. To make the network, I used the generator
250  // at
251  //
252  // http://pages.ripco.net/~jgamble/nw.html
253  //
254  // "Best" algorithm in this case is the Bose-Nelson Algorithm.
260 }
261 
272 template<class KeyType, class ValueType>
273 KOKKOS_FUNCTION void
274 shortSortKeysAndValues_8 (KeyType keys[8], ValueType values[8])
275 {
276  // Since this function takes a constant number of entries, I use a
277  // sorting network here. To make the network, I used the generator
278  // at
279  //
280  // http://pages.ripco.net/~jgamble/nw.html
281  //
282  // "Best" algorithm in this case is the Bose-Nelson Algorithm.
302 }
303 
310 template<class KeyType>
311 KOKKOS_FUNCTION void
312 shortSortKeys_8 (KeyType keys[8])
313 {
314  // Since this function takes a constant number of entries, I use a
315  // sorting network here. To make the network, I used the generator
316  // at
317  //
318  // http://pages.ripco.net/~jgamble/nw.html
319  //
320  // "Best" algorithm in this case is the Bose-Nelson Algorithm.
340 }
341 
347 template<class KeyType, class ValueType, class IndexType>
348 KOKKOS_FUNCTION void
349 shellSortKeysAndValues (KeyType keys[],
350  ValueType values[],
351  const IndexType n)
352 {
353  static_assert (std::is_integral<IndexType>::value,
354  "IndexType must be a signed integer type.");
355  static_assert (std::is_signed<IndexType>::value,
356  "IndexType must be a signed integer type. "
357  "This implementation does a count-down loop, "
358  "and may thus loop forever "
359  "if one attempts to use it with unsigned IndexType.");
360  constexpr IndexType ZERO = 0;
361  IndexType midpoint = n / static_cast<IndexType> (2);
362 
363  while (midpoint > ZERO) {
364  // Avoid names like "max" in case they collide with macros.
365  const IndexType theMax = n - midpoint;
366  for (IndexType j = 0; j < theMax; ++j) {
367  // IndexType is signed, so it's legit to compare >= 0.
368  for (IndexType k = j; k >= 0; k -= midpoint) {
369  if (keys[k + midpoint] >= keys[k]) {
370  break;
371  }
372  const KeyType tmpKey = keys[k + midpoint];
373  keys[k + midpoint] = keys[k];
374  keys[k] = tmpKey;
375  const ValueType tmpVal = values[k + midpoint];
376  values[k + midpoint] = values[k];
377  values[k] = tmpVal;
378  }
379  }
380  midpoint = midpoint / 2;
381  }
382 }
383 
388 template<class KeyType, class IndexType>
389 KOKKOS_FUNCTION void
390 shellSortKeys (KeyType keys[], const IndexType n)
391 {
392  static_assert (std::is_integral<IndexType>::value,
393  "IndexType must be a signed integer type.");
394  static_assert (std::is_signed<IndexType>::value,
395  "IndexType must be a signed integer type. "
396  "This implementation does a count-down loop, "
397  "and may thus loop forever "
398  "if one attempts to use it with unsigned IndexType.");
399  constexpr IndexType ZERO = 0;
400  IndexType midpoint = n / static_cast<IndexType> (2);
401 
402  while (midpoint > ZERO) {
403  // Avoid names like "max" in case they collide with macros.
404  const IndexType theMax = n - midpoint;
405  for (int j = 0; j < theMax; ++j) {
406  // IndexType must be signed, so it's legit to compare >= 0.
407  for (int k = j; k >= 0; k -= midpoint) {
408  if (keys[k + midpoint] >= keys[k]) {
409  break;
410  }
411  const KeyType tmpKey = keys[k + midpoint];
412  keys[k + midpoint] = keys[k];
413  keys[k] = tmpKey;
414  }
415  }
416  midpoint = midpoint / 2;
417  }
418 }
419 
420 template<typename KeyType, typename ValueType, typename IndexType>
421 KOKKOS_FUNCTION void
422 shellSortKeysAndValues2(KeyType* keys, ValueType* values, IndexType n)
423 {
424  IndexType ngaps = 10;
425  //Use Ciura's gap choices
426  IndexType gaps[] = {3548, 1577, 701, 301, 132, 57, 23, 10, 4, 1};
427  for(IndexType gapIndex = 0; gapIndex < ngaps; gapIndex++)
428  {
429  auto gap = gaps[gapIndex];
430  if(n < gap)
431  continue;
432  //insertion sort the array {keys[0*gap], keys[1*gap], keys[2*gap], ...}
433  for(IndexType gapOffset = 0; gapOffset < gap; gapOffset++)
434  {
435  for(IndexType i = gap + gapOffset; i < n; i += gap)
436  {
437  //avoid extra swaps: scan for the final position of keys[i]
438  if(keys[i - gap] > keys[i])
439  {
440  IndexType finalPos = i - gap;
441  while(finalPos - gap >= 0 && keys[finalPos - gap] > keys[i])
442  {
443  finalPos -= gap;
444  }
445  //save keys/values [i], then shift up all keys/values between finalPos and i-gap (inclusive)
446  auto tempKey = keys[i];
447  auto tempVal = values[i];
448  for(IndexType j = i - gap; j >= finalPos; j -= gap)
449  {
450  keys[j + gap] = keys[j];
451  values[j + gap] = values[j];
452  }
453  keys[finalPos] = tempKey;
454  values[finalPos] = tempVal;
455  }
456  }
457  }
458  }
459 #undef SHELL_SWAP
460 }
461 
462 } // namespace Details
463 } // namespace Tpetra
464 
465 #endif // TPETRA_DETAILS_SHORTSORT_HPP
KOKKOS_FUNCTION void shortSortKeys_8(KeyType keys[8])
Sort length-8 array of keys.
KOKKOS_FUNCTION void shortSortKeys_3(KeyType keys[3])
Sort length-3 array of keys.
KOKKOS_FUNCTION void shortSortKeysAndValues_4(KeyType keys[4], ValueType values[4])
Sort keys and values jointly, by keys, for arrays of length 4.
KOKKOS_FUNCTION void shortSortKeys_4(KeyType keys[4])
Sort length-4 array of keys.
KOKKOS_FUNCTION void shortSortKeysAndValues_2(KeyType keys[2], ValueType values[2])
Sort keys and values jointly, by keys, for arrays of length 2.
KOKKOS_FUNCTION void shellSortKeys(KeyType keys[], const IndexType n)
Shellsort (yes, it&#39;s one word) the input array keys.
KOKKOS_FUNCTION void shortSortKeys_2(KeyType keys[2])
Sort length-2 array of keys.
KOKKOS_FUNCTION void shortSortKeysAndValues_3(KeyType keys[3], ValueType values[3])
Sort keys and values jointly, by keys, for arrays of length 3.
KOKKOS_FUNCTION void shellSortKeysAndValues(KeyType keys[], ValueType values[], const IndexType n)
Shellsort (yes, it&#39;s one word) the input array keys, and apply the resulting permutation to the input...
#define TPETRA_DETAILS_SWAP_KEYS(i, j)
Macro that swaps the i and j entries of keys, if keys[i] &gt; keysj.
Replace old values with zero.
KOKKOS_FUNCTION void shortSortKeysAndValues_8(KeyType keys[8], ValueType values[8])
Sort keys and values jointly, by keys, for arrays of length 8.
#define TPETRA_DETAILS_SWAP_KEYSANDVALUES(i, j)
Macro that swaps the i and j entries of keys and values, if keys[i] &gt; keysj.