IFPACK  Development
 All Classes Namespaces Files Functions Variables Enumerations Friends Pages
Ifpack_MultiListSort.c
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack: Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2002) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
43 #include "Ifpack_config.h"
44 
45 /* Modified by Edmond Chow, to sort integers and carry along an array of
46  doubles */
47 #if 0 /* test program */
48 
49 #include <stdlib.h>
50 #include <stdio.h>
51 
52 void ifpack_multilist_sort (int *const pbase, double *const daux, size_t total_elems);
53 #define QSORTLEN 20
54 
55 int main()
56 {
57  int h[QSORTLEN];
58  double d[QSORTLEN];
59  int i;
60 
61  for (i=0; i<QSORTLEN; i++)
62  {
63  h[i] = rand() >> 20;
64  d[i] = (double) -h[i];
65  }
66 
67  printf("before sorting\n");
68  for (i=0; i<QSORTLEN; i++)
69  printf("%d %f\n", h[i], d[i]);
70 
71  ifpack_multilist_sort(h, d, QSORTLEN);
72 
73  printf("after sorting\n");
74  for (i=0; i<QSORTLEN; i++)
75  printf("%d %f\n", h[i], d[i]);
76 
77  return 0;
78 }
79 #endif /* test program */
80 
81 /* Copyright (C) 1991, 1992, 1996, 1997 Free Software Foundation, Inc.
82  This file is part of the GNU C Library.
83  Written by Douglas C. Schmidt (schmidt@ics.uci.edu).
84 
85  The GNU C Library is free software; you can redistribute it and/or
86  modify it under the terms of the GNU Library General Public License as
87  published by the Free Software Foundation; either version 2 of the
88  License, or (at your option) any later version.
89 
90  The GNU C Library is distributed in the hope that it will be useful,
91  but WITHOUT ANY WARRANTY; without even the implied warranty of
92  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
93  Library General Public License for more details.
94 
95  You should have received a copy of the GNU Library General Public
96  License along with the GNU C Library; see the file COPYING.LIB. If not,
97  write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
98  Boston, MA 02111-1307, USA. */
99 
100 #include <string.h>
101 
102 /* Swap integers pointed to by a and b, and corresponding doubles */
103 #define SWAP(a, b) \
104  do { \
105  itemp = *a; \
106  *a = *b; \
107  *b = itemp; \
108  dtemp = daux[a-pbase]; \
109  daux[a-pbase] = daux[b-pbase]; \
110  daux[b-pbase] = dtemp; \
111  } while (0)
112 
113 /* Discontinue quicksort algorithm when partition gets below this size.
114  This particular magic number was chosen to work best on a Sun 4/260. */
115 #define MAX_THRESH 4
116 
117 /* Stack node declarations used to store unfulfilled partition obligations. */
118 typedef struct
119  {
120  int *lo;
121  int *hi;
122  } stack_node;
123 
124 /* The next 4 #defines implement a very fast in-line stack abstraction. */
125 #define STACK_SIZE (8 * sizeof(unsigned long int))
126 #define PUSH(low, high) ((void) ((top->lo = (low)), (top->hi = (high)), ++top))
127 #define POP(low, high) ((void) (--top, (low = top->lo), (high = top->hi)))
128 #define STACK_NOT_EMPTY (stack < top)
129 
130 
131 /* Order size using quicksort. This implementation incorporates
132  four optimizations discussed in Sedgewick:
133 
134  1. Non-recursive, using an explicit stack of pointer that store the
135  next array partition to sort. To save time, this maximum amount
136  of space required to store an array of MAX_INT is allocated on the
137  stack. Assuming a 32-bit integer, this needs only 32 *
138  sizeof(stack_node) == 136 bits. Pretty cheap, actually.
139 
140  2. Chose the pivot element using a median-of-three decision tree.
141  This reduces the probability of selecting a bad pivot value and
142  eliminates certain extraneous comparisons.
143 
144  3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving
145  insertion sort to order the MAX_THRESH items within each partition.
146  This is a big win, since insertion sort is faster for small, mostly
147  sorted array segments.
148 
149  4. The larger of the two sub-partitions is always pushed onto the
150  stack first, with the algorithm then concentrating on the
151  smaller partition. This *guarantees* no more than log (n)
152  stack size is needed (actually O(1) in this case)! */
153 
154 void ifpack_multilist_sort (int *const pbase, double *const daux, size_t total_elems)
155 {
156  int itemp;
157  double dtemp;
158  const size_t size = 1;
159  register int *base_ptr = (int *) pbase;
160 
161  /* Allocating SIZE bytes for a pivot buffer facilitates a better
162  algorithm below since we can do comparisons directly on the pivot. */
163  int pivot_buffer[1];
164  const size_t max_thresh = MAX_THRESH * size;
165 
166  /* edmond: return if total_elems less than zero */
167  if (total_elems <= 0)
168  /* Avoid lossage with unsigned arithmetic below. */
169  return;
170 
171  if (total_elems > MAX_THRESH)
172  {
173  int *lo = base_ptr;
174  int *hi = &lo[size * (total_elems - 1)];
175  /* Largest size needed for 32-bit int!!! */
176  stack_node stack[STACK_SIZE];
177  stack_node *top = stack + 1;
178 
179  while (STACK_NOT_EMPTY)
180  {
181  int *left_ptr;
182  int *right_ptr;
183 
184  int *pivot = pivot_buffer;
185 
186  /* Select median value from among LO, MID, and HI. Rearrange
187  LO and HI so the three values are sorted. This lowers the
188  probability of picking a pathological pivot value and
189  skips a comparison for both the LEFT_PTR and RIGHT_PTR. */
190 
191  int *mid = lo + size * ((hi - lo) / size >> 1);
192 
193  if (*mid - *lo < 0)
194  SWAP (mid, lo);
195  if (*hi - *mid < 0)
196  SWAP (mid, hi);
197  else
198  goto jump_over;
199  if (*mid - *lo < 0)
200  SWAP (mid, lo);
201  jump_over:;
202  *pivot = *mid;
203  pivot = pivot_buffer;
204 
205  left_ptr = lo + size;
206  right_ptr = hi - size;
207 
208  /* Here's the famous ``collapse the walls'' section of quicksort.
209  Gotta like those tight inner loops! They are the main reason
210  that this algorithm runs much faster than others. */
211  do
212  {
213  while (*left_ptr - *pivot < 0)
214  left_ptr += size;
215 
216  while (*pivot - *right_ptr < 0)
217  right_ptr -= size;
218 
219  if (left_ptr < right_ptr)
220  {
221  SWAP (left_ptr, right_ptr);
222  left_ptr += size;
223  right_ptr -= size;
224  }
225  else if (left_ptr == right_ptr)
226  {
227  left_ptr += size;
228  right_ptr -= size;
229  break;
230  }
231  }
232  while (left_ptr <= right_ptr);
233 
234  /* Set up pointers for next iteration. First determine whether
235  left and right partitions are below the threshold size. If so,
236  ignore one or both. Otherwise, push the larger partition's
237  bounds on the stack and continue sorting the smaller one. */
238 
239  if ((size_t) (right_ptr - lo) <= max_thresh)
240  {
241  if ((size_t) (hi - left_ptr) <= max_thresh)
242  /* Ignore both small partitions. */
243  POP (lo, hi);
244  else
245  /* Ignore small left partition. */
246  lo = left_ptr;
247  }
248  else if ((size_t) (hi - left_ptr) <= max_thresh)
249  /* Ignore small right partition. */
250  hi = right_ptr;
251  else if ((right_ptr - lo) > (hi - left_ptr))
252  {
253  /* Push larger left partition indices. */
254  PUSH (lo, right_ptr);
255  lo = left_ptr;
256  }
257  else
258  {
259  /* Push larger right partition indices. */
260  PUSH (left_ptr, hi);
261  hi = right_ptr;
262  }
263  }
264  }
265 
266  /* Once the BASE_PTR array is partially sorted by quicksort the rest
267  is completely sorted using insertion sort, since this is efficient
268  for partitions below MAX_THRESH size. BASE_PTR points to the beginning
269  of the array to sort, and END_PTR points at the very last element in
270  the array (*not* one beyond it!). */
271 
272 #define min(x, y) ((x) < (y) ? (x) : (y))
273 
274  {
275  int *const end_ptr = &base_ptr[size * (total_elems - 1)];
276  int *tmp_ptr = base_ptr;
277  int *thresh = min(end_ptr, base_ptr + max_thresh);
278  register int *run_ptr;
279 
280  /* Find smallest element in first threshold and place it at the
281  array's beginning. This is the smallest array element,
282  and the operation speeds up insertion sort's inner loop. */
283 
284  for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size)
285  if (*run_ptr - *tmp_ptr < 0)
286  tmp_ptr = run_ptr;
287 
288  if (tmp_ptr != base_ptr)
289  SWAP (tmp_ptr, base_ptr);
290 
291  /* Insertion sort, running from left-hand-side up to right-hand-side. */
292 
293  run_ptr = base_ptr + size;
294  while ((run_ptr += size) <= end_ptr)
295  {
296  tmp_ptr = run_ptr - size;
297  while (*run_ptr - *tmp_ptr < 0)
298  tmp_ptr -= size;
299 
300  tmp_ptr += size;
301  if (tmp_ptr != run_ptr)
302  {
303  int *trav;
304 
305  trav = run_ptr + size;
306  while (--trav >= run_ptr)
307  {
308  int c = *trav;
309  double c2 = daux[trav-pbase];
310 
311  int *hi, *lo;
312 
313  for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo)
314  {
315  *hi = *lo;
316  daux[hi-pbase] = daux[lo-pbase];
317  }
318  *hi = c;
319  daux[hi-pbase] = c2;
320  }
321  }
322  }
323  }
324 }