diff options
Diffstat (limited to 'apps/plugins/sdl/src/stdlib/SDL_qsort.c')
-rw-r--r-- | apps/plugins/sdl/src/stdlib/SDL_qsort.c | 443 |
1 files changed, 443 insertions, 0 deletions
diff --git a/apps/plugins/sdl/src/stdlib/SDL_qsort.c b/apps/plugins/sdl/src/stdlib/SDL_qsort.c new file mode 100644 index 0000000000..2b5abea8ab --- /dev/null +++ b/apps/plugins/sdl/src/stdlib/SDL_qsort.c | |||
@@ -0,0 +1,443 @@ | |||
1 | /* qsort.c | ||
2 | * (c) 1998 Gareth McCaughan | ||
3 | * | ||
4 | * This is a drop-in replacement for the C library's |qsort()| routine. | ||
5 | * | ||
6 | * Features: | ||
7 | * - Median-of-three pivoting (and more) | ||
8 | * - Truncation and final polishing by a single insertion sort | ||
9 | * - Early truncation when no swaps needed in pivoting step | ||
10 | * - Explicit recursion, guaranteed not to overflow | ||
11 | * - A few little wrinkles stolen from the GNU |qsort()|. | ||
12 | * - separate code for non-aligned / aligned / word-size objects | ||
13 | * | ||
14 | * This code may be reproduced freely provided | ||
15 | * - this file is retained unaltered apart from minor | ||
16 | * changes for portability and efficiency | ||
17 | * - no changes are made to this comment | ||
18 | * - any changes that *are* made are clearly flagged | ||
19 | * - the _ID string below is altered by inserting, after | ||
20 | * the date, the string " altered" followed at your option | ||
21 | * by other material. (Exceptions: you may change the name | ||
22 | * of the exported routine without changing the ID string. | ||
23 | * You may change the values of the macros TRUNC_* and | ||
24 | * PIVOT_THRESHOLD without changing the ID string, provided | ||
25 | * they remain constants with TRUNC_nonaligned, TRUNC_aligned | ||
26 | * and TRUNC_words/WORD_BYTES between 8 and 24, and | ||
27 | * PIVOT_THRESHOLD between 32 and 200.) | ||
28 | * | ||
29 | * You may use it in anything you like; you may make money | ||
30 | * out of it; you may distribute it in object form or as | ||
31 | * part of an executable without including source code; | ||
32 | * you don't have to credit me. (But it would be nice if | ||
33 | * you did.) | ||
34 | * | ||
35 | * If you find problems with this code, or find ways of | ||
36 | * making it significantly faster, please let me know! | ||
37 | * My e-mail address, valid as of early 1998 and certainly | ||
38 | * OK for at least the next 18 months, is | ||
39 | * gjm11@dpmms.cam.ac.uk | ||
40 | * Thanks! | ||
41 | * | ||
42 | * Gareth McCaughan Peterhouse Cambridge 1998 | ||
43 | */ | ||
44 | #include "SDL_config.h" | ||
45 | |||
46 | /* | ||
47 | #include <assert.h> | ||
48 | #include <stdlib.h> | ||
49 | #include <string.h> | ||
50 | */ | ||
51 | #include "SDL_stdinc.h" | ||
52 | |||
53 | #ifdef assert | ||
54 | #undef assert | ||
55 | #endif | ||
56 | #define assert(X) | ||
57 | #ifdef malloc | ||
58 | #undef malloc | ||
59 | #endif | ||
60 | #define malloc SDL_malloc | ||
61 | #ifdef free | ||
62 | #undef free | ||
63 | #endif | ||
64 | #define free SDL_free | ||
65 | #ifdef memcpy | ||
66 | #undef memcpy | ||
67 | #endif | ||
68 | #define memcpy SDL_memcpy | ||
69 | #ifdef memmove | ||
70 | #undef memmove | ||
71 | #endif | ||
72 | #define memmove SDL_memmove | ||
73 | #ifdef qsort | ||
74 | #undef qsort | ||
75 | #endif | ||
76 | #define qsort SDL_qsort | ||
77 | |||
78 | |||
79 | #ifndef HAVE_QSORT | ||
80 | |||
81 | static char _ID[]="<qsort.c gjm 1.12 1998-03-19>"; | ||
82 | |||
83 | /* How many bytes are there per word? (Must be a power of 2, | ||
84 | * and must in fact equal sizeof(int).) | ||
85 | */ | ||
86 | #define WORD_BYTES sizeof(int) | ||
87 | |||
88 | /* How big does our stack need to be? Answer: one entry per | ||
89 | * bit in a |size_t|. | ||
90 | */ | ||
91 | #define STACK_SIZE (8*sizeof(size_t)) | ||
92 | |||
93 | /* Different situations have slightly different requirements, | ||
94 | * and we make life epsilon easier by using different truncation | ||
95 | * points for the three different cases. | ||
96 | * So far, I have tuned TRUNC_words and guessed that the same | ||
97 | * value might work well for the other two cases. Of course | ||
98 | * what works well on my machine might work badly on yours. | ||
99 | */ | ||
100 | #define TRUNC_nonaligned 12 | ||
101 | #define TRUNC_aligned 12 | ||
102 | #define TRUNC_words 12*WORD_BYTES /* nb different meaning */ | ||
103 | |||
104 | /* We use a simple pivoting algorithm for shortish sub-arrays | ||
105 | * and a more complicated one for larger ones. The threshold | ||
106 | * is PIVOT_THRESHOLD. | ||
107 | */ | ||
108 | #define PIVOT_THRESHOLD 40 | ||
109 | |||
110 | typedef struct { char * first; char * last; } stack_entry; | ||
111 | #define pushLeft {stack[stacktop].first=ffirst;stack[stacktop++].last=last;} | ||
112 | #define pushRight {stack[stacktop].first=first;stack[stacktop++].last=llast;} | ||
113 | #define doLeft {first=ffirst;llast=last;continue;} | ||
114 | #define doRight {ffirst=first;last=llast;continue;} | ||
115 | #define pop {if (--stacktop<0) break;\ | ||
116 | first=ffirst=stack[stacktop].first;\ | ||
117 | last=llast=stack[stacktop].last;\ | ||
118 | continue;} | ||
119 | |||
120 | /* Some comments on the implementation. | ||
121 | * 1. When we finish partitioning the array into "low" | ||
122 | * and "high", we forget entirely about short subarrays, | ||
123 | * because they'll be done later by insertion sort. | ||
124 | * Doing lots of little insertion sorts might be a win | ||
125 | * on large datasets for locality-of-reference reasons, | ||
126 | * but it makes the code much nastier and increases | ||
127 | * bookkeeping overhead. | ||
128 | * 2. We always save the shorter and get to work on the | ||
129 | * longer. This guarantees that every time we push | ||
130 | * an item onto the stack its size is <= 1/2 of that | ||
131 | * of its parent; so the stack can't need more than | ||
132 | * log_2(max-array-size) entries. | ||
133 | * 3. We choose a pivot by looking at the first, last | ||
134 | * and middle elements. We arrange them into order | ||
135 | * because it's easy to do that in conjunction with | ||
136 | * choosing the pivot, and it makes things a little | ||
137 | * easier in the partitioning step. Anyway, the pivot | ||
138 | * is the middle of these three. It's still possible | ||
139 | * to construct datasets where the algorithm takes | ||
140 | * time of order n^2, but it simply never happens in | ||
141 | * practice. | ||
142 | * 3' Newsflash: On further investigation I find that | ||
143 | * it's easy to construct datasets where median-of-3 | ||
144 | * simply isn't good enough. So on large-ish subarrays | ||
145 | * we do a more sophisticated pivoting: we take three | ||
146 | * sets of 3 elements, find their medians, and then | ||
147 | * take the median of those. | ||
148 | * 4. We copy the pivot element to a separate place | ||
149 | * because that way we can always do our comparisons | ||
150 | * directly against a pointer to that separate place, | ||
151 | * and don't have to wonder "did we move the pivot | ||
152 | * element?". This makes the inner loop better. | ||
153 | * 5. It's possible to make the pivoting even more | ||
154 | * reliable by looking at more candidates when n | ||
155 | * is larger. (Taking this to its logical conclusion | ||
156 | * results in a variant of quicksort that doesn't | ||
157 | * have that n^2 worst case.) However, the overhead | ||
158 | * from the extra bookkeeping means that it's just | ||
159 | * not worth while. | ||
160 | * 6. This is pretty clean and portable code. Here are | ||
161 | * all the potential portability pitfalls and problems | ||
162 | * I know of: | ||
163 | * - In one place (the insertion sort) I construct | ||
164 | * a pointer that points just past the end of the | ||
165 | * supplied array, and assume that (a) it won't | ||
166 | * compare equal to any pointer within the array, | ||
167 | * and (b) it will compare equal to a pointer | ||
168 | * obtained by stepping off the end of the array. | ||
169 | * These might fail on some segmented architectures. | ||
170 | * - I assume that there are 8 bits in a |char| when | ||
171 | * computing the size of stack needed. This would | ||
172 | * fail on machines with 9-bit or 16-bit bytes. | ||
173 | * - I assume that if |((int)base&(sizeof(int)-1))==0| | ||
174 | * and |(size&(sizeof(int)-1))==0| then it's safe to | ||
175 | * get at array elements via |int*|s, and that if | ||
176 | * actually |size==sizeof(int)| as well then it's | ||
177 | * safe to treat the elements as |int|s. This might | ||
178 | * fail on systems that convert pointers to integers | ||
179 | * in non-standard ways. | ||
180 | * - I assume that |8*sizeof(size_t)<=INT_MAX|. This | ||
181 | * would be false on a machine with 8-bit |char|s, | ||
182 | * 16-bit |int|s and 4096-bit |size_t|s. :-) | ||
183 | */ | ||
184 | |||
185 | /* The recursion logic is the same in each case: */ | ||
186 | #define Recurse(Trunc) \ | ||
187 | { size_t l=last-ffirst,r=llast-first; \ | ||
188 | if (l<Trunc) { \ | ||
189 | if (r>=Trunc) doRight \ | ||
190 | else pop \ | ||
191 | } \ | ||
192 | else if (l<=r) { pushLeft; doRight } \ | ||
193 | else if (r>=Trunc) { pushRight; doLeft }\ | ||
194 | else doLeft \ | ||
195 | } | ||
196 | |||
197 | /* and so is the pivoting logic: */ | ||
198 | #define Pivot(swapper,sz) \ | ||
199 | if ((size_t)(last-first)>PIVOT_THRESHOLD*sz) mid=pivot_big(first,mid,last,sz,compare);\ | ||
200 | else { \ | ||
201 | if (compare(first,mid)<0) { \ | ||
202 | if (compare(mid,last)>0) { \ | ||
203 | swapper(mid,last); \ | ||
204 | if (compare(first,mid)>0) swapper(first,mid);\ | ||
205 | } \ | ||
206 | } \ | ||
207 | else { \ | ||
208 | if (compare(mid,last)>0) swapper(first,last)\ | ||
209 | else { \ | ||
210 | swapper(first,mid); \ | ||
211 | if (compare(mid,last)>0) swapper(mid,last);\ | ||
212 | } \ | ||
213 | } \ | ||
214 | first+=sz; last-=sz; \ | ||
215 | } | ||
216 | |||
217 | #ifdef DEBUG_QSORT | ||
218 | #include <stdio.h> | ||
219 | #endif | ||
220 | |||
221 | /* and so is the partitioning logic: */ | ||
222 | #define Partition(swapper,sz) { \ | ||
223 | int swapped=0; \ | ||
224 | do { \ | ||
225 | while (compare(first,pivot)<0) first+=sz; \ | ||
226 | while (compare(pivot,last)<0) last-=sz; \ | ||
227 | if (first<last) { \ | ||
228 | swapper(first,last); swapped=1; \ | ||
229 | first+=sz; last-=sz; } \ | ||
230 | else if (first==last) { first+=sz; last-=sz; break; }\ | ||
231 | } while (first<=last); \ | ||
232 | if (!swapped) pop \ | ||
233 | } | ||
234 | |||
235 | /* and so is the pre-insertion-sort operation of putting | ||
236 | * the smallest element into place as a sentinel. | ||
237 | * Doing this makes the inner loop nicer. I got this | ||
238 | * idea from the GNU implementation of qsort(). | ||
239 | */ | ||
240 | #define PreInsertion(swapper,limit,sz) \ | ||
241 | first=base; \ | ||
242 | last=first + (nmemb>limit ? limit : nmemb-1)*sz;\ | ||
243 | while (last!=base) { \ | ||
244 | if (compare(first,last)>0) first=last; \ | ||
245 | last-=sz; } \ | ||
246 | if (first!=base) swapper(first,(char*)base); | ||
247 | |||
248 | /* and so is the insertion sort, in the first two cases: */ | ||
249 | #define Insertion(swapper) \ | ||
250 | last=((char*)base)+nmemb*size; \ | ||
251 | for (first=((char*)base)+size;first!=last;first+=size) { \ | ||
252 | char *test; \ | ||
253 | /* Find the right place for |first|. \ | ||
254 | * My apologies for var reuse. */ \ | ||
255 | for (test=first-size;compare(test,first)>0;test-=size) ; \ | ||
256 | test+=size; \ | ||
257 | if (test!=first) { \ | ||
258 | /* Shift everything in [test,first) \ | ||
259 | * up by one, and place |first| \ | ||
260 | * where |test| is. */ \ | ||
261 | memcpy(pivot,first,size); \ | ||
262 | memmove(test+size,test,first-test); \ | ||
263 | memcpy(test,pivot,size); \ | ||
264 | } \ | ||
265 | } | ||
266 | |||
267 | #define SWAP_nonaligned(a,b) { \ | ||
268 | register char *aa=(a),*bb=(b); \ | ||
269 | register size_t sz=size; \ | ||
270 | do { register char t=*aa; *aa++=*bb; *bb++=t; } while (--sz); } | ||
271 | |||
272 | #define SWAP_aligned(a,b) { \ | ||
273 | register int *aa=(int*)(a),*bb=(int*)(b); \ | ||
274 | register size_t sz=size; \ | ||
275 | do { register int t=*aa;*aa++=*bb; *bb++=t; } while (sz-=WORD_BYTES); } | ||
276 | |||
277 | #define SWAP_words(a,b) { \ | ||
278 | register int t=*((int*)a); *((int*)a)=*((int*)b); *((int*)b)=t; } | ||
279 | |||
280 | /* ---------------------------------------------------------------------- */ | ||
281 | |||
282 | static char * pivot_big(char *first, char *mid, char *last, size_t size, | ||
283 | int compare(const void *, const void *)) { | ||
284 | size_t d=(((last-first)/size)>>3)*size; | ||
285 | char *m1,*m2,*m3; | ||
286 | { char *a=first, *b=first+d, *c=first+2*d; | ||
287 | #ifdef DEBUG_QSORT | ||
288 | fprintf(stderr,"< %d %d %d\n",*(int*)a,*(int*)b,*(int*)c); | ||
289 | #endif | ||
290 | m1 = compare(a,b)<0 ? | ||
291 | (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a)) | ||
292 | : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b)); | ||
293 | } | ||
294 | { char *a=mid-d, *b=mid, *c=mid+d; | ||
295 | #ifdef DEBUG_QSORT | ||
296 | fprintf(stderr,". %d %d %d\n",*(int*)a,*(int*)b,*(int*)c); | ||
297 | #endif | ||
298 | m2 = compare(a,b)<0 ? | ||
299 | (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a)) | ||
300 | : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b)); | ||
301 | } | ||
302 | { char *a=last-2*d, *b=last-d, *c=last; | ||
303 | #ifdef DEBUG_QSORT | ||
304 | fprintf(stderr,"> %d %d %d\n",*(int*)a,*(int*)b,*(int*)c); | ||
305 | #endif | ||
306 | m3 = compare(a,b)<0 ? | ||
307 | (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a)) | ||
308 | : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b)); | ||
309 | } | ||
310 | #ifdef DEBUG_QSORT | ||
311 | fprintf(stderr,"-> %d %d %d\n",*(int*)m1,*(int*)m2,*(int*)m3); | ||
312 | #endif | ||
313 | return compare(m1,m2)<0 ? | ||
314 | (compare(m2,m3)<0 ? m2 : (compare(m1,m3)<0 ? m3 : m1)) | ||
315 | : (compare(m1,m3)<0 ? m1 : (compare(m2,m3)<0 ? m3 : m2)); | ||
316 | } | ||
317 | |||
318 | /* ---------------------------------------------------------------------- */ | ||
319 | |||
320 | static void qsort_nonaligned(void *base, size_t nmemb, size_t size, | ||
321 | int (*compare)(const void *, const void *)) { | ||
322 | |||
323 | stack_entry stack[STACK_SIZE]; | ||
324 | int stacktop=0; | ||
325 | char *first,*last; | ||
326 | char *pivot=malloc(size); | ||
327 | size_t trunc=TRUNC_nonaligned*size; | ||
328 | assert(pivot!=0); | ||
329 | |||
330 | first=(char*)base; last=first+(nmemb-1)*size; | ||
331 | |||
332 | if ((size_t)(last-first)>trunc) { | ||
333 | char *ffirst=first, *llast=last; | ||
334 | while (1) { | ||
335 | /* Select pivot */ | ||
336 | { char * mid=first+size*((last-first)/size >> 1); | ||
337 | Pivot(SWAP_nonaligned,size); | ||
338 | memcpy(pivot,mid,size); | ||
339 | } | ||
340 | /* Partition. */ | ||
341 | Partition(SWAP_nonaligned,size); | ||
342 | /* Prepare to recurse/iterate. */ | ||
343 | Recurse(trunc) | ||
344 | } | ||
345 | } | ||
346 | PreInsertion(SWAP_nonaligned,TRUNC_nonaligned,size); | ||
347 | Insertion(SWAP_nonaligned); | ||
348 | free(pivot); | ||
349 | } | ||
350 | |||
351 | static void qsort_aligned(void *base, size_t nmemb, size_t size, | ||
352 | int (*compare)(const void *, const void *)) { | ||
353 | |||
354 | stack_entry stack[STACK_SIZE]; | ||
355 | int stacktop=0; | ||
356 | char *first,*last; | ||
357 | char *pivot=malloc(size); | ||
358 | size_t trunc=TRUNC_aligned*size; | ||
359 | assert(pivot!=0); | ||
360 | |||
361 | first=(char*)base; last=first+(nmemb-1)*size; | ||
362 | |||
363 | if ((size_t)(last-first)>trunc) { | ||
364 | char *ffirst=first,*llast=last; | ||
365 | while (1) { | ||
366 | /* Select pivot */ | ||
367 | { char * mid=first+size*((last-first)/size >> 1); | ||
368 | Pivot(SWAP_aligned,size); | ||
369 | memcpy(pivot,mid,size); | ||
370 | } | ||
371 | /* Partition. */ | ||
372 | Partition(SWAP_aligned,size); | ||
373 | /* Prepare to recurse/iterate. */ | ||
374 | Recurse(trunc) | ||
375 | } | ||
376 | } | ||
377 | PreInsertion(SWAP_aligned,TRUNC_aligned,size); | ||
378 | Insertion(SWAP_aligned); | ||
379 | free(pivot); | ||
380 | } | ||
381 | |||
382 | static void qsort_words(void *base, size_t nmemb, | ||
383 | int (*compare)(const void *, const void *)) { | ||
384 | |||
385 | stack_entry stack[STACK_SIZE]; | ||
386 | int stacktop=0; | ||
387 | char *first,*last; | ||
388 | char *pivot=malloc(WORD_BYTES); | ||
389 | assert(pivot!=0); | ||
390 | |||
391 | first=(char*)base; last=first+(nmemb-1)*WORD_BYTES; | ||
392 | |||
393 | if (last-first>TRUNC_words) { | ||
394 | char *ffirst=first, *llast=last; | ||
395 | while (1) { | ||
396 | #ifdef DEBUG_QSORT | ||
397 | fprintf(stderr,"Doing %d:%d: ", | ||
398 | (first-(char*)base)/WORD_BYTES, | ||
399 | (last-(char*)base)/WORD_BYTES); | ||
400 | #endif | ||
401 | /* Select pivot */ | ||
402 | { char * mid=first+WORD_BYTES*((last-first) / (2*WORD_BYTES)); | ||
403 | Pivot(SWAP_words,WORD_BYTES); | ||
404 | *(int*)pivot=*(int*)mid; | ||
405 | } | ||
406 | #ifdef DEBUG_QSORT | ||
407 | fprintf(stderr,"pivot=%d\n",*(int*)pivot); | ||
408 | #endif | ||
409 | /* Partition. */ | ||
410 | Partition(SWAP_words,WORD_BYTES); | ||
411 | /* Prepare to recurse/iterate. */ | ||
412 | Recurse(TRUNC_words) | ||
413 | } | ||
414 | } | ||
415 | PreInsertion(SWAP_words,(TRUNC_words/WORD_BYTES),WORD_BYTES); | ||
416 | /* Now do insertion sort. */ | ||
417 | last=((char*)base)+nmemb*WORD_BYTES; | ||
418 | for (first=((char*)base)+WORD_BYTES;first!=last;first+=WORD_BYTES) { | ||
419 | /* Find the right place for |first|. My apologies for var reuse */ | ||
420 | int *pl=(int*)(first-WORD_BYTES),*pr=(int*)first; | ||
421 | *(int*)pivot=*(int*)first; | ||
422 | for (;compare(pl,pivot)>0;pr=pl,--pl) { | ||
423 | *pr=*pl; } | ||
424 | if (pr!=(int*)first) *pr=*(int*)pivot; | ||
425 | } | ||
426 | free(pivot); | ||
427 | } | ||
428 | |||
429 | /* ---------------------------------------------------------------------- */ | ||
430 | |||
431 | void qsort(void *base, size_t nmemb, size_t size, | ||
432 | int (*compare)(const void *, const void *)) { | ||
433 | |||
434 | if (nmemb<=1) return; | ||
435 | if (((uintptr_t)base|size)&(WORD_BYTES-1)) | ||
436 | qsort_nonaligned(base,nmemb,size,compare); | ||
437 | else if (size!=WORD_BYTES) | ||
438 | qsort_aligned(base,nmemb,size,compare); | ||
439 | else | ||
440 | qsort_words(base,nmemb,compare); | ||
441 | } | ||
442 | |||
443 | #endif /* !HAVE_QSORT */ | ||