xref: /illumos-gate/usr/src/cmd/sort/internal.c (revision 101e15b5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include "internal.h"
27 
28 #define	INSERTION_THRESHOLD	12
29 
30 static void
swap_range(int i,int j,int n,line_rec_t ** I)31 swap_range(int i, int j, int n, line_rec_t **I)
32 {
33 	while (n-- > 0) {
34 		line_rec_t *t;
35 
36 		t = I[i];
37 		I[i++] = I[j];
38 		I[j++] = t;
39 	}
40 }
41 
42 /*
43  * offset_is_algorithm() implements a simple insertion sort on line records that
44  * allows comparison from an offset into the l_collate field (since the subfiles
45  * should have already been sorted to that depth).
46  */
47 static void
offset_is_algorithm(line_rec_t ** X,ssize_t n,int (* collate_fcn)(line_rec_t *,line_rec_t *,ssize_t,flag_t),ssize_t depth,flag_t coll_flags)48 offset_is_algorithm(line_rec_t **X, ssize_t n,
49     int (*collate_fcn)(line_rec_t *, line_rec_t *, ssize_t, flag_t),
50     ssize_t depth, flag_t coll_flags)
51 {
52 	ssize_t i;
53 
54 	__S(stats_incr_subfiles());
55 
56 	/*
57 	 * Place lowest element in X[0].
58 	 */
59 	for (i = 0; i < n; i++) {
60 		if (collate_fcn(X[0], X[i], depth, coll_flags) > 0) {
61 			swap((void **)&X[0], (void **)&X[i]);
62 			ASSERT(collate_fcn(X[0], X[i], depth, coll_flags) <= 0);
63 		}
64 	}
65 
66 	/*
67 	 * Insertion sort.
68 	 */
69 	for (i = 2; i < n; i++) {
70 		ssize_t j = i;
71 		line_rec_t *t = X[i];
72 		while (collate_fcn(t, X[j - 1], depth, coll_flags) < 0) {
73 			X[j] = X[j - 1];
74 			j--;
75 			ASSERT(j > 0);
76 		}
77 		X[j] = t;
78 	}
79 }
80 
81 /*
82  * tqs_algorithm() is called from rqs_algorithm() when a subpartition is
83  * encountered whose line records share indistinguishable l_collate fields.  It
84  * implements a semi-iterative ternary quicksort.
85  */
86 static void
tqs_algorithm(line_rec_t ** X,ssize_t n,int (* collate_fcn)(line_rec_t *,line_rec_t *,ssize_t,flag_t),flag_t coll_flags)87 tqs_algorithm(line_rec_t **X, ssize_t n,
88     int (*collate_fcn)(line_rec_t *, line_rec_t *, ssize_t, flag_t),
89     flag_t coll_flags)
90 {
91 	ssize_t	l;			/* boundary of left partition */
92 	ssize_t	le;			/* boundary of left equal partition */
93 	ssize_t	r;			/* boundary of right partition */
94 	ssize_t	re;			/* boundary of right equal partition */
95 
96 	ssize_t	p, q;			/* scratch for indices, comparisons */
97 
98 	coll_flags |= COLL_DATA_ONLY;
99 
100 tqs_start:
101 
102 	/*
103 	 * completion criteria
104 	 */
105 	if (n <= 1)
106 		return;
107 
108 	if (n <= INSERTION_THRESHOLD) {
109 		offset_is_algorithm(X, n, collate_fcn, 0, coll_flags);
110 		return;
111 	}
112 
113 	/*
114 	 * selection of partition element
115 	 */
116 	le = rand() % n;
117 	swap((void **)&X[0], (void **)&X[le]);
118 
119 	le = l = 1;
120 	r = re = n - 1;
121 
122 	for (;;) {
123 		while (l <= r &&
124 		    (p = collate_fcn(X[l], X[0], 0, coll_flags)) <= 0) {
125 			if (p == 0)
126 				swap((void **)&X[le++], (void **)&X[l]);
127 			l++;
128 		}
129 
130 		while (l <= r &&
131 		    (p = collate_fcn(X[r], X[0], 0, coll_flags)) >= 0) {
132 			if (p == 0)
133 				swap((void **)&X[r], (void **)&X[re--]);
134 			r--;
135 		}
136 
137 		if (l > r)
138 			break;
139 
140 		swap((void **)&X[l++], (void **)&X[r--]);
141 	}
142 
143 	/*
144 	 * swap equal partitions into middle
145 	 */
146 	p = MIN(le, l - le);
147 	swap_range(0, l - p, p, X);
148 	p = MIN(re - r, n - re - 1);
149 	swap_range(l, n - p, p, X);
150 
151 	/*
152 	 * Iterate with larger subpartition, recurse into smaller.
153 	 */
154 	p = l - le;
155 	q = re - r;
156 
157 	if (p > q) {
158 		tqs_algorithm(&X[n - q], q, collate_fcn, coll_flags);
159 
160 		n = p;
161 	} else {
162 		tqs_algorithm(X, p, collate_fcn, coll_flags);
163 
164 		X = &X[n - q];
165 		n = q;
166 	}
167 
168 	goto tqs_start;
169 	/*NOTREACHED*/
170 }
171 
172 /*
173  * The following semi-iterative radix quicksort is derived from that presented
174  * in
175  *	J. Bentley and R. Sedgewick, Fast Algorithms for Sorting and Searching
176  *	Strings, in Eighth Annual ACM-SIAM Symposium on Discrete Algorithms,
177  *	1997 (SODA 1997),
178  * and
179  *	R. Sedgewick, Algorithms in C, 3rd ed., vol. 1, Addison-Wesley, 1998.
180  */
181 
182 static void
rqs_algorithm(line_rec_t ** X,ssize_t n,ssize_t depth,int (* collate_fcn)(line_rec_t *,line_rec_t *,ssize_t,flag_t),flag_t coll_flags)183 rqs_algorithm(line_rec_t **X, ssize_t n, ssize_t depth,
184     int (*collate_fcn)(line_rec_t *, line_rec_t *, ssize_t, flag_t),
185     flag_t coll_flags)
186 {
187 	uchar_t v;			/* partition radix value */
188 
189 	ssize_t	l;			/* boundary of left partition */
190 	ssize_t	le;			/* boundary of left equal partition */
191 	ssize_t	r;			/* boundary of right partition */
192 	ssize_t	re;			/* boundary of right equal partition */
193 
194 	ssize_t	p;			/* scratch for indices, comparisons */
195 	line_rec_t *t;			/* scratch for swaps */
196 
197 rqs_start:
198 
199 	/*
200 	 * completion criteria
201 	 */
202 	if (n <= 1)
203 		return;
204 
205 	if (n <= INSERTION_THRESHOLD) {
206 		offset_is_algorithm(X, n, collate_fcn, depth, coll_flags);
207 		return;
208 	}
209 
210 	/*
211 	 * selection of partition element
212 	 */
213 	le = rand() % n;
214 	swap((void **)&X[0], (void **)&X[le]);
215 	v = X[0]->l_collate.usp[depth];
216 
217 	le = l = 1;
218 	r = re = n - 1;
219 
220 	for (;;) {
221 		while (l <= r &&
222 		    (p = *(X[l]->l_collate.usp + depth) - v) <= 0) {
223 			if (p == 0) {
224 				t = X[le];
225 				X[le] = X[l];
226 				X[l] = t;
227 				le++;
228 			}
229 			(l)++;
230 		}
231 
232 		while (l <= r &&
233 		    (p = *(X[r]->l_collate.usp + depth) - v) >= 0) {
234 			if (p == 0) {
235 				t = X[r];
236 				X[r] = X[re];
237 				X[re] = t;
238 				(re)--;
239 			}
240 			(r)--;
241 		}
242 
243 		if (l > r)
244 			break;
245 
246 		t = X[l];
247 		X[l] = X[r];
248 		X[r] = t;
249 		(l)++;
250 		(r)--;
251 	}
252 
253 	/*
254 	 * swap equal partitions into middle
255 	 */
256 	p = MIN(le, l - le);
257 	swap_range(0, l - p, p, X);
258 	p = MIN(re - r, n - re - 1);
259 	swap_range(l, n - p, p, X);
260 
261 	/*
262 	 * recurse into subpartitions as necessary
263 	 */
264 	p = re - r;
265 	if (p > 0)
266 		rqs_algorithm(&X[n - p], p, depth, collate_fcn, coll_flags);
267 
268 	p = l - le;
269 	if (p > 0)
270 		rqs_algorithm(X, p, depth, collate_fcn, coll_flags);
271 
272 	if (le + n - re - 1 <= 1)
273 		return;
274 
275 	/*
276 	 * - 1 so that we don't count the final null.
277 	 */
278 	if (X[p]->l_collate_length - 1 > depth) {
279 		/*
280 		 * Equivalent recursion: rqs_algorithm(&X[p], le + n - re - 1,
281 		 * depth + 1, collate_fcn, coll_only);
282 		 */
283 		X = &X[p];
284 		n = le + n - re - 1;
285 		depth++;
286 		goto rqs_start;
287 	}
288 
289 	if (!(coll_flags & COLL_UNIQUE)) {
290 		__S(stats_incr_tqs_calls());
291 		tqs_algorithm(&X[p], le + n - re - 1, collate_fcn, coll_flags);
292 	}
293 }
294 
295 static void
radix_quicksort(stream_t * C,flag_t coll_flags)296 radix_quicksort(stream_t *C, flag_t coll_flags)
297 {
298 	ASSERT((C->s_status & STREAM_SOURCE_MASK) == STREAM_ARRAY);
299 
300 	if (C->s_element_size == sizeof (char))
301 		rqs_algorithm(C->s_type.LA.s_array, C->s_type.LA.s_array_size,
302 		    0, collated, coll_flags);
303 	else
304 		rqs_algorithm(C->s_type.LA.s_array, C->s_type.LA.s_array_size,
305 		    0, collated_wide, coll_flags);
306 }
307 
308 void
internal_sort(sort_t * S)309 internal_sort(sort_t *S)
310 {
311 	size_t input_mem, sort_mem;
312 	size_t prev_sort_mem = 0;
313 	void *prev_sort_buf = NULL;
314 
315 	int numerator, denominator;
316 	int memory_left;
317 	int currently_primed;
318 	flag_t coll_flags;
319 
320 	stream_t *sort_stream = NULL;
321 	stream_t *cur_stream;
322 
323 	set_memory_ratio(S, &numerator, &denominator);
324 	set_cleanup_chain(&S->m_temporary_streams);
325 
326 	if (S->m_field_options & FIELD_REVERSE_COMPARISONS)
327 		coll_flags = COLL_REVERSE;
328 	else
329 		coll_flags = 0;
330 
331 	/*
332 	 * For the entire line special case, we can speed comparisons by
333 	 * recognizing that the collation vector contains all the information
334 	 * required to order the line against other lines of the file.
335 	 * COLL_UNIQUE provides such an exit; if we use the standard put-line
336 	 * operation for the output stream, we achieve the desired fast path.
337 	 */
338 	if (S->m_entire_line)
339 		coll_flags |= COLL_UNIQUE;
340 
341 	hold_file_descriptor();
342 
343 	cur_stream = S->m_input_streams;
344 	while (cur_stream != NULL) {
345 		if (!(cur_stream->s_status & STREAM_OPEN)) {
346 			if (stream_open_for_read(S, cur_stream) == -1)
347 				die(EMSG_DESCRIPTORS);
348 		}
349 
350 		if (cur_stream->s_status & STREAM_MMAP) {
351 			input_mem = 0;
352 		} else {
353 			input_mem = (size_t)(((u_longlong_t)numerator *
354 			    S->m_memory_available) / denominator);
355 			stream_set_size(cur_stream, input_mem);
356 		}
357 
358 		sort_mem = S->m_memory_available - input_mem;
359 
360 		currently_primed = SOP_PRIME(cur_stream);
361 
362 		sort_stream = safe_realloc(sort_stream, sizeof (stream_t));
363 		stream_clear(sort_stream);
364 		sort_stream->s_buffer = prev_sort_buf;
365 		sort_stream->s_buffer_size = prev_sort_mem;
366 		stream_set(sort_stream, STREAM_OPEN | STREAM_ARRAY);
367 		sort_stream->s_element_size = S->m_single_byte_locale ?
368 		    sizeof (char) : sizeof (wchar_t);
369 		stream_set_size(sort_stream, sort_mem);
370 		prev_sort_buf = sort_stream->s_buffer;
371 		prev_sort_mem = sort_stream->s_buffer_size;
372 
373 		for (;;) {
374 			if (currently_primed == PRIME_SUCCEEDED) {
375 				memory_left =
376 				    stream_insert(S, cur_stream, sort_stream);
377 
378 				if (memory_left != ST_MEM_AVAIL)
379 					break;
380 			}
381 
382 			if (SOP_EOS(cur_stream)) {
383 				if (cur_stream->s_consumer == NULL) {
384 					(void) SOP_FREE(cur_stream);
385 					(void) SOP_CLOSE(cur_stream);
386 				}
387 
388 				cur_stream = cur_stream->s_next;
389 
390 				if (cur_stream == NULL)
391 					break;
392 
393 				if (!(cur_stream->s_status & STREAM_OPEN) &&
394 				    (stream_open_for_read(S, cur_stream) == -1))
395 					break;
396 
397 				if (!(cur_stream->s_status & STREAM_MMAP)) {
398 					input_mem = numerator *
399 					    S->m_memory_available / denominator;
400 					stream_set_size(cur_stream,
401 					    input_mem);
402 				}
403 				currently_primed = SOP_PRIME(cur_stream);
404 			}
405 		}
406 
407 		radix_quicksort(sort_stream, coll_flags);
408 
409 #ifndef DEBUG_NO_CACHE_TEMP
410 		/*
411 		 * cur_stream is set to NULL only when memory isn't filled and
412 		 * no more input streams remain.  In this case, we can safely
413 		 * cache the sort results.
414 		 *
415 		 * Otherwise, we have either exhausted available memory or
416 		 * available file descriptors.  If we've use all the available
417 		 * file descriptors, we aren't able to open the next input file.
418 		 * In this case, we can't cache the sort results, because more
419 		 * input files remain.
420 		 *
421 		 * If memory was filled, then there must be at least one
422 		 * leftover line unprocessed in the input stream, even though
423 		 * stream will indicated EOS if asked. We can't cache in this
424 		 * case, as we need one more round for the pending line or
425 		 * lines.
426 		 */
427 		if (cur_stream == NULL) {
428 			(void) stream_push_to_temporary(&S->m_temporary_streams,
429 			    sort_stream, ST_CACHE |
430 			    (S->m_single_byte_locale ? 0 : ST_WIDE));
431 			break;
432 		} else {
433 #endif
434 			release_file_descriptor();
435 			(void) stream_push_to_temporary(&S->m_temporary_streams,
436 			    sort_stream, ST_NOCACHE |
437 			    (S->m_single_byte_locale ? 0 : ST_WIDE));
438 
439 			hold_file_descriptor();
440 #ifdef DEBUG_NO_CACHE_TEMP
441 			if (cur_stream == NULL)
442 				break;
443 #endif
444 			stream_unset(cur_stream, STREAM_NOT_FREEABLE);
445 			stream_close_all_previous(cur_stream);
446 			cur_stream->s_consumer = NULL;
447 #ifndef DEBUG_NO_CACHE_TEMP
448 		}
449 #endif
450 	}
451 
452 	release_file_descriptor();
453 }
454