/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 1998-2003 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

277c478bdstevel@tonic-gate#ifndef	_SORT_TYPES_H
287c478bdstevel@tonic-gate#define	_SORT_TYPES_H
297c478bdstevel@tonic-gate
307c478bdstevel@tonic-gate#pragma ident	"%Z%%M%	%I%	%E% SMI"
317c478bdstevel@tonic-gate
327c478bdstevel@tonic-gate#ifdef	__cplusplus
337c478bdstevel@tonic-gateextern "C" {
347c478bdstevel@tonic-gate#endif
357c478bdstevel@tonic-gate
367c478bdstevel@tonic-gate#include <sys/resource.h>
377c478bdstevel@tonic-gate#include <sys/types.h>
387c478bdstevel@tonic-gate#include <limits.h>
397c478bdstevel@tonic-gate#include <stdio.h>
407c478bdstevel@tonic-gate#include <stdlib.h>
417c478bdstevel@tonic-gate
/*
 * Integer type used for the flag and option members of the structures
 * declared in this header.
 */
typedef	int flag_t;

447c478bdstevel@tonic-gatetypedef	int (*cmp_fcn_t)(void *, void *, flag_t);
457c478bdstevel@tonic-gate
467c478bdstevel@tonic-gatetypedef union vchar {
477c478bdstevel@tonic-gate	char	sc;
487c478bdstevel@tonic-gate	uchar_t	usc;
497c478bdstevel@tonic-gate	wchar_t	wc;
507c478bdstevel@tonic-gate} vchar_t;
517c478bdstevel@tonic-gate
527c478bdstevel@tonic-gatetypedef union vcharptr {
537c478bdstevel@tonic-gate	char	*sp;
547c478bdstevel@tonic-gate	uchar_t	*usp;
557c478bdstevel@tonic-gate	wchar_t *wp;
567c478bdstevel@tonic-gate} vcharptr_t;
577c478bdstevel@tonic-gate
587c478bdstevel@tonic-gatetypedef struct line_rec {
597c478bdstevel@tonic-gate	vcharptr_t l_data;		/* raw data */
607c478bdstevel@tonic-gate	vcharptr_t l_raw_collate;	/* collatable raw data */
617c478bdstevel@tonic-gate	vcharptr_t l_collate;		/* key-ordered collatable string */
627c478bdstevel@tonic-gate	ssize_t	l_data_length;
637c478bdstevel@tonic-gate	ssize_t	l_collate_length;
647c478bdstevel@tonic-gate	ssize_t	l_collate_bufsize;
657c478bdstevel@tonic-gate} line_rec_t;
667c478bdstevel@tonic-gate
/*
 * Species of a sort field; stored in field_t's f_species and in sort_t's
 * m_default_species.
 */
enum field_species {
	ALPHA,
	MONTH,
	NUMERIC
};

/*
 * Field option bits.  Each value occupies a distinct bit, so they may be
 * OR-ed together in a flag_t.
 * NOTE(review): presumably stored in field_t's f_options and sort_t's
 * m_field_options -- confirm against the users of this header.
 */
#define	FIELD_DICTIONARY_ORDER		0x1
#define	FIELD_FOLD_UPPERCASE		0x2
#define	FIELD_IGNORE_NONPRINTABLES	0x4
#define	FIELD_IGNORE_BLANKS_START	0x8
#define	FIELD_IGNORE_BLANKS_END		0x10

#define	FIELD_REVERSE_COMPARISONS	0x20

#define	FIELD_MODIFIERS_DEFINED		0x40

837c478bdstevel@tonic-gatetypedef struct field {
847c478bdstevel@tonic-gate	struct field		*f_next;
857c478bdstevel@tonic-gate
867c478bdstevel@tonic-gate	/*
877c478bdstevel@tonic-gate	 * field ops vector
887c478bdstevel@tonic-gate	 */
897c478bdstevel@tonic-gate	ssize_t			(*f_convert)(struct field *, line_rec_t *,
907c478bdstevel@tonic-gate	    vchar_t, ssize_t, ssize_t, ssize_t);
917c478bdstevel@tonic-gate	enum field_species	f_species;
927c478bdstevel@tonic-gate
937c478bdstevel@tonic-gate	/*
947c478bdstevel@tonic-gate	 * starting and ending fields, and offsets
957c478bdstevel@tonic-gate	 */
967c478bdstevel@tonic-gate	int			f_start_field;
977c478bdstevel@tonic-gate	ssize_t			f_start_offset;
987c478bdstevel@tonic-gate
997c478bdstevel@tonic-gate	int			f_end_field;
1007c478bdstevel@tonic-gate	ssize_t			f_end_offset;
1017c478bdstevel@tonic-gate
1027c478bdstevel@tonic-gate	flag_t			f_options;
1037c478bdstevel@tonic-gate} field_t;
1047c478bdstevel@tonic-gate
/*
 * Stream status bits.  The low four bits identify the stream source;
 * STREAM_SOURCE_MASK is the union of the four source bits.
 */
#define	STREAM_SOURCE_MASK	0x000f
#define	STREAM_NO_SOURCE	0x0000
#define	STREAM_ARRAY		0x0001
#define	STREAM_MMAP		0x0002
#define	STREAM_SINGLE		0x0004
#define	STREAM_WIDE		0x0008

/*
 * The remaining values each occupy a distinct bit above the source mask.
 */
#define	STREAM_OPEN		0x0010
#define	STREAM_PRIMED		0x0020

#define	STREAM_OUTPUT		0x0040
#define	STREAM_EOS_REACHED	0x0080
#define	STREAM_NOTFILE		0x0100
#define	STREAM_UNIQUE		0x0200
#define	STREAM_INSTANT		0x0400
#define	STREAM_TEMPORARY	0x0800
#define	STREAM_NOT_FREEABLE	0x1000

/*
 * Default sizes, expressed in terms of MEGABYTE.
 * NOTE(review): MEGABYTE is not defined in this header; it must be provided
 * by another header before these macros are expanded.
 */
#define	DEFAULT_INPUT_SIZE	(1 * MEGABYTE)
#define	DEFAULT_RELEASE_SIZE	(MEGABYTE / 2)

/*
 * Line-length estimates: CHAR_AVG_LINE is a count of characters, and
 * WCHAR_AVG_LINE is the size in bytes of that many wide characters.
 */
#define	CHAR_AVG_LINE	32
#define	WCHAR_AVG_LINE	(sizeof (wchar_t) * CHAR_AVG_LINE)
#define	XFRM_MULTIPLIER	8

/*
 * Status codes for line fetching.
 */
#define	NEXT_LINE_COMPLETE	0x0
#define	NEXT_LINE_INCOMPLETE	0x1

/*
 * Status codes for stream priming; zero denotes success.
 */
#define	PRIME_SUCCEEDED		0x0
#define	PRIME_FAILED_EMPTY_FILE	0x1
#define	PRIME_FAILED		0x2

1377c478bdstevel@tonic-gatetypedef struct stream_array {
1387c478bdstevel@tonic-gate	line_rec_t	**s_array;
1397c478bdstevel@tonic-gate	ssize_t		s_array_size;
1407c478bdstevel@tonic-gate	ssize_t		s_cur_index;
1417c478bdstevel@tonic-gate} stream_array_t;
1427c478bdstevel@tonic-gate
1437c478bdstevel@tonic-gatetypedef struct stream_simple_file {
1447c478bdstevel@tonic-gate	/*
1457c478bdstevel@tonic-gate	 * stream_simple_file_t is used for STREAM_MMAP and for STREAM_OUTPUT
1467c478bdstevel@tonic-gate	 * for either single- (STREAM_SINGLE | STREAM_OUTPUT) or multi-byte
1477c478bdstevel@tonic-gate	 * (STREAM_WIDE | STREAM_OUTPUT) locales.
1487c478bdstevel@tonic-gate	 */
1497c478bdstevel@tonic-gate	int		s_fd;			/* file descriptor */
1507c478bdstevel@tonic-gate	caddr_t		s_release_origin;	/* start for next madvise(3C) */
1517c478bdstevel@tonic-gate} stream_simple_file_t;
1527c478bdstevel@tonic-gate
/*
 * Type-specific state for a stream accessed through stdio.
 */
typedef struct stream_buffered_file {
	/*
	 * stream_buffered_file_t is used for both STREAM_STDIO and
	 * STREAM_WIDE.
	 *
	 * NOTE(review): no STREAM_STDIO flag is defined in this header;
	 * presumably STREAM_SINGLE is meant -- confirm.
	 */
	FILE		*s_fp;			/* file stream */
	void		*s_vbuf;		/* stdio alternate buffer */
	size_t		s_bytes_used;
} stream_buffered_file_t;

1637c478bdstevel@tonic-gatetypedef union stream_type {
1647c478bdstevel@tonic-gate	stream_array_t		LA;	/* array of line records */
1657c478bdstevel@tonic-gate	stream_simple_file_t	SF;	/* file accessed via mmap */
1667c478bdstevel@tonic-gate	stream_buffered_file_t	BF;	/* file accessed via stdio */
1677c478bdstevel@tonic-gate} stream_type_t;
1687c478bdstevel@tonic-gate
1697c478bdstevel@tonic-gatestruct stream;
1707c478bdstevel@tonic-gate
1717c478bdstevel@tonic-gatetypedef struct stream_ops {
1727c478bdstevel@tonic-gate	int	(*sop_is_closable)(struct stream *);
1737c478bdstevel@tonic-gate	int	(*sop_close)(struct stream *);
1747c478bdstevel@tonic-gate	int	(*sop_eos)(struct stream *);
1757c478bdstevel@tonic-gate	ssize_t	(*sop_fetch)(struct stream *);
1767c478bdstevel@tonic-gate	void	(*sop_flush)(struct stream *);
1777c478bdstevel@tonic-gate	int	(*sop_free)(struct stream *);
1787c478bdstevel@tonic-gate	int	(*sop_open_for_write)(struct stream *);
1797c478bdstevel@tonic-gate	int	(*sop_prime)(struct stream *);
1807c478bdstevel@tonic-gate	void	(*sop_put_line)(struct stream *, line_rec_t *);
1817c478bdstevel@tonic-gate	void	(*sop_release_line)(struct stream *);
1827c478bdstevel@tonic-gate	void	(*sop_send_eol)(struct stream *);
1837c478bdstevel@tonic-gate	int	(*sop_unlink)(struct stream *);
1847c478bdstevel@tonic-gate} stream_ops_t;
1857c478bdstevel@tonic-gate
/*
 * Convenience wrappers that invoke an operation from a stream's s_ops vector
 * on that same stream.  Each macro evaluates its stream argument twice, so
 * the argument must not have side effects.  Arguments and the expansion as a
 * whole are parenthesized so the macros behave like ordinary call
 * expressions in any context.
 */
#define	SOP_IS_CLOSABLE(s)	(((s)->s_ops.sop_is_closable)(s))
#define	SOP_CLOSE(s)		(((s)->s_ops.sop_close)(s))
#define	SOP_EOS(s)		(((s)->s_ops.sop_eos)(s))
#define	SOP_FETCH(s)		(((s)->s_ops.sop_fetch)(s))
#define	SOP_FLUSH(s)		(((s)->s_ops.sop_flush)(s))
#define	SOP_FREE(s)		(((s)->s_ops.sop_free)(s))
#define	SOP_OPEN_FOR_WRITE(s)	(((s)->s_ops.sop_open_for_write)(s))
#define	SOP_PRIME(s)		(((s)->s_ops.sop_prime)(s))
#define	SOP_PUT_LINE(s, l)	(((s)->s_ops.sop_put_line)((s), (l)))
#define	SOP_RELEASE_LINE(s)	(((s)->s_ops.sop_release_line)(s))
#define	SOP_SEND_EOL(s)		(((s)->s_ops.sop_send_eol)(s))
#define	SOP_UNLINK(s)		(((s)->s_ops.sop_unlink)(s))

1997c478bdstevel@tonic-gate/*
2007c478bdstevel@tonic-gate * The stream_t type is provided to simplify access to files, particularly for
2017c478bdstevel@tonic-gate * external merges.
2027c478bdstevel@tonic-gate */
2037c478bdstevel@tonic-gatetypedef struct stream {
2047c478bdstevel@tonic-gate	struct stream	*s_consumer;	/* dependent on s_buffer */
2057c478bdstevel@tonic-gate	struct stream	*s_previous;
2067c478bdstevel@tonic-gate	struct stream	*s_next;
2077c478bdstevel@tonic-gate
2087c478bdstevel@tonic-gate	char		*s_filename;
2097c478bdstevel@tonic-gate
2107c478bdstevel@tonic-gate	line_rec_t	s_current;	/* present line buffers */
2117c478bdstevel@tonic-gate	stream_ops_t	s_ops;		/* type-specific ops vector */
2127c478bdstevel@tonic-gate	stream_type_t	s_type;		/* type-specific attributes */
2137c478bdstevel@tonic-gate
2147c478bdstevel@tonic-gate	void		*s_buffer;
2157c478bdstevel@tonic-gate	size_t		s_buffer_size;
2167c478bdstevel@tonic-gate	off_t		s_filesize;
2177c478bdstevel@tonic-gate	size_t		s_element_size;
2187c478bdstevel@tonic-gate	flag_t		s_status;	/* flags */
2197c478bdstevel@tonic-gate	ino_t		s_ino;
2207c478bdstevel@tonic-gate	dev_t		s_dev;
2217c478bdstevel@tonic-gate} stream_t;
2227c478bdstevel@tonic-gate
2237c478bdstevel@tonic-gate/*
2247c478bdstevel@tonic-gate * sort(1) has, for debugging purposes, a primitive compile-time option to
2257c478bdstevel@tonic-gate * generate statistics of various operations executed during an invocation.
2267c478bdstevel@tonic-gate * These statistics are recorded in the following sort_statistics_t structure.
2277c478bdstevel@tonic-gate */
2287c478bdstevel@tonic-gatetypedef struct sort_statistics {
2297c478bdstevel@tonic-gate	u_longlong_t	st_avail_mem;
2307c478bdstevel@tonic-gate	u_longlong_t	st_convert_reallocs;
2317c478bdstevel@tonic-gate	u_longlong_t	st_fetched_lines;
2327c478bdstevel@tonic-gate	u_longlong_t	st_insert_full_down;
2337c478bdstevel@tonic-gate	u_longlong_t	st_insert_full_input;
2347c478bdstevel@tonic-gate	u_longlong_t	st_insert_full_up;
2357c478bdstevel@tonic-gate	u_longlong_t	st_line_conversions;
2367c478bdstevel@tonic-gate	u_longlong_t	st_not_unique_lines;
2377c478bdstevel@tonic-gate	u_longlong_t	st_put_lines;
2387c478bdstevel@tonic-gate	u_longlong_t	st_put_temp_lines_internal;
2397c478bdstevel@tonic-gate	u_longlong_t	st_put_temp_lines_merge;
2407c478bdstevel@tonic-gate	u_longlong_t	st_put_unique_lines;
2417c478bdstevel@tonic-gate	u_longlong_t	st_shelved_lines;
2427c478bdstevel@tonic-gate	u_longlong_t	st_subfiles;		/* number of insertion sorts */
2437c478bdstevel@tonic-gate	u_longlong_t	st_swaps;
2447c478bdstevel@tonic-gate	u_longlong_t	st_tqs_calls;
2457c478bdstevel@tonic-gate
2467c478bdstevel@tonic-gate	uint_t		st_input_files;
2477c478bdstevel@tonic-gate	uint_t		st_merge_files;
2487c478bdstevel@tonic-gate} sort_statistics_t;
2497c478bdstevel@tonic-gate
2507c478bdstevel@tonic-gatetypedef struct sort {
2517c478bdstevel@tonic-gate	stream_t	*m_input_streams;
2527c478bdstevel@tonic-gate	char		*m_output_filename;
2537c478bdstevel@tonic-gate
2547c478bdstevel@tonic-gate	stream_t	*m_temporary_streams;
2557c478bdstevel@tonic-gate	char		*m_tmpdir_template;
2567c478bdstevel@tonic-gate
2577c478bdstevel@tonic-gate	field_t		*m_fields_head;
2587c478bdstevel@tonic-gate
2597c478bdstevel@tonic-gate	cmp_fcn_t	m_compare_fn;
2607c478bdstevel@tonic-gate	ssize_t		(*m_coll_convert)(field_t *, line_rec_t *, flag_t,
2617c478bdstevel@tonic-gate	    vchar_t);
2627c478bdstevel@tonic-gate
2637c478bdstevel@tonic-gate	sort_statistics_t *m_stats;
2647c478bdstevel@tonic-gate	size_t		m_memory_limit;
2657c478bdstevel@tonic-gate	size_t		m_memory_available;
2667c478bdstevel@tonic-gate
2677c478bdstevel@tonic-gate	flag_t		m_check_if_sorted_only;
2687c478bdstevel@tonic-gate	flag_t		m_merge_only;
2697c478bdstevel@tonic-gate	flag_t		m_unique_lines;
2707c478bdstevel@tonic-gate	flag_t		m_entire_line;
2717c478bdstevel@tonic-gate
2727c478bdstevel@tonic-gate	enum field_species m_default_species;
2737c478bdstevel@tonic-gate	flag_t		m_field_options;
2747c478bdstevel@tonic-gate	vchar_t		m_field_separator;
2757c478bdstevel@tonic-gate
2767c478bdstevel@tonic-gate	flag_t		m_c_locale;
2777c478bdstevel@tonic-gate	flag_t		m_single_byte_locale;
2787c478bdstevel@tonic-gate	flag_t		m_input_from_stdin;
2797c478bdstevel@tonic-gate	flag_t		m_output_to_stdout;
2807c478bdstevel@tonic-gate	flag_t		m_verbose;
2817c478bdstevel@tonic-gate} sort_t;
2827c478bdstevel@tonic-gate
2837c478bdstevel@tonic-gate#ifdef	__cplusplus
2847c478bdstevel@tonic-gate}
2857c478bdstevel@tonic-gate#endif
2867c478bdstevel@tonic-gate
2877c478bdstevel@tonic-gate#endif	/* _SORT_TYPES_H */
288