/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 1998-2003 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#ifndef	_SORT_TYPES_H
#define	_SORT_TYPES_H

#ifdef	__cplusplus
extern "C" {
#endif

#include <sys/resource.h>
#include <sys/types.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * flag_t is the integer type this header uses for option words and settings,
 * e.g. field_t.f_options, stream_t.s_status and the m_* settings in sort_t.
 */
typedef	int flag_t;

/*
 * cmp_fcn_t is the type of sort_t.m_compare_fn: a pointer to a function that
 * takes two untyped pointers and a flag_t and returns an int.
 */
typedef	int (*cmp_fcn_t)(void *, void *, flag_t);

447c478bd9Sstevel@tonic-gate typedef union vchar {
457c478bd9Sstevel@tonic-gate 	char	sc;
467c478bd9Sstevel@tonic-gate 	uchar_t	usc;
477c478bd9Sstevel@tonic-gate 	wchar_t	wc;
487c478bd9Sstevel@tonic-gate } vchar_t;
497c478bd9Sstevel@tonic-gate 
507c478bd9Sstevel@tonic-gate typedef union vcharptr {
517c478bd9Sstevel@tonic-gate 	char	*sp;
527c478bd9Sstevel@tonic-gate 	uchar_t	*usp;
537c478bd9Sstevel@tonic-gate 	wchar_t *wp;
547c478bd9Sstevel@tonic-gate } vcharptr_t;
557c478bd9Sstevel@tonic-gate 
567c478bd9Sstevel@tonic-gate typedef struct line_rec {
577c478bd9Sstevel@tonic-gate 	vcharptr_t l_data;		/* raw data */
587c478bd9Sstevel@tonic-gate 	vcharptr_t l_raw_collate;	/* collatable raw data */
597c478bd9Sstevel@tonic-gate 	vcharptr_t l_collate;		/* key-ordered collatable string */
607c478bd9Sstevel@tonic-gate 	ssize_t	l_data_length;
617c478bd9Sstevel@tonic-gate 	ssize_t	l_collate_length;
627c478bd9Sstevel@tonic-gate 	ssize_t	l_collate_bufsize;
637c478bd9Sstevel@tonic-gate } line_rec_t;
647c478bd9Sstevel@tonic-gate 
/*
 * field_species is the type of field_t.f_species and of
 * sort_t.m_default_species.  No explicit values are given, so the
 * enumerators are 0, 1 and 2 in the order listed.
 */
enum field_species {
	ALPHA,		/* 0 */
	MONTH,		/* 1 */
	NUMERIC		/* 2 */
};

/*
 * Field option bits.  Each value is a distinct single bit, so any
 * combination can be OR'ed into one flag_t.
 * NOTE(review): these presumably correspond to the sort(1) key modifiers
 * (-d, -f, -i, -b, -r) and are stored in field_t.f_options and
 * sort_t.m_field_options -- confirm against the users of this header.
 */
#define	FIELD_DICTIONARY_ORDER		0x1
#define	FIELD_FOLD_UPPERCASE		0x2
#define	FIELD_IGNORE_NONPRINTABLES	0x4
#define	FIELD_IGNORE_BLANKS_START	0x8
#define	FIELD_IGNORE_BLANKS_END		0x10

#define	FIELD_REVERSE_COMPARISONS	0x20

#define	FIELD_MODIFIERS_DEFINED		0x40

817c478bd9Sstevel@tonic-gate typedef struct field {
827c478bd9Sstevel@tonic-gate 	struct field		*f_next;
837c478bd9Sstevel@tonic-gate 
847c478bd9Sstevel@tonic-gate 	/*
857c478bd9Sstevel@tonic-gate 	 * field ops vector
867c478bd9Sstevel@tonic-gate 	 */
877c478bd9Sstevel@tonic-gate 	ssize_t			(*f_convert)(struct field *, line_rec_t *,
887c478bd9Sstevel@tonic-gate 	    vchar_t, ssize_t, ssize_t, ssize_t);
897c478bd9Sstevel@tonic-gate 	enum field_species	f_species;
907c478bd9Sstevel@tonic-gate 
917c478bd9Sstevel@tonic-gate 	/*
927c478bd9Sstevel@tonic-gate 	 * starting and ending fields, and offsets
937c478bd9Sstevel@tonic-gate 	 */
947c478bd9Sstevel@tonic-gate 	int			f_start_field;
957c478bd9Sstevel@tonic-gate 	ssize_t			f_start_offset;
967c478bd9Sstevel@tonic-gate 
977c478bd9Sstevel@tonic-gate 	int			f_end_field;
987c478bd9Sstevel@tonic-gate 	ssize_t			f_end_offset;
997c478bd9Sstevel@tonic-gate 
1007c478bd9Sstevel@tonic-gate 	flag_t			f_options;
1017c478bd9Sstevel@tonic-gate } field_t;
1027c478bd9Sstevel@tonic-gate 
/*
 * Stream status bits.
 *
 * STREAM_SOURCE_MASK (0x000f) covers the four low-order bits, which are
 * exactly STREAM_ARRAY | STREAM_MMAP | STREAM_SINGLE | STREAM_WIDE;
 * STREAM_NO_SOURCE is the value with none of them set.  Every other flag
 * is a single bit above the mask.
 * NOTE(review): these are presumably the values kept in stream_t.s_status;
 * confirm against the stream code.
 */
#define	STREAM_SOURCE_MASK	0x000f
#define	STREAM_NO_SOURCE	0x0000
#define	STREAM_ARRAY		0x0001
#define	STREAM_MMAP		0x0002
#define	STREAM_SINGLE		0x0004
#define	STREAM_WIDE		0x0008

#define	STREAM_OPEN		0x0010
#define	STREAM_PRIMED		0x0020

#define	STREAM_OUTPUT		0x0040
#define	STREAM_EOS_REACHED	0x0080
#define	STREAM_NOTFILE		0x0100
#define	STREAM_UNIQUE		0x0200
#define	STREAM_INSTANT		0x0400
#define	STREAM_TEMPORARY	0x0800
#define	STREAM_NOT_FREEABLE	0x1000

/*
 * Default sizes: one MEGABYTE and half a MEGABYTE respectively.  MEGABYTE
 * is not defined in this header and must be in scope wherever these are
 * expanded.
 */
#define	DEFAULT_INPUT_SIZE	(1 * MEGABYTE)
#define	DEFAULT_RELEASE_SIZE	(MEGABYTE / 2)

/*
 * Line-size constants.  WCHAR_AVG_LINE is CHAR_AVG_LINE scaled by
 * sizeof (wchar_t), so it has type size_t.
 * NOTE(review): XFRM_MULTIPLIER is presumably a growth factor for
 * transformed (strxfrm-style) collation buffers -- confirm at its users.
 */
#define	CHAR_AVG_LINE	32
#define	WCHAR_AVG_LINE	(sizeof (wchar_t) * CHAR_AVG_LINE)
#define	XFRM_MULTIPLIER	8

/*
 * Result codes distinguishing a complete line (0) from an incomplete
 * one (1).
 */
#define	NEXT_LINE_COMPLETE	0x0
#define	NEXT_LINE_INCOMPLETE	0x1

/*
 * Priming result codes: success is 0, the two failure codes are non-zero.
 * NOTE(review): presumably the return values of the sop_prime operation in
 * stream_ops_t -- confirm against the implementations.
 */
#define	PRIME_SUCCEEDED		0x0
#define	PRIME_FAILED_EMPTY_FILE	0x1
#define	PRIME_FAILED		0x2

1357c478bd9Sstevel@tonic-gate typedef struct stream_array {
1367c478bd9Sstevel@tonic-gate 	line_rec_t	**s_array;
1377c478bd9Sstevel@tonic-gate 	ssize_t		s_array_size;
1387c478bd9Sstevel@tonic-gate 	ssize_t		s_cur_index;
1397c478bd9Sstevel@tonic-gate } stream_array_t;
1407c478bd9Sstevel@tonic-gate 
1417c478bd9Sstevel@tonic-gate typedef struct stream_simple_file {
1427c478bd9Sstevel@tonic-gate 	/*
1437c478bd9Sstevel@tonic-gate 	 * stream_simple_file_t is used for STREAM_MMAP and for STREAM_OUTPUT
1447c478bd9Sstevel@tonic-gate 	 * for either single- (STREAM_SINGLE | STREAM_OUTPUT) or multi-byte
1457c478bd9Sstevel@tonic-gate 	 * (STREAM_WIDE | STREAM_OUTPUT) locales.
1467c478bd9Sstevel@tonic-gate 	 */
1477c478bd9Sstevel@tonic-gate 	int		s_fd;			/* file descriptor */
1487c478bd9Sstevel@tonic-gate 	caddr_t		s_release_origin;	/* start for next madvise(3C) */
1497c478bd9Sstevel@tonic-gate } stream_simple_file_t;
1507c478bd9Sstevel@tonic-gate 
typedef struct stream_buffered_file {
	/*
	 * stream_buffered_file_t is used for both STREAM_STDIO and
	 * STREAM_WIDE.  It is the BF member of stream_type_t (file accessed
	 * via stdio).
	 *
	 * NOTE(review): this header defines no STREAM_STDIO flag; the
	 * single-byte counterpart of STREAM_WIDE defined here is
	 * STREAM_SINGLE, which is presumably what is meant -- confirm and
	 * update the name above.
	 */
	FILE		*s_fp;			/* file stream */
	void		*s_vbuf;		/* stdio alternate buffer */
	size_t		s_bytes_used;		/* byte count; what it */
						/* measures is not stated */
						/* in this header */
} stream_buffered_file_t;

1617c478bd9Sstevel@tonic-gate typedef union stream_type {
1627c478bd9Sstevel@tonic-gate 	stream_array_t		LA;	/* array of line records */
1637c478bd9Sstevel@tonic-gate 	stream_simple_file_t	SF;	/* file accessed via mmap */
1647c478bd9Sstevel@tonic-gate 	stream_buffered_file_t	BF;	/* file accessed via stdio */
1657c478bd9Sstevel@tonic-gate } stream_type_t;
1667c478bd9Sstevel@tonic-gate 
1677c478bd9Sstevel@tonic-gate struct stream;
1687c478bd9Sstevel@tonic-gate 
1697c478bd9Sstevel@tonic-gate typedef struct stream_ops {
1707c478bd9Sstevel@tonic-gate 	int	(*sop_is_closable)(struct stream *);
1717c478bd9Sstevel@tonic-gate 	int	(*sop_close)(struct stream *);
1727c478bd9Sstevel@tonic-gate 	int	(*sop_eos)(struct stream *);
1737c478bd9Sstevel@tonic-gate 	ssize_t	(*sop_fetch)(struct stream *);
1747c478bd9Sstevel@tonic-gate 	void	(*sop_flush)(struct stream *);
1757c478bd9Sstevel@tonic-gate 	int	(*sop_free)(struct stream *);
1767c478bd9Sstevel@tonic-gate 	int	(*sop_open_for_write)(struct stream *);
1777c478bd9Sstevel@tonic-gate 	int	(*sop_prime)(struct stream *);
1787c478bd9Sstevel@tonic-gate 	void	(*sop_put_line)(struct stream *, line_rec_t *);
1797c478bd9Sstevel@tonic-gate 	void	(*sop_release_line)(struct stream *);
1807c478bd9Sstevel@tonic-gate 	void	(*sop_send_eol)(struct stream *);
1817c478bd9Sstevel@tonic-gate 	int	(*sop_unlink)(struct stream *);
1827c478bd9Sstevel@tonic-gate } stream_ops_t;
1837c478bd9Sstevel@tonic-gate 
/*
 * Dispatch helpers: SOP_X(s) calls the sop_x entry of s->s_ops, passing s
 * itself as the stream argument (SOP_PUT_LINE also forwards the line l).
 *
 * Each macro expands its stream argument twice -- once to locate the ops
 * vector and once as the call argument -- so s must be an expression
 * without side effects.
 */
#define	SOP_IS_CLOSABLE(s)	((s)->s_ops.sop_is_closable)(s)
#define	SOP_CLOSE(s)		((s)->s_ops.sop_close)(s)
#define	SOP_EOS(s)		((s)->s_ops.sop_eos)(s)
#define	SOP_FETCH(s)		((s)->s_ops.sop_fetch)(s)
#define	SOP_FLUSH(s)		((s)->s_ops.sop_flush)(s)
#define	SOP_FREE(s)		((s)->s_ops.sop_free)(s)
#define	SOP_OPEN_FOR_WRITE(s)	((s)->s_ops.sop_open_for_write)(s)
#define	SOP_PRIME(s)		((s)->s_ops.sop_prime)(s)
#define	SOP_PUT_LINE(s, l)	((s)->s_ops.sop_put_line)(s, l)
#define	SOP_RELEASE_LINE(s)	((s)->s_ops.sop_release_line)(s)
#define	SOP_SEND_EOL(s)		((s)->s_ops.sop_send_eol)(s)
#define	SOP_UNLINK(s)		((s)->s_ops.sop_unlink)(s)

1977c478bd9Sstevel@tonic-gate /*
1987c478bd9Sstevel@tonic-gate  * The stream_t type is provided to simplify access to files, particularly for
1997c478bd9Sstevel@tonic-gate  * external merges.
2007c478bd9Sstevel@tonic-gate  */
2017c478bd9Sstevel@tonic-gate typedef struct stream {
2027c478bd9Sstevel@tonic-gate 	struct stream	*s_consumer;	/* dependent on s_buffer */
2037c478bd9Sstevel@tonic-gate 	struct stream	*s_previous;
2047c478bd9Sstevel@tonic-gate 	struct stream	*s_next;
2057c478bd9Sstevel@tonic-gate 
2067c478bd9Sstevel@tonic-gate 	char		*s_filename;
2077c478bd9Sstevel@tonic-gate 
2087c478bd9Sstevel@tonic-gate 	line_rec_t	s_current;	/* present line buffers */
2097c478bd9Sstevel@tonic-gate 	stream_ops_t	s_ops;		/* type-specific ops vector */
2107c478bd9Sstevel@tonic-gate 	stream_type_t	s_type;		/* type-specific attributes */
2117c478bd9Sstevel@tonic-gate 
2127c478bd9Sstevel@tonic-gate 	void		*s_buffer;
2137c478bd9Sstevel@tonic-gate 	size_t		s_buffer_size;
2147c478bd9Sstevel@tonic-gate 	off_t		s_filesize;
2157c478bd9Sstevel@tonic-gate 	size_t		s_element_size;
2167c478bd9Sstevel@tonic-gate 	flag_t		s_status;	/* flags */
2177c478bd9Sstevel@tonic-gate 	ino_t		s_ino;
2187c478bd9Sstevel@tonic-gate 	dev_t		s_dev;
2197c478bd9Sstevel@tonic-gate } stream_t;
2207c478bd9Sstevel@tonic-gate 
2217c478bd9Sstevel@tonic-gate /*
2227c478bd9Sstevel@tonic-gate  * sort(1) has, for debugging purposes, a primitive compile-time option to
2237c478bd9Sstevel@tonic-gate  * generate statistics of various operations executed during an invocation.
2247c478bd9Sstevel@tonic-gate  * These statistics are recorded in the following sort_statistics_t structure.
2257c478bd9Sstevel@tonic-gate  */
2267c478bd9Sstevel@tonic-gate typedef struct sort_statistics {
2277c478bd9Sstevel@tonic-gate 	u_longlong_t	st_avail_mem;
2287c478bd9Sstevel@tonic-gate 	u_longlong_t	st_convert_reallocs;
2297c478bd9Sstevel@tonic-gate 	u_longlong_t	st_fetched_lines;
2307c478bd9Sstevel@tonic-gate 	u_longlong_t	st_insert_full_down;
2317c478bd9Sstevel@tonic-gate 	u_longlong_t	st_insert_full_input;
2327c478bd9Sstevel@tonic-gate 	u_longlong_t	st_insert_full_up;
2337c478bd9Sstevel@tonic-gate 	u_longlong_t	st_line_conversions;
2347c478bd9Sstevel@tonic-gate 	u_longlong_t	st_not_unique_lines;
2357c478bd9Sstevel@tonic-gate 	u_longlong_t	st_put_lines;
2367c478bd9Sstevel@tonic-gate 	u_longlong_t	st_put_temp_lines_internal;
2377c478bd9Sstevel@tonic-gate 	u_longlong_t	st_put_temp_lines_merge;
2387c478bd9Sstevel@tonic-gate 	u_longlong_t	st_put_unique_lines;
2397c478bd9Sstevel@tonic-gate 	u_longlong_t	st_shelved_lines;
2407c478bd9Sstevel@tonic-gate 	u_longlong_t	st_subfiles;		/* number of insertion sorts */
2417c478bd9Sstevel@tonic-gate 	u_longlong_t	st_swaps;
2427c478bd9Sstevel@tonic-gate 	u_longlong_t	st_tqs_calls;
2437c478bd9Sstevel@tonic-gate 
2447c478bd9Sstevel@tonic-gate 	uint_t		st_input_files;
2457c478bd9Sstevel@tonic-gate 	uint_t		st_merge_files;
2467c478bd9Sstevel@tonic-gate } sort_statistics_t;
2477c478bd9Sstevel@tonic-gate 
2487c478bd9Sstevel@tonic-gate typedef struct sort {
2497c478bd9Sstevel@tonic-gate 	stream_t	*m_input_streams;
2507c478bd9Sstevel@tonic-gate 	char		*m_output_filename;
2517c478bd9Sstevel@tonic-gate 
2527c478bd9Sstevel@tonic-gate 	stream_t	*m_temporary_streams;
2537c478bd9Sstevel@tonic-gate 	char		*m_tmpdir_template;
2547c478bd9Sstevel@tonic-gate 
2557c478bd9Sstevel@tonic-gate 	field_t		*m_fields_head;
2567c478bd9Sstevel@tonic-gate 
2577c478bd9Sstevel@tonic-gate 	cmp_fcn_t	m_compare_fn;
2587c478bd9Sstevel@tonic-gate 	ssize_t		(*m_coll_convert)(field_t *, line_rec_t *, flag_t,
2597c478bd9Sstevel@tonic-gate 	    vchar_t);
2607c478bd9Sstevel@tonic-gate 
2617c478bd9Sstevel@tonic-gate 	sort_statistics_t *m_stats;
2627c478bd9Sstevel@tonic-gate 	size_t		m_memory_limit;
2637c478bd9Sstevel@tonic-gate 	size_t		m_memory_available;
2647c478bd9Sstevel@tonic-gate 
2657c478bd9Sstevel@tonic-gate 	flag_t		m_check_if_sorted_only;
2667c478bd9Sstevel@tonic-gate 	flag_t		m_merge_only;
2677c478bd9Sstevel@tonic-gate 	flag_t		m_unique_lines;
2687c478bd9Sstevel@tonic-gate 	flag_t		m_entire_line;
2697c478bd9Sstevel@tonic-gate 
2707c478bd9Sstevel@tonic-gate 	enum field_species m_default_species;
2717c478bd9Sstevel@tonic-gate 	flag_t		m_field_options;
2727c478bd9Sstevel@tonic-gate 	vchar_t		m_field_separator;
2737c478bd9Sstevel@tonic-gate 
2747c478bd9Sstevel@tonic-gate 	flag_t		m_c_locale;
2757c478bd9Sstevel@tonic-gate 	flag_t		m_single_byte_locale;
2767c478bd9Sstevel@tonic-gate 	flag_t		m_input_from_stdin;
2777c478bd9Sstevel@tonic-gate 	flag_t		m_output_to_stdout;
2787c478bd9Sstevel@tonic-gate 	flag_t		m_verbose;
2797c478bd9Sstevel@tonic-gate } sort_t;
2807c478bd9Sstevel@tonic-gate 
#ifdef	__cplusplus
}
#endif

#endif	/* _SORT_TYPES_H */