17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 57c478bd9Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 67c478bd9Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 77c478bd9Sstevel@tonic-gate * with the License. 87c478bd9Sstevel@tonic-gate * 97c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 107c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 117c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 127c478bd9Sstevel@tonic-gate * and limitations under the License. 137c478bd9Sstevel@tonic-gate * 147c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 157c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 167c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 177c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 187c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 197c478bd9Sstevel@tonic-gate * 207c478bd9Sstevel@tonic-gate * CDDL HEADER END 217c478bd9Sstevel@tonic-gate */ 227c478bd9Sstevel@tonic-gate /* 237c478bd9Sstevel@tonic-gate * Copyright 1998-2003 Sun Microsystems, Inc. All rights reserved. 247c478bd9Sstevel@tonic-gate * Use is subject to license terms. 257c478bd9Sstevel@tonic-gate */ 267c478bd9Sstevel@tonic-gate 277c478bd9Sstevel@tonic-gate #ifndef _SORT_TYPES_H 287c478bd9Sstevel@tonic-gate #define _SORT_TYPES_H 297c478bd9Sstevel@tonic-gate 307c478bd9Sstevel@tonic-gate #ifdef __cplusplus 317c478bd9Sstevel@tonic-gate extern "C" { 327c478bd9Sstevel@tonic-gate #endif 337c478bd9Sstevel@tonic-gate 347c478bd9Sstevel@tonic-gate #include <sys/resource.h> 357c478bd9Sstevel@tonic-gate #include <sys/types.h> 367c478bd9Sstevel@tonic-gate #include <limits.h> 377c478bd9Sstevel@tonic-gate #include <stdio.h> 387c478bd9Sstevel@tonic-gate #include <stdlib.h> 397c478bd9Sstevel@tonic-gate 407c478bd9Sstevel@tonic-gate typedef int flag_t; 417c478bd9Sstevel@tonic-gate 427c478bd9Sstevel@tonic-gate typedef int (*cmp_fcn_t)(void *, void *, flag_t); 437c478bd9Sstevel@tonic-gate 447c478bd9Sstevel@tonic-gate typedef union vchar { 457c478bd9Sstevel@tonic-gate char sc; 467c478bd9Sstevel@tonic-gate uchar_t usc; 477c478bd9Sstevel@tonic-gate wchar_t wc; 487c478bd9Sstevel@tonic-gate } vchar_t; 497c478bd9Sstevel@tonic-gate 507c478bd9Sstevel@tonic-gate typedef union vcharptr { 517c478bd9Sstevel@tonic-gate char *sp; 527c478bd9Sstevel@tonic-gate uchar_t *usp; 537c478bd9Sstevel@tonic-gate wchar_t *wp; 547c478bd9Sstevel@tonic-gate } vcharptr_t; 557c478bd9Sstevel@tonic-gate 567c478bd9Sstevel@tonic-gate typedef struct line_rec { 577c478bd9Sstevel@tonic-gate vcharptr_t l_data; /* raw data */ 587c478bd9Sstevel@tonic-gate vcharptr_t l_raw_collate; /* collatable raw data */ 597c478bd9Sstevel@tonic-gate vcharptr_t l_collate; /* key-ordered collatable string */ 607c478bd9Sstevel@tonic-gate ssize_t l_data_length; 617c478bd9Sstevel@tonic-gate ssize_t l_collate_length; 627c478bd9Sstevel@tonic-gate ssize_t l_collate_bufsize; 637c478bd9Sstevel@tonic-gate } line_rec_t; 647c478bd9Sstevel@tonic-gate 657c478bd9Sstevel@tonic-gate enum field_species { 667c478bd9Sstevel@tonic-gate ALPHA, 677c478bd9Sstevel@tonic-gate MONTH, 687c478bd9Sstevel@tonic-gate NUMERIC 697c478bd9Sstevel@tonic-gate }; 707c478bd9Sstevel@tonic-gate 717c478bd9Sstevel@tonic-gate #define FIELD_DICTIONARY_ORDER 0x1 727c478bd9Sstevel@tonic-gate #define FIELD_FOLD_UPPERCASE 0x2 737c478bd9Sstevel@tonic-gate #define FIELD_IGNORE_NONPRINTABLES 0x4 747c478bd9Sstevel@tonic-gate #define FIELD_IGNORE_BLANKS_START 0x8 757c478bd9Sstevel@tonic-gate #define FIELD_IGNORE_BLANKS_END 0x10 767c478bd9Sstevel@tonic-gate 777c478bd9Sstevel@tonic-gate #define FIELD_REVERSE_COMPARISONS 0x20 787c478bd9Sstevel@tonic-gate 797c478bd9Sstevel@tonic-gate #define FIELD_MODIFIERS_DEFINED 0x40 807c478bd9Sstevel@tonic-gate 817c478bd9Sstevel@tonic-gate typedef struct field { 827c478bd9Sstevel@tonic-gate struct field *f_next; 837c478bd9Sstevel@tonic-gate 847c478bd9Sstevel@tonic-gate /* 857c478bd9Sstevel@tonic-gate * field ops vector 867c478bd9Sstevel@tonic-gate */ 877c478bd9Sstevel@tonic-gate ssize_t (*f_convert)(struct field *, line_rec_t *, 887c478bd9Sstevel@tonic-gate vchar_t, ssize_t, ssize_t, ssize_t); 897c478bd9Sstevel@tonic-gate enum field_species f_species; 907c478bd9Sstevel@tonic-gate 917c478bd9Sstevel@tonic-gate /* 927c478bd9Sstevel@tonic-gate * starting and ending fields, and offsets 937c478bd9Sstevel@tonic-gate */ 947c478bd9Sstevel@tonic-gate int f_start_field; 957c478bd9Sstevel@tonic-gate ssize_t f_start_offset; 967c478bd9Sstevel@tonic-gate 977c478bd9Sstevel@tonic-gate int f_end_field; 987c478bd9Sstevel@tonic-gate ssize_t f_end_offset; 997c478bd9Sstevel@tonic-gate 1007c478bd9Sstevel@tonic-gate flag_t f_options; 1017c478bd9Sstevel@tonic-gate } field_t; 1027c478bd9Sstevel@tonic-gate 1037c478bd9Sstevel@tonic-gate #define STREAM_SOURCE_MASK 0x000f 1047c478bd9Sstevel@tonic-gate #define STREAM_NO_SOURCE 0x0000 1057c478bd9Sstevel@tonic-gate #define STREAM_ARRAY 0x0001 1067c478bd9Sstevel@tonic-gate #define STREAM_MMAP 0x0002 1077c478bd9Sstevel@tonic-gate #define STREAM_SINGLE 0x0004 1087c478bd9Sstevel@tonic-gate #define STREAM_WIDE 0x0008 1097c478bd9Sstevel@tonic-gate 1107c478bd9Sstevel@tonic-gate #define STREAM_OPEN 0x0010 1117c478bd9Sstevel@tonic-gate #define STREAM_PRIMED 0x0020 1127c478bd9Sstevel@tonic-gate 1137c478bd9Sstevel@tonic-gate #define STREAM_OUTPUT 0x0040 1147c478bd9Sstevel@tonic-gate #define STREAM_EOS_REACHED 0x0080 1157c478bd9Sstevel@tonic-gate #define STREAM_NOTFILE 0x0100 1167c478bd9Sstevel@tonic-gate #define STREAM_UNIQUE 0x0200 1177c478bd9Sstevel@tonic-gate #define STREAM_INSTANT 0x0400 1187c478bd9Sstevel@tonic-gate #define STREAM_TEMPORARY 0x0800 1197c478bd9Sstevel@tonic-gate #define STREAM_NOT_FREEABLE 0x1000 1207c478bd9Sstevel@tonic-gate 1217c478bd9Sstevel@tonic-gate #define DEFAULT_INPUT_SIZE (1 * MEGABYTE) 1227c478bd9Sstevel@tonic-gate #define DEFAULT_RELEASE_SIZE (MEGABYTE / 2) 1237c478bd9Sstevel@tonic-gate 1247c478bd9Sstevel@tonic-gate #define CHAR_AVG_LINE 32 1257c478bd9Sstevel@tonic-gate #define WCHAR_AVG_LINE (sizeof (wchar_t) * CHAR_AVG_LINE) 1267c478bd9Sstevel@tonic-gate #define XFRM_MULTIPLIER 8 1277c478bd9Sstevel@tonic-gate 1287c478bd9Sstevel@tonic-gate #define NEXT_LINE_COMPLETE 0x0 1297c478bd9Sstevel@tonic-gate #define NEXT_LINE_INCOMPLETE 0x1 1307c478bd9Sstevel@tonic-gate 1317c478bd9Sstevel@tonic-gate #define PRIME_SUCCEEDED 0x0 1327c478bd9Sstevel@tonic-gate #define PRIME_FAILED_EMPTY_FILE 0x1 1337c478bd9Sstevel@tonic-gate #define PRIME_FAILED 0x2 1347c478bd9Sstevel@tonic-gate 1357c478bd9Sstevel@tonic-gate typedef struct stream_array { 1367c478bd9Sstevel@tonic-gate line_rec_t **s_array; 1377c478bd9Sstevel@tonic-gate ssize_t s_array_size; 1387c478bd9Sstevel@tonic-gate ssize_t s_cur_index; 1397c478bd9Sstevel@tonic-gate } stream_array_t; 1407c478bd9Sstevel@tonic-gate 1417c478bd9Sstevel@tonic-gate typedef struct stream_simple_file { 1427c478bd9Sstevel@tonic-gate /* 1437c478bd9Sstevel@tonic-gate * stream_simple_file_t is used for STREAM_MMAP and for STREAM_OUTPUT 1447c478bd9Sstevel@tonic-gate * for either single- (STREAM_SINGLE | STREAM_OUTPUT) or multi-byte 1457c478bd9Sstevel@tonic-gate * (STREAM_WIDE | STREAM_OUTPUT) locales. 1467c478bd9Sstevel@tonic-gate */ 1477c478bd9Sstevel@tonic-gate int s_fd; /* file descriptor */ 1487c478bd9Sstevel@tonic-gate caddr_t s_release_origin; /* start for next madvise(3C) */ 1497c478bd9Sstevel@tonic-gate } stream_simple_file_t; 1507c478bd9Sstevel@tonic-gate 1517c478bd9Sstevel@tonic-gate typedef struct stream_buffered_file { 1527c478bd9Sstevel@tonic-gate /* 1537c478bd9Sstevel@tonic-gate * stream_buffered_file_t is used for both STREAM_STDIO and 1547c478bd9Sstevel@tonic-gate * STREAM_WIDE. 1557c478bd9Sstevel@tonic-gate */ 1567c478bd9Sstevel@tonic-gate FILE *s_fp; /* file stream */ 1577c478bd9Sstevel@tonic-gate void *s_vbuf; /* stdio alternate buffer */ 1587c478bd9Sstevel@tonic-gate size_t s_bytes_used; 1597c478bd9Sstevel@tonic-gate } stream_buffered_file_t; 1607c478bd9Sstevel@tonic-gate 1617c478bd9Sstevel@tonic-gate typedef union stream_type { 1627c478bd9Sstevel@tonic-gate stream_array_t LA; /* array of line records */ 1637c478bd9Sstevel@tonic-gate stream_simple_file_t SF; /* file accessed via mmap */ 1647c478bd9Sstevel@tonic-gate stream_buffered_file_t BF; /* file accessed via stdio */ 1657c478bd9Sstevel@tonic-gate } stream_type_t; 1667c478bd9Sstevel@tonic-gate 1677c478bd9Sstevel@tonic-gate struct stream; 1687c478bd9Sstevel@tonic-gate 1697c478bd9Sstevel@tonic-gate typedef struct stream_ops { 1707c478bd9Sstevel@tonic-gate int (*sop_is_closable)(struct stream *); 1717c478bd9Sstevel@tonic-gate int (*sop_close)(struct stream *); 1727c478bd9Sstevel@tonic-gate int (*sop_eos)(struct stream *); 1737c478bd9Sstevel@tonic-gate ssize_t (*sop_fetch)(struct stream *); 1747c478bd9Sstevel@tonic-gate void (*sop_flush)(struct stream *); 1757c478bd9Sstevel@tonic-gate int (*sop_free)(struct stream *); 1767c478bd9Sstevel@tonic-gate int (*sop_open_for_write)(struct stream *); 1777c478bd9Sstevel@tonic-gate int (*sop_prime)(struct stream *); 1787c478bd9Sstevel@tonic-gate void (*sop_put_line)(struct stream *, line_rec_t *); 1797c478bd9Sstevel@tonic-gate void (*sop_release_line)(struct stream *); 1807c478bd9Sstevel@tonic-gate void (*sop_send_eol)(struct stream *); 1817c478bd9Sstevel@tonic-gate int (*sop_unlink)(struct stream *); 1827c478bd9Sstevel@tonic-gate } stream_ops_t; 1837c478bd9Sstevel@tonic-gate 1847c478bd9Sstevel@tonic-gate #define SOP_IS_CLOSABLE(s) ((s)->s_ops.sop_is_closable)(s) 1857c478bd9Sstevel@tonic-gate #define SOP_CLOSE(s) ((s)->s_ops.sop_close)(s) 1867c478bd9Sstevel@tonic-gate #define SOP_EOS(s) ((s)->s_ops.sop_eos)(s) 1877c478bd9Sstevel@tonic-gate #define SOP_FETCH(s) ((s)->s_ops.sop_fetch)(s) 1887c478bd9Sstevel@tonic-gate #define SOP_FLUSH(s) ((s)->s_ops.sop_flush)(s) 1897c478bd9Sstevel@tonic-gate #define SOP_FREE(s) ((s)->s_ops.sop_free)(s) 1907c478bd9Sstevel@tonic-gate #define SOP_OPEN_FOR_WRITE(s) ((s)->s_ops.sop_open_for_write)(s) 1917c478bd9Sstevel@tonic-gate #define SOP_PRIME(s) ((s)->s_ops.sop_prime)(s) 1927c478bd9Sstevel@tonic-gate #define SOP_PUT_LINE(s, l) ((s)->s_ops.sop_put_line)(s, l) 1937c478bd9Sstevel@tonic-gate #define SOP_RELEASE_LINE(s) ((s)->s_ops.sop_release_line)(s) 1947c478bd9Sstevel@tonic-gate #define SOP_SEND_EOL(s) ((s)->s_ops.sop_send_eol)(s) 1957c478bd9Sstevel@tonic-gate #define SOP_UNLINK(s) ((s)->s_ops.sop_unlink)(s) 1967c478bd9Sstevel@tonic-gate 1977c478bd9Sstevel@tonic-gate /* 1987c478bd9Sstevel@tonic-gate * The stream_t type is provided to simplify access to files, particularly for 1997c478bd9Sstevel@tonic-gate * external merges. 2007c478bd9Sstevel@tonic-gate */ 2017c478bd9Sstevel@tonic-gate typedef struct stream { 2027c478bd9Sstevel@tonic-gate struct stream *s_consumer; /* dependent on s_buffer */ 2037c478bd9Sstevel@tonic-gate struct stream *s_previous; 2047c478bd9Sstevel@tonic-gate struct stream *s_next; 2057c478bd9Sstevel@tonic-gate 2067c478bd9Sstevel@tonic-gate char *s_filename; 2077c478bd9Sstevel@tonic-gate 2087c478bd9Sstevel@tonic-gate line_rec_t s_current; /* present line buffers */ 2097c478bd9Sstevel@tonic-gate stream_ops_t s_ops; /* type-specific ops vector */ 2107c478bd9Sstevel@tonic-gate stream_type_t s_type; /* type-specific attributes */ 2117c478bd9Sstevel@tonic-gate 2127c478bd9Sstevel@tonic-gate void *s_buffer; 2137c478bd9Sstevel@tonic-gate size_t s_buffer_size; 2147c478bd9Sstevel@tonic-gate off_t s_filesize; 2157c478bd9Sstevel@tonic-gate size_t s_element_size; 2167c478bd9Sstevel@tonic-gate flag_t s_status; /* flags */ 2177c478bd9Sstevel@tonic-gate ino_t s_ino; 2187c478bd9Sstevel@tonic-gate dev_t s_dev; 2197c478bd9Sstevel@tonic-gate } stream_t; 2207c478bd9Sstevel@tonic-gate 2217c478bd9Sstevel@tonic-gate /* 2227c478bd9Sstevel@tonic-gate * sort(1) has, for debugging purposes, a primitive compile-time option to 2237c478bd9Sstevel@tonic-gate * generate statistics of various operations executed during an invocation. 2247c478bd9Sstevel@tonic-gate * These statistics are recorded in the following sort_statistics_t structure. 2257c478bd9Sstevel@tonic-gate */ 2267c478bd9Sstevel@tonic-gate typedef struct sort_statistics { 2277c478bd9Sstevel@tonic-gate u_longlong_t st_avail_mem; 2287c478bd9Sstevel@tonic-gate u_longlong_t st_convert_reallocs; 2297c478bd9Sstevel@tonic-gate u_longlong_t st_fetched_lines; 2307c478bd9Sstevel@tonic-gate u_longlong_t st_insert_full_down; 2317c478bd9Sstevel@tonic-gate u_longlong_t st_insert_full_input; 2327c478bd9Sstevel@tonic-gate u_longlong_t st_insert_full_up; 2337c478bd9Sstevel@tonic-gate u_longlong_t st_line_conversions; 2347c478bd9Sstevel@tonic-gate u_longlong_t st_not_unique_lines; 2357c478bd9Sstevel@tonic-gate u_longlong_t st_put_lines; 2367c478bd9Sstevel@tonic-gate u_longlong_t st_put_temp_lines_internal; 2377c478bd9Sstevel@tonic-gate u_longlong_t st_put_temp_lines_merge; 2387c478bd9Sstevel@tonic-gate u_longlong_t st_put_unique_lines; 2397c478bd9Sstevel@tonic-gate u_longlong_t st_shelved_lines; 2407c478bd9Sstevel@tonic-gate u_longlong_t st_subfiles; /* number of insertion sorts */ 2417c478bd9Sstevel@tonic-gate u_longlong_t st_swaps; 2427c478bd9Sstevel@tonic-gate u_longlong_t st_tqs_calls; 2437c478bd9Sstevel@tonic-gate 2447c478bd9Sstevel@tonic-gate uint_t st_input_files; 2457c478bd9Sstevel@tonic-gate uint_t st_merge_files; 2467c478bd9Sstevel@tonic-gate } sort_statistics_t; 2477c478bd9Sstevel@tonic-gate 2487c478bd9Sstevel@tonic-gate typedef struct sort { 2497c478bd9Sstevel@tonic-gate stream_t *m_input_streams; 2507c478bd9Sstevel@tonic-gate char *m_output_filename; 2517c478bd9Sstevel@tonic-gate 2527c478bd9Sstevel@tonic-gate stream_t *m_temporary_streams; 2537c478bd9Sstevel@tonic-gate char *m_tmpdir_template; 2547c478bd9Sstevel@tonic-gate 2557c478bd9Sstevel@tonic-gate field_t *m_fields_head; 2567c478bd9Sstevel@tonic-gate 2577c478bd9Sstevel@tonic-gate cmp_fcn_t m_compare_fn; 2587c478bd9Sstevel@tonic-gate ssize_t (*m_coll_convert)(field_t *, line_rec_t *, flag_t, 2597c478bd9Sstevel@tonic-gate vchar_t); 2607c478bd9Sstevel@tonic-gate 2617c478bd9Sstevel@tonic-gate sort_statistics_t *m_stats; 2627c478bd9Sstevel@tonic-gate size_t m_memory_limit; 2637c478bd9Sstevel@tonic-gate size_t m_memory_available; 2647c478bd9Sstevel@tonic-gate 2657c478bd9Sstevel@tonic-gate flag_t m_check_if_sorted_only; 2667c478bd9Sstevel@tonic-gate flag_t m_merge_only; 2677c478bd9Sstevel@tonic-gate flag_t m_unique_lines; 2687c478bd9Sstevel@tonic-gate flag_t m_entire_line; 2697c478bd9Sstevel@tonic-gate 2707c478bd9Sstevel@tonic-gate enum field_species m_default_species; 2717c478bd9Sstevel@tonic-gate flag_t m_field_options; 2727c478bd9Sstevel@tonic-gate vchar_t m_field_separator; 2737c478bd9Sstevel@tonic-gate 2747c478bd9Sstevel@tonic-gate flag_t m_c_locale; 2757c478bd9Sstevel@tonic-gate flag_t m_single_byte_locale; 2767c478bd9Sstevel@tonic-gate flag_t m_input_from_stdin; 2777c478bd9Sstevel@tonic-gate flag_t m_output_to_stdout; 2787c478bd9Sstevel@tonic-gate flag_t m_verbose; 2797c478bd9Sstevel@tonic-gate } sort_t; 2807c478bd9Sstevel@tonic-gate 2817c478bd9Sstevel@tonic-gate #ifdef __cplusplus 2827c478bd9Sstevel@tonic-gate } 2837c478bd9Sstevel@tonic-gate #endif 2847c478bd9Sstevel@tonic-gate 2857c478bd9Sstevel@tonic-gate #endif /* _SORT_TYPES_H */ 286