xref: /illumos-gate/usr/src/cmd/sort/options.c (revision 101e15b5)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
57c478bd9Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
67c478bd9Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
77c478bd9Sstevel@tonic-gate  * with the License.
87c478bd9Sstevel@tonic-gate  *
97c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
107c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
117c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
127c478bd9Sstevel@tonic-gate  * and limitations under the License.
137c478bd9Sstevel@tonic-gate  *
147c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
157c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
167c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
177c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
187c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
197c478bd9Sstevel@tonic-gate  *
207c478bd9Sstevel@tonic-gate  * CDDL HEADER END
217c478bd9Sstevel@tonic-gate  */
227c478bd9Sstevel@tonic-gate /*
237c478bd9Sstevel@tonic-gate  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
247c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
257c478bd9Sstevel@tonic-gate  */
267c478bd9Sstevel@tonic-gate 
277c478bd9Sstevel@tonic-gate #include "options.h"
287c478bd9Sstevel@tonic-gate 
/*
 * options
 *
 * Overview
 *   sort(1) supports two methods for specifying the sort key:  the original,
 *   now-obsolete, +n -m form and the POSIX -k n,m form.  We refer to the former
 *   as "old specifiers" and the latter as "new specifiers".  The options()
 *   function parses the command line arguments given to sort, placing the sort
 *   key specifiers in the internal representation used in fields.c.
 *
 * Equivalence of specifiers
 *   One of sort(1)'s standard peculiarities is the transformation of the
 *   character offsets and field numbering between the new and old style field
 *   specifications.  We simply quote from the Single Unix standard:
 *
 *	+w.xT -y.zU
 *
 *   is equivalent to
 *
 *	undefined		when z == 0, U contains b, and -t is set
 *	-k w+1.x+1T,y.0U	when z == 0 otherwise
 *	-k w+1.x+1T,y+1.zU	when z > 0
 *
 *   Undoubtedly, this seemed logical at the time.  (Using only the field head
 *   as the coordinate, as done in the obsolete version, seems much simpler.)
 *   The reverse map is where the key specifier
 *
 *	-k w.xT,y.zU
 *
 *   is equivalent to
 *
 *	undefined		when z == 0, U contains b, and -t is set
 *	+w-1.x-1T -y.0U		when z == 0 otherwise
 *	+w-1.x-1T -y-1.zU	when z > 0
 *
 *   in the obsolete syntax.  Because the original key specifiers lead to a
 *   simpler implementation, the internal representation of a field in this
 *   implementation of sort is mostly that given by the obsolete syntax.
 */
687c478bd9Sstevel@tonic-gate 
/*
 * getopt() option strings.
 *
 * OPTIONS_STRING is used for ordinary command line parsing.  It includes the
 * digits so that a stray "-NNN" argument is returned by getopt() as an option
 * character and can be rejected explicitly.
 *
 * OLD_SPEC_OPTIONS_STRING is used while an obsolete "+m ... -n" key specifier
 * is still open (that is, before its closing -n has been seen); it omits the
 * digits, because a "-NNN" argument in that position closes the specifier.
 */
#define	OPTIONS_STRING		"cmuo:T:z:dfiMnrbt:k:S:0123456789"
#define	OLD_SPEC_OPTIONS_STRING	"bdfiMnrcmuo:T:z:t:k:S:"

/*
 * Flag bits passed to parse_field_spec() describing the specifier being
 * parsed.  A clear bit selects the alternative named in the comment.
 */
#define	OPTIONS_OLDSPEC		(1 << 0)	/* else new-style spec */
#define	OPTIONS_STARTSPEC	(1 << 1)	/* else end spec */
797c478bd9Sstevel@tonic-gate 
/*
 * is_number() returns 1 if every character of the NUL-terminated string C is
 * a decimal digit, and 0 as soon as a non-digit is found.  An empty string
 * contains no non-digit and therefore yields 1.
 *
 * The string is walked once; its length is not recomputed on each iteration.
 */
static int
is_number(char *C)
{
	const char *p;

	for (p = C; *p != '\0'; p++) {
		/* isdigit() requires a value representable as unsigned char */
		if (!isdigit((unsigned char)*p))
			return (0);
	}

	return (1);
}
917c478bd9Sstevel@tonic-gate 
/*
 * field_spec_has_modifiers() reports whether the first length characters of
 * the key specifier C include anything other than digits, periods and commas.
 * It returns 0 when the leading run of such characters is exactly length
 * characters long, and 1 otherwise.
 *
 * If a field specified by the -k option or by the +n syntax contains any
 * modifiers, then the current global field modifiers are not inherited.
 */
static int
field_spec_has_modifiers(char *C, int length)
{
	int plain_span = strspn(C, ",.1234567890");

	return (plain_span != length);
}
1067c478bd9Sstevel@tonic-gate 
1077c478bd9Sstevel@tonic-gate static void
field_apply_all(field_t * fc,flag_t flags)1087c478bd9Sstevel@tonic-gate field_apply_all(field_t *fc, flag_t flags)
1097c478bd9Sstevel@tonic-gate {
1107c478bd9Sstevel@tonic-gate 	field_t *f;
1117c478bd9Sstevel@tonic-gate 
1127c478bd9Sstevel@tonic-gate 	for (f = fc; f; f = f->f_next)
1137c478bd9Sstevel@tonic-gate 		if ((f->f_options & FIELD_MODIFIERS_DEFINED) == 0)
1147c478bd9Sstevel@tonic-gate 			f->f_options |= flags;
1157c478bd9Sstevel@tonic-gate }
1167c478bd9Sstevel@tonic-gate 
1177c478bd9Sstevel@tonic-gate static int
parse_field_spec(field_t * F,char * C,int flags,int length)1187c478bd9Sstevel@tonic-gate parse_field_spec(field_t *F, char *C, int flags, int length)
1197c478bd9Sstevel@tonic-gate {
1207c478bd9Sstevel@tonic-gate 	int p_period = MIN(length, strcspn(C, "."));
1217c478bd9Sstevel@tonic-gate 	int p_modifiers = MIN(length, strspn(C, ".1234567890"));
1227c478bd9Sstevel@tonic-gate 	int p_boundary = MIN(p_period, p_modifiers);
1237c478bd9Sstevel@tonic-gate 	int field = 0;
1247c478bd9Sstevel@tonic-gate 	int offset = 0;
1257c478bd9Sstevel@tonic-gate 	int offset_seen = 0;
1267c478bd9Sstevel@tonic-gate 	int i;
1277c478bd9Sstevel@tonic-gate 	int blanks_flag = 0;
1287c478bd9Sstevel@tonic-gate 
1297c478bd9Sstevel@tonic-gate 	for (i = 0; i < p_boundary; i++) {
1307c478bd9Sstevel@tonic-gate 		if (isdigit((uchar_t)C[i]))
1317c478bd9Sstevel@tonic-gate 			field = (10 * field) + (C[i] - '0');
1327c478bd9Sstevel@tonic-gate 		else
1337c478bd9Sstevel@tonic-gate 			return (1);
1347c478bd9Sstevel@tonic-gate 	}
1357c478bd9Sstevel@tonic-gate 
1367c478bd9Sstevel@tonic-gate 	if (p_period < p_modifiers) {
1377c478bd9Sstevel@tonic-gate 		for (i = p_period + 1; i < p_modifiers; i++) {
1387c478bd9Sstevel@tonic-gate 			if (isdigit((uchar_t)C[i])) {
1397c478bd9Sstevel@tonic-gate 				offset_seen++;
1407c478bd9Sstevel@tonic-gate 				offset = (10 * offset) + (C[i] - '0');
1417c478bd9Sstevel@tonic-gate 			} else {
1427c478bd9Sstevel@tonic-gate 				return (1);
1437c478bd9Sstevel@tonic-gate 			}
1447c478bd9Sstevel@tonic-gate 		}
1457c478bd9Sstevel@tonic-gate 	}
1467c478bd9Sstevel@tonic-gate 
1477c478bd9Sstevel@tonic-gate 	if (p_modifiers < length) {
1487c478bd9Sstevel@tonic-gate 		for (i = p_modifiers; i < length; i++) {
1497c478bd9Sstevel@tonic-gate 			switch (C[i]) {
1507c478bd9Sstevel@tonic-gate 				case 'b':
1517c478bd9Sstevel@tonic-gate 					blanks_flag = 1;
1527c478bd9Sstevel@tonic-gate 					break;
1537c478bd9Sstevel@tonic-gate 				case 'd':
1547c478bd9Sstevel@tonic-gate 					F->f_options |= FIELD_DICTIONARY_ORDER;
1557c478bd9Sstevel@tonic-gate 					break;
1567c478bd9Sstevel@tonic-gate 				case 'f':
1577c478bd9Sstevel@tonic-gate 					F->f_options |= FIELD_FOLD_UPPERCASE;
1587c478bd9Sstevel@tonic-gate 					break;
1597c478bd9Sstevel@tonic-gate 				case 'i':
1607c478bd9Sstevel@tonic-gate 					F->f_options |=
1617c478bd9Sstevel@tonic-gate 					    FIELD_IGNORE_NONPRINTABLES;
1627c478bd9Sstevel@tonic-gate 					break;
1637c478bd9Sstevel@tonic-gate 				case 'M':
1647c478bd9Sstevel@tonic-gate 					F->f_species = MONTH;
1657c478bd9Sstevel@tonic-gate 					break;
1667c478bd9Sstevel@tonic-gate 				case 'n':
1677c478bd9Sstevel@tonic-gate 					F->f_species = NUMERIC;
1687c478bd9Sstevel@tonic-gate 					break;
1697c478bd9Sstevel@tonic-gate 				case 'r':
1707c478bd9Sstevel@tonic-gate 					F->f_options |=
1717c478bd9Sstevel@tonic-gate 					    FIELD_REVERSE_COMPARISONS;
1727c478bd9Sstevel@tonic-gate 					break;
1737c478bd9Sstevel@tonic-gate 				default:
1747c478bd9Sstevel@tonic-gate 					usage();
1757c478bd9Sstevel@tonic-gate 					break;
1767c478bd9Sstevel@tonic-gate 			}
1777c478bd9Sstevel@tonic-gate 		}
1787c478bd9Sstevel@tonic-gate 	}
1797c478bd9Sstevel@tonic-gate 
1807c478bd9Sstevel@tonic-gate 	if (flags & OPTIONS_STARTSPEC) {
1817c478bd9Sstevel@tonic-gate 		F->f_start_field = field;
1827c478bd9Sstevel@tonic-gate 		F->f_start_offset = offset;
1837c478bd9Sstevel@tonic-gate 		if ((flags & OPTIONS_OLDSPEC) != OPTIONS_OLDSPEC) {
1847c478bd9Sstevel@tonic-gate 			F->f_start_field--;
1857c478bd9Sstevel@tonic-gate 			if (offset_seen)
1867c478bd9Sstevel@tonic-gate 				F->f_start_offset--;
1877c478bd9Sstevel@tonic-gate 		}
1887c478bd9Sstevel@tonic-gate 		F->f_options |= blanks_flag ? FIELD_IGNORE_BLANKS_START : 0;
1897c478bd9Sstevel@tonic-gate 	} else {
1907c478bd9Sstevel@tonic-gate 		F->f_end_field = field;
1917c478bd9Sstevel@tonic-gate 		F->f_end_offset = offset;
1927c478bd9Sstevel@tonic-gate 		if ((flags & OPTIONS_OLDSPEC) != OPTIONS_OLDSPEC &&
1937c478bd9Sstevel@tonic-gate 		    offset_seen && offset != 0)
1947c478bd9Sstevel@tonic-gate 			F->f_end_field--;
1957c478bd9Sstevel@tonic-gate 		F->f_options |= blanks_flag ? FIELD_IGNORE_BLANKS_END : 0;
1967c478bd9Sstevel@tonic-gate 	}
1977c478bd9Sstevel@tonic-gate 
1987c478bd9Sstevel@tonic-gate 	return (0);
1997c478bd9Sstevel@tonic-gate }
2007c478bd9Sstevel@tonic-gate 
2017c478bd9Sstevel@tonic-gate static void
parse_new_field_spec(sort_t * S,char * arg)2027c478bd9Sstevel@tonic-gate parse_new_field_spec(sort_t *S, char *arg)
2037c478bd9Sstevel@tonic-gate {
2047c478bd9Sstevel@tonic-gate 	int length = strlen(arg);
2057c478bd9Sstevel@tonic-gate 	int p_comma = MIN(length, strcspn(arg, ","));
2067c478bd9Sstevel@tonic-gate 	field_t *nF;
2077c478bd9Sstevel@tonic-gate 	int p;
2087c478bd9Sstevel@tonic-gate 
2097c478bd9Sstevel@tonic-gate 	/*
2107c478bd9Sstevel@tonic-gate 	 * New field specifiers do not inherit from the general specifier if
2117c478bd9Sstevel@tonic-gate 	 * they have any modifiers set.  (This is specifically tested in the VSC
2127c478bd9Sstevel@tonic-gate 	 * test suite, assertion 32 for POSIX.cmd/sort.)
2137c478bd9Sstevel@tonic-gate 	 */
2147c478bd9Sstevel@tonic-gate 	if (field_spec_has_modifiers(arg, length)) {
2157c478bd9Sstevel@tonic-gate 		nF = field_new(NULL);
2167c478bd9Sstevel@tonic-gate 		nF->f_options = FIELD_MODIFIERS_DEFINED;
2177c478bd9Sstevel@tonic-gate 	} else {
2187c478bd9Sstevel@tonic-gate 		nF = field_new(S);
2197c478bd9Sstevel@tonic-gate 	}
2207c478bd9Sstevel@tonic-gate 	p = parse_field_spec(nF, arg, OPTIONS_STARTSPEC, p_comma);
2217c478bd9Sstevel@tonic-gate 
2227c478bd9Sstevel@tonic-gate 	if (p != 0)
2237c478bd9Sstevel@tonic-gate 		usage();
2247c478bd9Sstevel@tonic-gate 
2257c478bd9Sstevel@tonic-gate 	if (p_comma < length) {
2267c478bd9Sstevel@tonic-gate 		p = parse_field_spec(nF, &(arg[p_comma + 1]), 0,
2277c478bd9Sstevel@tonic-gate 		    strlen(&(arg[p_comma + 1])));
2287c478bd9Sstevel@tonic-gate 		if (p != 0)
2297c478bd9Sstevel@tonic-gate 			usage();
2307c478bd9Sstevel@tonic-gate 	}
2317c478bd9Sstevel@tonic-gate 
2327c478bd9Sstevel@tonic-gate 	if (nF->f_start_field < 0 || nF->f_start_offset < 0) {
2337c478bd9Sstevel@tonic-gate 		if (S->m_verbose)
2347c478bd9Sstevel@tonic-gate 			warn("-k %s is not a supported field specifier\n", arg);
2357c478bd9Sstevel@tonic-gate 	}
2367c478bd9Sstevel@tonic-gate 	nF->f_start_field = MAX(nF->f_start_field, 0);
2377c478bd9Sstevel@tonic-gate 	nF->f_start_offset = MAX(nF->f_start_offset, 0);
2387c478bd9Sstevel@tonic-gate 
2397c478bd9Sstevel@tonic-gate 	/*
2407c478bd9Sstevel@tonic-gate 	 * If the starting field exceeds a defined ending field, convention
2417c478bd9Sstevel@tonic-gate 	 * dictates that the field is ignored.
2427c478bd9Sstevel@tonic-gate 	 */
2437c478bd9Sstevel@tonic-gate 	if (nF->f_end_field == -1 || nF->f_start_field < nF->f_end_field ||
2447c478bd9Sstevel@tonic-gate 	    (nF->f_start_field == nF->f_end_field &&
2457c478bd9Sstevel@tonic-gate 	    nF->f_start_offset < nF->f_end_offset)) {
2467c478bd9Sstevel@tonic-gate 		field_add_to_chain(&(S->m_fields_head), nF);
2477c478bd9Sstevel@tonic-gate 	} else if (S->m_verbose) {
2487c478bd9Sstevel@tonic-gate 		warn("illegal field -k %s omitted", arg);
2497c478bd9Sstevel@tonic-gate 	}
2507c478bd9Sstevel@tonic-gate }
2517c478bd9Sstevel@tonic-gate 
2527c478bd9Sstevel@tonic-gate /*
2537c478bd9Sstevel@tonic-gate  * parse_old_field_spec() is getopt()-aware; it may modify the values of optind,
2547c478bd9Sstevel@tonic-gate  * optarg, and so forth, to correctly determine the characteristics being
2557c478bd9Sstevel@tonic-gate  * assigned to the current field.
2567c478bd9Sstevel@tonic-gate  */
2577c478bd9Sstevel@tonic-gate static int
parse_old_field_spec(sort_t * S,int argc,char * argv[])2587c478bd9Sstevel@tonic-gate parse_old_field_spec(sort_t *S, int argc, char *argv[])
2597c478bd9Sstevel@tonic-gate {
2607c478bd9Sstevel@tonic-gate 	field_t *nF;
2617c478bd9Sstevel@tonic-gate 	int c, p;
2627c478bd9Sstevel@tonic-gate 	char *arg = argv[optind];
2637c478bd9Sstevel@tonic-gate 
2647c478bd9Sstevel@tonic-gate 	if (field_spec_has_modifiers(arg + 1, strlen(arg + 1))) {
2657c478bd9Sstevel@tonic-gate 		nF = field_new(NULL);
2667c478bd9Sstevel@tonic-gate 		nF->f_options = FIELD_MODIFIERS_DEFINED;
2677c478bd9Sstevel@tonic-gate 	} else {
2687c478bd9Sstevel@tonic-gate 		nF = field_new(S);
2697c478bd9Sstevel@tonic-gate 	}
2707c478bd9Sstevel@tonic-gate 
2717c478bd9Sstevel@tonic-gate 	p = parse_field_spec(nF, arg + 1, OPTIONS_OLDSPEC | OPTIONS_STARTSPEC,
2727c478bd9Sstevel@tonic-gate 	    strlen(arg + 1));
2737c478bd9Sstevel@tonic-gate 
2747c478bd9Sstevel@tonic-gate 	if (p != 0) {
2757c478bd9Sstevel@tonic-gate 		field_delete(nF);
2767c478bd9Sstevel@tonic-gate 		return (0);
2777c478bd9Sstevel@tonic-gate 	}
2787c478bd9Sstevel@tonic-gate 
2797c478bd9Sstevel@tonic-gate 	/*
2807c478bd9Sstevel@tonic-gate 	 * In the case that getopt() returns '?' (unrecognized option) or EOF
2817c478bd9Sstevel@tonic-gate 	 * (non-option argument), the field is considered closed.
2827c478bd9Sstevel@tonic-gate 	 */
2837c478bd9Sstevel@tonic-gate 	for (arg = argv[++optind]; optind < argc; arg = argv[optind]) {
2847c478bd9Sstevel@tonic-gate 		if (strlen(arg) >= 2 && *arg == '-' &&
2857c478bd9Sstevel@tonic-gate 		    isdigit(*(uchar_t *)(arg + 1))) {
2867c478bd9Sstevel@tonic-gate 			(void) parse_field_spec(nF, arg + 1,
2877c478bd9Sstevel@tonic-gate 			    OPTIONS_OLDSPEC, strlen(arg) - 1);
2887c478bd9Sstevel@tonic-gate 			field_add_to_chain(&(S->m_fields_head), nF);
2897c478bd9Sstevel@tonic-gate 			optind++;
2907c478bd9Sstevel@tonic-gate 			return (1);
2917c478bd9Sstevel@tonic-gate 		}
2927c478bd9Sstevel@tonic-gate 
2937c478bd9Sstevel@tonic-gate 		if ((c = getopt(argc, argv, OLD_SPEC_OPTIONS_STRING)) != EOF) {
2947c478bd9Sstevel@tonic-gate 			switch (c) {
2957c478bd9Sstevel@tonic-gate 			case 'b':
2967c478bd9Sstevel@tonic-gate 				nF->f_options |= FIELD_IGNORE_BLANKS_START;
2977c478bd9Sstevel@tonic-gate 				break;
2987c478bd9Sstevel@tonic-gate 			case 'd':
2997c478bd9Sstevel@tonic-gate 				nF->f_options |= FIELD_DICTIONARY_ORDER;
3007c478bd9Sstevel@tonic-gate 				break;
3017c478bd9Sstevel@tonic-gate 			case 'f':
3027c478bd9Sstevel@tonic-gate 				nF->f_options |= FIELD_FOLD_UPPERCASE;
3037c478bd9Sstevel@tonic-gate 				break;
3047c478bd9Sstevel@tonic-gate 			case 'i':
3057c478bd9Sstevel@tonic-gate 				nF->f_options |= FIELD_IGNORE_NONPRINTABLES;
3067c478bd9Sstevel@tonic-gate 				break;
3077c478bd9Sstevel@tonic-gate 			case 'M':
3087c478bd9Sstevel@tonic-gate 				nF->f_species = MONTH;
3097c478bd9Sstevel@tonic-gate 				break;
3107c478bd9Sstevel@tonic-gate 			case 'n':
3117c478bd9Sstevel@tonic-gate 				nF->f_species = NUMERIC;
3127c478bd9Sstevel@tonic-gate 				break;
3137c478bd9Sstevel@tonic-gate 			case 'r':
3147c478bd9Sstevel@tonic-gate 				nF->f_options |= FIELD_REVERSE_COMPARISONS;
3157c478bd9Sstevel@tonic-gate 				break;
3167c478bd9Sstevel@tonic-gate 			case '?':
3177c478bd9Sstevel@tonic-gate 			case 'c':
3187c478bd9Sstevel@tonic-gate 			case 'm':
3197c478bd9Sstevel@tonic-gate 			case 'u':
3207c478bd9Sstevel@tonic-gate 				/*
3217c478bd9Sstevel@tonic-gate 				 * Options without arguments.
3227c478bd9Sstevel@tonic-gate 				 */
3237c478bd9Sstevel@tonic-gate 				optind -= 1;
3247c478bd9Sstevel@tonic-gate 				field_add_to_chain(&(S->m_fields_head), nF);
3257c478bd9Sstevel@tonic-gate 				return (1);
3267c478bd9Sstevel@tonic-gate 				/*NOTREACHED*/
3277c478bd9Sstevel@tonic-gate 			case 'o':
3287c478bd9Sstevel@tonic-gate 			case 'T':
3297c478bd9Sstevel@tonic-gate 			case 'z':
3307c478bd9Sstevel@tonic-gate 			case 't':
3317c478bd9Sstevel@tonic-gate 			case 'k':
3327c478bd9Sstevel@tonic-gate 			case 'S':
3337c478bd9Sstevel@tonic-gate 				/*
3347c478bd9Sstevel@tonic-gate 				 * Options with arguments.
3357c478bd9Sstevel@tonic-gate 				 */
3367c478bd9Sstevel@tonic-gate 				if (optarg == argv[optind - 1] + 2) {
3377c478bd9Sstevel@tonic-gate 					optind -= 1;
3387c478bd9Sstevel@tonic-gate 				} else {
3397c478bd9Sstevel@tonic-gate 					optind -= 2;
3407c478bd9Sstevel@tonic-gate 				}
3417c478bd9Sstevel@tonic-gate 				field_add_to_chain(&(S->m_fields_head), nF);
3427c478bd9Sstevel@tonic-gate 				return (1);
3437c478bd9Sstevel@tonic-gate 				/*NOTREACHED*/
3447c478bd9Sstevel@tonic-gate 			default:
3457c478bd9Sstevel@tonic-gate 				die(EMSG_UNKN_OPTION);
3467c478bd9Sstevel@tonic-gate 				/*NOTREACHED*/
3477c478bd9Sstevel@tonic-gate 			}
3487c478bd9Sstevel@tonic-gate 		} else {
3497c478bd9Sstevel@tonic-gate 			break;
3507c478bd9Sstevel@tonic-gate 		}
3517c478bd9Sstevel@tonic-gate 	}
3527c478bd9Sstevel@tonic-gate 
3537c478bd9Sstevel@tonic-gate 	field_add_to_chain(&(S->m_fields_head), nF);
3547c478bd9Sstevel@tonic-gate 	return (1);
3557c478bd9Sstevel@tonic-gate }
3567c478bd9Sstevel@tonic-gate 
3577c478bd9Sstevel@tonic-gate int
options(sort_t * S,int argc,char * argv[])3587c478bd9Sstevel@tonic-gate options(sort_t *S, int argc, char *argv[])
3597c478bd9Sstevel@tonic-gate {
3607c478bd9Sstevel@tonic-gate 	int c;
3617c478bd9Sstevel@tonic-gate 
3627c478bd9Sstevel@tonic-gate 	optind = 1;
3637c478bd9Sstevel@tonic-gate 	while (optind < argc) {
3647c478bd9Sstevel@tonic-gate 		if (strncmp("-y", argv[optind], strlen("-y")) == 0) {
3657c478bd9Sstevel@tonic-gate 			/*
3667c478bd9Sstevel@tonic-gate 			 * The -y [kmem] option violates the standard syntax
3677c478bd9Sstevel@tonic-gate 			 * outlined in intro(1).  we have to be a little fancy
3687c478bd9Sstevel@tonic-gate 			 * to determine if the next argument is a valid integer.
3697c478bd9Sstevel@tonic-gate 			 * (note, of course, that the previous sort(1) had no
3707c478bd9Sstevel@tonic-gate 			 * mechanism to resolve a final
3717c478bd9Sstevel@tonic-gate 			 *	-y 99999
3727c478bd9Sstevel@tonic-gate 			 * into
3737c478bd9Sstevel@tonic-gate 			 *	-y, file 99999
3747c478bd9Sstevel@tonic-gate 			 * or
3757c478bd9Sstevel@tonic-gate 			 *	-y 99999, file stdin
3767c478bd9Sstevel@tonic-gate 			 *
3777c478bd9Sstevel@tonic-gate 			 * Now one can unambiguously use
3787c478bd9Sstevel@tonic-gate 			 *	-y -- 99999
3797c478bd9Sstevel@tonic-gate 			 * and
3807c478bd9Sstevel@tonic-gate 			 *	-y 99999 -
3817c478bd9Sstevel@tonic-gate 			 * to distinguish these cases.
3827c478bd9Sstevel@tonic-gate 			 *
3837c478bd9Sstevel@tonic-gate 			 * That said, we do not use the information passed using
3847c478bd9Sstevel@tonic-gate 			 * -y option in sort(1); we provide the argument to
3857c478bd9Sstevel@tonic-gate 			 * preserve compatibility for existing scripts.
3867c478bd9Sstevel@tonic-gate 			 */
3877c478bd9Sstevel@tonic-gate 			if (strlen(argv[optind]) == strlen("-y") &&
3887c478bd9Sstevel@tonic-gate 			    optind + 1 < argc &&
3897c478bd9Sstevel@tonic-gate 			    is_number(argv[optind + 1]))
3907c478bd9Sstevel@tonic-gate 				optind += 2;
3917c478bd9Sstevel@tonic-gate 			else
3927c478bd9Sstevel@tonic-gate 				optind += 1;
3937c478bd9Sstevel@tonic-gate 		}
3947c478bd9Sstevel@tonic-gate 
3957c478bd9Sstevel@tonic-gate 		if ((c = getopt(argc, argv, OPTIONS_STRING)) != EOF) {
3967c478bd9Sstevel@tonic-gate 			switch (c) {
3977c478bd9Sstevel@tonic-gate 			case 'c':
3987c478bd9Sstevel@tonic-gate 				S->m_check_if_sorted_only = 1;
3997c478bd9Sstevel@tonic-gate 				break;
4007c478bd9Sstevel@tonic-gate 
4017c478bd9Sstevel@tonic-gate 			case 'm':
4027c478bd9Sstevel@tonic-gate 				S->m_merge_only = 1;
4037c478bd9Sstevel@tonic-gate 				break;
4047c478bd9Sstevel@tonic-gate 
4057c478bd9Sstevel@tonic-gate 			case 'u':
4067c478bd9Sstevel@tonic-gate 				S->m_unique_lines = 1;
4077c478bd9Sstevel@tonic-gate 				break;
4087c478bd9Sstevel@tonic-gate 
4097c478bd9Sstevel@tonic-gate 			case 'o':
4107c478bd9Sstevel@tonic-gate 				S->m_output_filename = optarg;
4117c478bd9Sstevel@tonic-gate 				break;
4127c478bd9Sstevel@tonic-gate 
4137c478bd9Sstevel@tonic-gate 			case 'T':
4147c478bd9Sstevel@tonic-gate 				S->m_tmpdir_template = optarg;
4157c478bd9Sstevel@tonic-gate 				break;
4167c478bd9Sstevel@tonic-gate 
4177c478bd9Sstevel@tonic-gate 			case 'z':
4187c478bd9Sstevel@tonic-gate 				/*
4197c478bd9Sstevel@tonic-gate 				 * ignore optarg -- obsolete
4207c478bd9Sstevel@tonic-gate 				 */
4217c478bd9Sstevel@tonic-gate 				break;
4227c478bd9Sstevel@tonic-gate 
4237c478bd9Sstevel@tonic-gate 			case 'd':
4247c478bd9Sstevel@tonic-gate 				S->m_field_options |= FIELD_DICTIONARY_ORDER;
4257c478bd9Sstevel@tonic-gate 				field_apply_all(S->m_fields_head,
4267c478bd9Sstevel@tonic-gate 				    FIELD_DICTIONARY_ORDER);
4277c478bd9Sstevel@tonic-gate 				break;
4287c478bd9Sstevel@tonic-gate 
4297c478bd9Sstevel@tonic-gate 			case 'f':
4307c478bd9Sstevel@tonic-gate 				S->m_field_options |= FIELD_FOLD_UPPERCASE;
4317c478bd9Sstevel@tonic-gate 				field_apply_all(S->m_fields_head,
4327c478bd9Sstevel@tonic-gate 				    FIELD_FOLD_UPPERCASE);
4337c478bd9Sstevel@tonic-gate 				break;
4347c478bd9Sstevel@tonic-gate 
4357c478bd9Sstevel@tonic-gate 			case 'i':
4367c478bd9Sstevel@tonic-gate 				S->m_field_options |=
4377c478bd9Sstevel@tonic-gate 				    FIELD_IGNORE_NONPRINTABLES;
4387c478bd9Sstevel@tonic-gate 				field_apply_all(S->m_fields_head,
4397c478bd9Sstevel@tonic-gate 				    FIELD_IGNORE_NONPRINTABLES);
4407c478bd9Sstevel@tonic-gate 				break;
4417c478bd9Sstevel@tonic-gate 
4427c478bd9Sstevel@tonic-gate 			case 'M':
4437c478bd9Sstevel@tonic-gate 				S->m_default_species = MONTH;
4447c478bd9Sstevel@tonic-gate 				S->m_field_options &=
4457c478bd9Sstevel@tonic-gate 				    ~FIELD_IGNORE_BLANKS_START;
4467c478bd9Sstevel@tonic-gate 				break;
4477c478bd9Sstevel@tonic-gate 
4487c478bd9Sstevel@tonic-gate 			case 'n':
4497c478bd9Sstevel@tonic-gate 				S->m_default_species = NUMERIC;
4507c478bd9Sstevel@tonic-gate 				{
4517c478bd9Sstevel@tonic-gate 					field_t *f;
4527c478bd9Sstevel@tonic-gate 
4537c478bd9Sstevel@tonic-gate 					for (f = S->m_fields_head; f;
4547c478bd9Sstevel@tonic-gate 					    f = f->f_next)
4557c478bd9Sstevel@tonic-gate 						if ((f->f_options &
4567c478bd9Sstevel@tonic-gate 						    FIELD_MODIFIERS_DEFINED) ==
4577c478bd9Sstevel@tonic-gate 						    0)
4587c478bd9Sstevel@tonic-gate 							f->f_species = NUMERIC;
4597c478bd9Sstevel@tonic-gate 				}
4607c478bd9Sstevel@tonic-gate 				break;
4617c478bd9Sstevel@tonic-gate 
4627c478bd9Sstevel@tonic-gate 			case 'b':
4637c478bd9Sstevel@tonic-gate 				S->m_field_options |=
4647c478bd9Sstevel@tonic-gate 				    FIELD_IGNORE_BLANKS_START |
4657c478bd9Sstevel@tonic-gate 				    FIELD_IGNORE_BLANKS_END;
4667c478bd9Sstevel@tonic-gate 				break;
4677c478bd9Sstevel@tonic-gate 
4687c478bd9Sstevel@tonic-gate 			case 'r':
4697c478bd9Sstevel@tonic-gate 				S->m_field_options |=
4707c478bd9Sstevel@tonic-gate 				    FIELD_REVERSE_COMPARISONS;
4717c478bd9Sstevel@tonic-gate 				field_apply_all(S->m_fields_head,
4727c478bd9Sstevel@tonic-gate 				    FIELD_REVERSE_COMPARISONS);
4737c478bd9Sstevel@tonic-gate 				break;
4747c478bd9Sstevel@tonic-gate 
4757c478bd9Sstevel@tonic-gate 			case 't':
4767c478bd9Sstevel@tonic-gate 				/*
4777c478bd9Sstevel@tonic-gate 				 * delimiter
4787c478bd9Sstevel@tonic-gate 				 */
4797c478bd9Sstevel@tonic-gate 				if (S->m_single_byte_locale) {
4807c478bd9Sstevel@tonic-gate 					/*
4817c478bd9Sstevel@tonic-gate 					 * Most debuggers can't take tabs as
4827c478bd9Sstevel@tonic-gate 					 * input arguments, so we provide an
4837c478bd9Sstevel@tonic-gate 					 * escape sequence to allow testing of
4847c478bd9Sstevel@tonic-gate 					 * this special case for the DEBUG
4857c478bd9Sstevel@tonic-gate 					 * version.
4867c478bd9Sstevel@tonic-gate 					 */
4877c478bd9Sstevel@tonic-gate 					S->m_field_separator.sc =
4887c478bd9Sstevel@tonic-gate #ifdef DEBUG
4897c478bd9Sstevel@tonic-gate 					    xstreql(optarg, "\\t") ? '\t' :
4907c478bd9Sstevel@tonic-gate #endif
4917c478bd9Sstevel@tonic-gate 					    optarg[0];
4927c478bd9Sstevel@tonic-gate 				} else
4937c478bd9Sstevel@tonic-gate 					(void) mbtowc(&S->m_field_separator.wc,
4947c478bd9Sstevel@tonic-gate 					    optarg, MB_CUR_MAX);
4957c478bd9Sstevel@tonic-gate 				break;
4967c478bd9Sstevel@tonic-gate 
4977c478bd9Sstevel@tonic-gate 			case 'k':
4987c478bd9Sstevel@tonic-gate 				/*
4997c478bd9Sstevel@tonic-gate 				 * key
5007c478bd9Sstevel@tonic-gate 				 */
5017c478bd9Sstevel@tonic-gate 				(void) parse_new_field_spec(S, optarg);
5027c478bd9Sstevel@tonic-gate 				break;
5037c478bd9Sstevel@tonic-gate 
5047c478bd9Sstevel@tonic-gate 			case 'S':
5057c478bd9Sstevel@tonic-gate 				S->m_memory_limit = strtomem(optarg);
5067c478bd9Sstevel@tonic-gate #ifdef DEBUG
5077c478bd9Sstevel@tonic-gate 				(void) fprintf(stderr, CMDNAME
5087c478bd9Sstevel@tonic-gate 				    ": limiting size to %d bytes\n",
5097c478bd9Sstevel@tonic-gate 				    S->m_memory_limit);
5107c478bd9Sstevel@tonic-gate #endif /* DEBUG */
5117c478bd9Sstevel@tonic-gate 				break;
5127c478bd9Sstevel@tonic-gate 
5137c478bd9Sstevel@tonic-gate 			/*
5147c478bd9Sstevel@tonic-gate 			 * We never take a naked -999; these should always be
5157c478bd9Sstevel@tonic-gate 			 * associated with a preceding +000.
5167c478bd9Sstevel@tonic-gate 			 */
5177c478bd9Sstevel@tonic-gate 			case '0':
5187c478bd9Sstevel@tonic-gate 			case '1':
5197c478bd9Sstevel@tonic-gate 			case '2':
5207c478bd9Sstevel@tonic-gate 			case '3':
5217c478bd9Sstevel@tonic-gate 			case '4':
5227c478bd9Sstevel@tonic-gate 			case '5':
5237c478bd9Sstevel@tonic-gate 			case '6':
5247c478bd9Sstevel@tonic-gate 			case '7':
5257c478bd9Sstevel@tonic-gate 			case '8':
5267c478bd9Sstevel@tonic-gate 			case '9':
5277c478bd9Sstevel@tonic-gate 				usage();
5287c478bd9Sstevel@tonic-gate 				break;
5297c478bd9Sstevel@tonic-gate 			case '?':
5307c478bd9Sstevel@tonic-gate 				/* error case */
5317c478bd9Sstevel@tonic-gate 				usage();
5327c478bd9Sstevel@tonic-gate 				break;
5337c478bd9Sstevel@tonic-gate 			}
5347c478bd9Sstevel@tonic-gate 
5357c478bd9Sstevel@tonic-gate 			/*
5367c478bd9Sstevel@tonic-gate 			 * Go back for next argument.
5377c478bd9Sstevel@tonic-gate 			 */
5387c478bd9Sstevel@tonic-gate 			continue;
5397c478bd9Sstevel@tonic-gate 		}
5407c478bd9Sstevel@tonic-gate 
5417c478bd9Sstevel@tonic-gate 		/*
5427c478bd9Sstevel@tonic-gate 		 * There are three (interpretable) possibilities for getopt() to
5437c478bd9Sstevel@tonic-gate 		 * return EOF with arguments on the command line: we have seen
5447c478bd9Sstevel@tonic-gate 		 * the "end-of-options" token, --, we have encountered the
5457c478bd9Sstevel@tonic-gate 		 * old-style field definition, +NNN, or we have found a
5467c478bd9Sstevel@tonic-gate 		 * filename.
5477c478bd9Sstevel@tonic-gate 		 *
5487c478bd9Sstevel@tonic-gate 		 * In the second case, we must also search for the optional -NNN
5497c478bd9Sstevel@tonic-gate 		 * field terminal definition.  (since "+joe", for instance, is
5507c478bd9Sstevel@tonic-gate 		 * a valid filename, we must handle this pattern as well.)  This
5517c478bd9Sstevel@tonic-gate 		 * is performed by parse_old_field_spec().
5527c478bd9Sstevel@tonic-gate 		 */
5537c478bd9Sstevel@tonic-gate 		if (xstreql(argv[optind - 1], "--")) {
5547c478bd9Sstevel@tonic-gate 			/*
5557c478bd9Sstevel@tonic-gate 			 * Process all arguments following end-of-options token
5567c478bd9Sstevel@tonic-gate 			 * as filenames.
5577c478bd9Sstevel@tonic-gate 			 */
5587c478bd9Sstevel@tonic-gate 			while (optind < argc) {
5597c478bd9Sstevel@tonic-gate 				if (xstreql(argv[optind], "-"))
5607c478bd9Sstevel@tonic-gate 					S->m_input_from_stdin = 1;
5617c478bd9Sstevel@tonic-gate 				else
5627c478bd9Sstevel@tonic-gate 					stream_add_file_to_chain(
5637c478bd9Sstevel@tonic-gate 					    &(S->m_input_streams),
5647c478bd9Sstevel@tonic-gate 					    argv[optind]);
5657c478bd9Sstevel@tonic-gate 				optind++;
5667c478bd9Sstevel@tonic-gate 			}
5677c478bd9Sstevel@tonic-gate 
5687c478bd9Sstevel@tonic-gate 			break;
5697c478bd9Sstevel@tonic-gate 		}
5707c478bd9Sstevel@tonic-gate 
5717c478bd9Sstevel@tonic-gate 		if (optind < argc) {
5727c478bd9Sstevel@tonic-gate 			if (xstreql(argv[optind], "-")) {
5737c478bd9Sstevel@tonic-gate 				S->m_input_from_stdin = 1;
5747c478bd9Sstevel@tonic-gate 				optind++;
5757c478bd9Sstevel@tonic-gate 			} else if (*(argv[optind]) != '+' ||
5767c478bd9Sstevel@tonic-gate 			    !parse_old_field_spec(S, argc, argv)) {
5777c478bd9Sstevel@tonic-gate 				/*
5787c478bd9Sstevel@tonic-gate 				 * It's a filename, because it either doesn't
5797c478bd9Sstevel@tonic-gate 				 * start with '+', or if it did, it wasn't an
5807c478bd9Sstevel@tonic-gate 				 * actual field specifier.
5817c478bd9Sstevel@tonic-gate 				 */
5827c478bd9Sstevel@tonic-gate 				stream_add_file_to_chain(&(S->m_input_streams),
5837c478bd9Sstevel@tonic-gate 				    argv[optind]);
5847c478bd9Sstevel@tonic-gate 				optind++;
5857c478bd9Sstevel@tonic-gate 			}
5867c478bd9Sstevel@tonic-gate 		}
5877c478bd9Sstevel@tonic-gate 	}
5887c478bd9Sstevel@tonic-gate 
5897c478bd9Sstevel@tonic-gate 	if (S->m_input_streams == NULL)
5907c478bd9Sstevel@tonic-gate 		S->m_input_from_stdin = 1;
5917c478bd9Sstevel@tonic-gate 
5927c478bd9Sstevel@tonic-gate 	if (S->m_output_filename == NULL)
5937c478bd9Sstevel@tonic-gate 		S->m_output_to_stdout = 1;
5947c478bd9Sstevel@tonic-gate 
5957c478bd9Sstevel@tonic-gate 	/*
5967c478bd9Sstevel@tonic-gate 	 * If no fields, then one great field.  However, if the -b option was
5977c478bd9Sstevel@tonic-gate 	 * set globally, be sure to ignore it, as per UNIX98.
5987c478bd9Sstevel@tonic-gate 	 */
5997c478bd9Sstevel@tonic-gate 	if (S->m_fields_head == NULL) {
6007c478bd9Sstevel@tonic-gate 		S->m_field_options &= ~FIELD_IGNORE_BLANKS_START;
6017c478bd9Sstevel@tonic-gate 
6027c478bd9Sstevel@tonic-gate 		(void) parse_new_field_spec(S, "1");
6037c478bd9Sstevel@tonic-gate 		/*
6047c478bd9Sstevel@tonic-gate 		 * "Entire line" fast path is only valid if no delimiter has
6057c478bd9Sstevel@tonic-gate 		 * been set and no modifiers have been applied.
6067c478bd9Sstevel@tonic-gate 		 */
6077c478bd9Sstevel@tonic-gate 		if (S->m_field_separator.wc == 0 &&
6087c478bd9Sstevel@tonic-gate 		    S->m_default_species == ALPHA &&
6097c478bd9Sstevel@tonic-gate 		    S->m_field_options == 0)
6107c478bd9Sstevel@tonic-gate 			S->m_entire_line = 1;
6117c478bd9Sstevel@tonic-gate 	}
6127c478bd9Sstevel@tonic-gate 
6137c478bd9Sstevel@tonic-gate 	return (0);
6147c478bd9Sstevel@tonic-gate }
615