17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate * CDDL HEADER START
37c478bd9Sstevel@tonic-gate *
47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the
57c478bd9Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only
67c478bd9Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance
77c478bd9Sstevel@tonic-gate * with the License.
87c478bd9Sstevel@tonic-gate *
97c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
107c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
117c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions
127c478bd9Sstevel@tonic-gate * and limitations under the License.
137c478bd9Sstevel@tonic-gate *
147c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
157c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
167c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
177c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
187c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
197c478bd9Sstevel@tonic-gate *
207c478bd9Sstevel@tonic-gate * CDDL HEADER END
217c478bd9Sstevel@tonic-gate */
227c478bd9Sstevel@tonic-gate /*
237c478bd9Sstevel@tonic-gate * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
247c478bd9Sstevel@tonic-gate * Use is subject to license terms.
257c478bd9Sstevel@tonic-gate */
267c478bd9Sstevel@tonic-gate
277c478bd9Sstevel@tonic-gate #include "options.h"
287c478bd9Sstevel@tonic-gate
/*
 * options
 *
 * Overview
 *   sort(1) supports two methods for specifying the sort key:  the original,
 *   now-obsolete, +n -m form and the POSIX -k n,m form.  We refer to the former
 *   as "old specifiers" and the latter as "new specifiers".  The options()
 *   function parses the command line arguments given to sort, placing the sort
 *   key specifiers in the internal representation used in fields.c.
 *
 * Equivalence of specifiers
 *   One of sort(1)'s standard peculiarities is the transformation of the
 *   character offsets and field numbering between the new and old style field
 *   specifications.  We simply quote from the Single Unix standard:
 *
 *	+w.xT -y.zU
 *
 *   is equivalent to
 *
 *	undefined		when z == 0, U contains b, and -t is set
 *	-k w+1.x+1T,y.0U	when z == 0 otherwise
 *	-k w+1.x+1T,y+1.zU	when z > 0
 *
 *   Undoubtedly, this seemed logical at the time.  (Using only the field head
 *   as the coordinate, as done in the obsolete version, seems much simpler.)
 *   The reverse map is where the key specifier
 *
 *	-k w.xT,y.zU
 *
 *   is equivalent to
 *
 *	undefined		when z == 0, U contains b, and -t is set
 *	+w-1.x-1T -y.0U		when z == 0 otherwise
 *	+w-1.x-1T -y-1.zU	when z > 0
 *
 *   in the obsolete syntax.  Because the original key specifiers lead to a
 *   simpler implementation, the internal representation of a field in this
 *   implementation of sort is mostly that given by the obsolete syntax.
 */

/*
 * While a key specifier in the obsolete +m ... -n form is being defined (that
 * is, before the closing -n is seen), a narrower set of options is permitted.
 * We specify this smaller set of options in OLD_SPEC_OPTIONS_STRING.  Unlike
 * OPTIONS_STRING it lists no digits:  parse_old_field_spec() recognizes the
 * closing -n itself before it hands an argument to getopt().
 */
#define	OPTIONS_STRING		"cmuo:T:z:dfiMnrbt:k:S:0123456789"
#define	OLD_SPEC_OPTIONS_STRING	"bdfiMnrcmuo:T:z:t:k:S:"

/*
 * Flag bits passed to parse_field_spec() to say which dialect and which half
 * of a key specifier is being read.
 */
#define	OPTIONS_OLDSPEC		0x1	/* else new-style spec */
#define	OPTIONS_STARTSPEC	0x2	/* else end spec */

/*
 * is_number() reports whether C is a non-empty string made up solely of
 * decimal digits.  It returns 1 if so and 0 otherwise; a sign, a blank or any
 * other character disqualifies the string, as does the empty string.
 */
static int
is_number(char *C)
{
	const char *p;

	/* An empty argument holds no digits, so it is not a number. */
	if (*C == '\0')
		return (0);

	/*
	 * Walk to the terminator directly instead of calling strlen() in the
	 * loop condition on every iteration.
	 */
	for (p = C; *p != '\0'; p++) {
		if (!isdigit((unsigned char)*p))
			return (0);
	}

	return (1);
}

/*
 * If a field specified by the -k option or by the +n syntax contains any
 * modifiers, then the current global field modifiers are not inherited.
 *
 * field_spec_has_modifiers() measures the leading run of positional
 * characters (digits, '.' and ',') in the NUL-terminated string C and returns
 * 0 when that run is exactly length bytes long, 1 otherwise.  Callers pass
 * the full string length, so 1 means that some other character -- a modifier
 * letter -- is present.
 */
static int
field_spec_has_modifiers(char *C, int length)
{
	/* Compare as size_t rather than narrowing strspn()'s result to int. */
	size_t n_positional = strspn(C, ",.1234567890");

	return (n_positional != (size_t)length);
}

1077c478bd9Sstevel@tonic-gate static void
field_apply_all(field_t * fc,flag_t flags)1087c478bd9Sstevel@tonic-gate field_apply_all(field_t *fc, flag_t flags)
1097c478bd9Sstevel@tonic-gate {
1107c478bd9Sstevel@tonic-gate field_t *f;
1117c478bd9Sstevel@tonic-gate
1127c478bd9Sstevel@tonic-gate for (f = fc; f; f = f->f_next)
1137c478bd9Sstevel@tonic-gate if ((f->f_options & FIELD_MODIFIERS_DEFINED) == 0)
1147c478bd9Sstevel@tonic-gate f->f_options |= flags;
1157c478bd9Sstevel@tonic-gate }
1167c478bd9Sstevel@tonic-gate
1177c478bd9Sstevel@tonic-gate static int
parse_field_spec(field_t * F,char * C,int flags,int length)1187c478bd9Sstevel@tonic-gate parse_field_spec(field_t *F, char *C, int flags, int length)
1197c478bd9Sstevel@tonic-gate {
1207c478bd9Sstevel@tonic-gate int p_period = MIN(length, strcspn(C, "."));
1217c478bd9Sstevel@tonic-gate int p_modifiers = MIN(length, strspn(C, ".1234567890"));
1227c478bd9Sstevel@tonic-gate int p_boundary = MIN(p_period, p_modifiers);
1237c478bd9Sstevel@tonic-gate int field = 0;
1247c478bd9Sstevel@tonic-gate int offset = 0;
1257c478bd9Sstevel@tonic-gate int offset_seen = 0;
1267c478bd9Sstevel@tonic-gate int i;
1277c478bd9Sstevel@tonic-gate int blanks_flag = 0;
1287c478bd9Sstevel@tonic-gate
1297c478bd9Sstevel@tonic-gate for (i = 0; i < p_boundary; i++) {
1307c478bd9Sstevel@tonic-gate if (isdigit((uchar_t)C[i]))
1317c478bd9Sstevel@tonic-gate field = (10 * field) + (C[i] - '0');
1327c478bd9Sstevel@tonic-gate else
1337c478bd9Sstevel@tonic-gate return (1);
1347c478bd9Sstevel@tonic-gate }
1357c478bd9Sstevel@tonic-gate
1367c478bd9Sstevel@tonic-gate if (p_period < p_modifiers) {
1377c478bd9Sstevel@tonic-gate for (i = p_period + 1; i < p_modifiers; i++) {
1387c478bd9Sstevel@tonic-gate if (isdigit((uchar_t)C[i])) {
1397c478bd9Sstevel@tonic-gate offset_seen++;
1407c478bd9Sstevel@tonic-gate offset = (10 * offset) + (C[i] - '0');
1417c478bd9Sstevel@tonic-gate } else {
1427c478bd9Sstevel@tonic-gate return (1);
1437c478bd9Sstevel@tonic-gate }
1447c478bd9Sstevel@tonic-gate }
1457c478bd9Sstevel@tonic-gate }
1467c478bd9Sstevel@tonic-gate
1477c478bd9Sstevel@tonic-gate if (p_modifiers < length) {
1487c478bd9Sstevel@tonic-gate for (i = p_modifiers; i < length; i++) {
1497c478bd9Sstevel@tonic-gate switch (C[i]) {
1507c478bd9Sstevel@tonic-gate case 'b':
1517c478bd9Sstevel@tonic-gate blanks_flag = 1;
1527c478bd9Sstevel@tonic-gate break;
1537c478bd9Sstevel@tonic-gate case 'd':
1547c478bd9Sstevel@tonic-gate F->f_options |= FIELD_DICTIONARY_ORDER;
1557c478bd9Sstevel@tonic-gate break;
1567c478bd9Sstevel@tonic-gate case 'f':
1577c478bd9Sstevel@tonic-gate F->f_options |= FIELD_FOLD_UPPERCASE;
1587c478bd9Sstevel@tonic-gate break;
1597c478bd9Sstevel@tonic-gate case 'i':
1607c478bd9Sstevel@tonic-gate F->f_options |=
1617c478bd9Sstevel@tonic-gate FIELD_IGNORE_NONPRINTABLES;
1627c478bd9Sstevel@tonic-gate break;
1637c478bd9Sstevel@tonic-gate case 'M':
1647c478bd9Sstevel@tonic-gate F->f_species = MONTH;
1657c478bd9Sstevel@tonic-gate break;
1667c478bd9Sstevel@tonic-gate case 'n':
1677c478bd9Sstevel@tonic-gate F->f_species = NUMERIC;
1687c478bd9Sstevel@tonic-gate break;
1697c478bd9Sstevel@tonic-gate case 'r':
1707c478bd9Sstevel@tonic-gate F->f_options |=
1717c478bd9Sstevel@tonic-gate FIELD_REVERSE_COMPARISONS;
1727c478bd9Sstevel@tonic-gate break;
1737c478bd9Sstevel@tonic-gate default:
1747c478bd9Sstevel@tonic-gate usage();
1757c478bd9Sstevel@tonic-gate break;
1767c478bd9Sstevel@tonic-gate }
1777c478bd9Sstevel@tonic-gate }
1787c478bd9Sstevel@tonic-gate }
1797c478bd9Sstevel@tonic-gate
1807c478bd9Sstevel@tonic-gate if (flags & OPTIONS_STARTSPEC) {
1817c478bd9Sstevel@tonic-gate F->f_start_field = field;
1827c478bd9Sstevel@tonic-gate F->f_start_offset = offset;
1837c478bd9Sstevel@tonic-gate if ((flags & OPTIONS_OLDSPEC) != OPTIONS_OLDSPEC) {
1847c478bd9Sstevel@tonic-gate F->f_start_field--;
1857c478bd9Sstevel@tonic-gate if (offset_seen)
1867c478bd9Sstevel@tonic-gate F->f_start_offset--;
1877c478bd9Sstevel@tonic-gate }
1887c478bd9Sstevel@tonic-gate F->f_options |= blanks_flag ? FIELD_IGNORE_BLANKS_START : 0;
1897c478bd9Sstevel@tonic-gate } else {
1907c478bd9Sstevel@tonic-gate F->f_end_field = field;
1917c478bd9Sstevel@tonic-gate F->f_end_offset = offset;
1927c478bd9Sstevel@tonic-gate if ((flags & OPTIONS_OLDSPEC) != OPTIONS_OLDSPEC &&
1937c478bd9Sstevel@tonic-gate offset_seen && offset != 0)
1947c478bd9Sstevel@tonic-gate F->f_end_field--;
1957c478bd9Sstevel@tonic-gate F->f_options |= blanks_flag ? FIELD_IGNORE_BLANKS_END : 0;
1967c478bd9Sstevel@tonic-gate }
1977c478bd9Sstevel@tonic-gate
1987c478bd9Sstevel@tonic-gate return (0);
1997c478bd9Sstevel@tonic-gate }
2007c478bd9Sstevel@tonic-gate
2017c478bd9Sstevel@tonic-gate static void
parse_new_field_spec(sort_t * S,char * arg)2027c478bd9Sstevel@tonic-gate parse_new_field_spec(sort_t *S, char *arg)
2037c478bd9Sstevel@tonic-gate {
2047c478bd9Sstevel@tonic-gate int length = strlen(arg);
2057c478bd9Sstevel@tonic-gate int p_comma = MIN(length, strcspn(arg, ","));
2067c478bd9Sstevel@tonic-gate field_t *nF;
2077c478bd9Sstevel@tonic-gate int p;
2087c478bd9Sstevel@tonic-gate
2097c478bd9Sstevel@tonic-gate /*
2107c478bd9Sstevel@tonic-gate * New field specifiers do not inherit from the general specifier if
2117c478bd9Sstevel@tonic-gate * they have any modifiers set. (This is specifically tested in the VSC
2127c478bd9Sstevel@tonic-gate * test suite, assertion 32 for POSIX.cmd/sort.)
2137c478bd9Sstevel@tonic-gate */
2147c478bd9Sstevel@tonic-gate if (field_spec_has_modifiers(arg, length)) {
2157c478bd9Sstevel@tonic-gate nF = field_new(NULL);
2167c478bd9Sstevel@tonic-gate nF->f_options = FIELD_MODIFIERS_DEFINED;
2177c478bd9Sstevel@tonic-gate } else {
2187c478bd9Sstevel@tonic-gate nF = field_new(S);
2197c478bd9Sstevel@tonic-gate }
2207c478bd9Sstevel@tonic-gate p = parse_field_spec(nF, arg, OPTIONS_STARTSPEC, p_comma);
2217c478bd9Sstevel@tonic-gate
2227c478bd9Sstevel@tonic-gate if (p != 0)
2237c478bd9Sstevel@tonic-gate usage();
2247c478bd9Sstevel@tonic-gate
2257c478bd9Sstevel@tonic-gate if (p_comma < length) {
2267c478bd9Sstevel@tonic-gate p = parse_field_spec(nF, &(arg[p_comma + 1]), 0,
2277c478bd9Sstevel@tonic-gate strlen(&(arg[p_comma + 1])));
2287c478bd9Sstevel@tonic-gate if (p != 0)
2297c478bd9Sstevel@tonic-gate usage();
2307c478bd9Sstevel@tonic-gate }
2317c478bd9Sstevel@tonic-gate
2327c478bd9Sstevel@tonic-gate if (nF->f_start_field < 0 || nF->f_start_offset < 0) {
2337c478bd9Sstevel@tonic-gate if (S->m_verbose)
2347c478bd9Sstevel@tonic-gate warn("-k %s is not a supported field specifier\n", arg);
2357c478bd9Sstevel@tonic-gate }
2367c478bd9Sstevel@tonic-gate nF->f_start_field = MAX(nF->f_start_field, 0);
2377c478bd9Sstevel@tonic-gate nF->f_start_offset = MAX(nF->f_start_offset, 0);
2387c478bd9Sstevel@tonic-gate
2397c478bd9Sstevel@tonic-gate /*
2407c478bd9Sstevel@tonic-gate * If the starting field exceeds a defined ending field, convention
2417c478bd9Sstevel@tonic-gate * dictates that the field is ignored.
2427c478bd9Sstevel@tonic-gate */
2437c478bd9Sstevel@tonic-gate if (nF->f_end_field == -1 || nF->f_start_field < nF->f_end_field ||
2447c478bd9Sstevel@tonic-gate (nF->f_start_field == nF->f_end_field &&
2457c478bd9Sstevel@tonic-gate nF->f_start_offset < nF->f_end_offset)) {
2467c478bd9Sstevel@tonic-gate field_add_to_chain(&(S->m_fields_head), nF);
2477c478bd9Sstevel@tonic-gate } else if (S->m_verbose) {
2487c478bd9Sstevel@tonic-gate warn("illegal field -k %s omitted", arg);
2497c478bd9Sstevel@tonic-gate }
2507c478bd9Sstevel@tonic-gate }
2517c478bd9Sstevel@tonic-gate
2527c478bd9Sstevel@tonic-gate /*
2537c478bd9Sstevel@tonic-gate * parse_old_field_spec() is getopt()-aware; it may modify the values of optind,
2547c478bd9Sstevel@tonic-gate * optarg, and so forth, to correctly determine the characteristics being
2557c478bd9Sstevel@tonic-gate * assigned to the current field.
2567c478bd9Sstevel@tonic-gate */
2577c478bd9Sstevel@tonic-gate static int
parse_old_field_spec(sort_t * S,int argc,char * argv[])2587c478bd9Sstevel@tonic-gate parse_old_field_spec(sort_t *S, int argc, char *argv[])
2597c478bd9Sstevel@tonic-gate {
2607c478bd9Sstevel@tonic-gate field_t *nF;
2617c478bd9Sstevel@tonic-gate int c, p;
2627c478bd9Sstevel@tonic-gate char *arg = argv[optind];
2637c478bd9Sstevel@tonic-gate
2647c478bd9Sstevel@tonic-gate if (field_spec_has_modifiers(arg + 1, strlen(arg + 1))) {
2657c478bd9Sstevel@tonic-gate nF = field_new(NULL);
2667c478bd9Sstevel@tonic-gate nF->f_options = FIELD_MODIFIERS_DEFINED;
2677c478bd9Sstevel@tonic-gate } else {
2687c478bd9Sstevel@tonic-gate nF = field_new(S);
2697c478bd9Sstevel@tonic-gate }
2707c478bd9Sstevel@tonic-gate
2717c478bd9Sstevel@tonic-gate p = parse_field_spec(nF, arg + 1, OPTIONS_OLDSPEC | OPTIONS_STARTSPEC,
2727c478bd9Sstevel@tonic-gate strlen(arg + 1));
2737c478bd9Sstevel@tonic-gate
2747c478bd9Sstevel@tonic-gate if (p != 0) {
2757c478bd9Sstevel@tonic-gate field_delete(nF);
2767c478bd9Sstevel@tonic-gate return (0);
2777c478bd9Sstevel@tonic-gate }
2787c478bd9Sstevel@tonic-gate
2797c478bd9Sstevel@tonic-gate /*
2807c478bd9Sstevel@tonic-gate * In the case that getopt() returns '?' (unrecognized option) or EOF
2817c478bd9Sstevel@tonic-gate * (non-option argument), the field is considered closed.
2827c478bd9Sstevel@tonic-gate */
2837c478bd9Sstevel@tonic-gate for (arg = argv[++optind]; optind < argc; arg = argv[optind]) {
2847c478bd9Sstevel@tonic-gate if (strlen(arg) >= 2 && *arg == '-' &&
2857c478bd9Sstevel@tonic-gate isdigit(*(uchar_t *)(arg + 1))) {
2867c478bd9Sstevel@tonic-gate (void) parse_field_spec(nF, arg + 1,
2877c478bd9Sstevel@tonic-gate OPTIONS_OLDSPEC, strlen(arg) - 1);
2887c478bd9Sstevel@tonic-gate field_add_to_chain(&(S->m_fields_head), nF);
2897c478bd9Sstevel@tonic-gate optind++;
2907c478bd9Sstevel@tonic-gate return (1);
2917c478bd9Sstevel@tonic-gate }
2927c478bd9Sstevel@tonic-gate
2937c478bd9Sstevel@tonic-gate if ((c = getopt(argc, argv, OLD_SPEC_OPTIONS_STRING)) != EOF) {
2947c478bd9Sstevel@tonic-gate switch (c) {
2957c478bd9Sstevel@tonic-gate case 'b':
2967c478bd9Sstevel@tonic-gate nF->f_options |= FIELD_IGNORE_BLANKS_START;
2977c478bd9Sstevel@tonic-gate break;
2987c478bd9Sstevel@tonic-gate case 'd':
2997c478bd9Sstevel@tonic-gate nF->f_options |= FIELD_DICTIONARY_ORDER;
3007c478bd9Sstevel@tonic-gate break;
3017c478bd9Sstevel@tonic-gate case 'f':
3027c478bd9Sstevel@tonic-gate nF->f_options |= FIELD_FOLD_UPPERCASE;
3037c478bd9Sstevel@tonic-gate break;
3047c478bd9Sstevel@tonic-gate case 'i':
3057c478bd9Sstevel@tonic-gate nF->f_options |= FIELD_IGNORE_NONPRINTABLES;
3067c478bd9Sstevel@tonic-gate break;
3077c478bd9Sstevel@tonic-gate case 'M':
3087c478bd9Sstevel@tonic-gate nF->f_species = MONTH;
3097c478bd9Sstevel@tonic-gate break;
3107c478bd9Sstevel@tonic-gate case 'n':
3117c478bd9Sstevel@tonic-gate nF->f_species = NUMERIC;
3127c478bd9Sstevel@tonic-gate break;
3137c478bd9Sstevel@tonic-gate case 'r':
3147c478bd9Sstevel@tonic-gate nF->f_options |= FIELD_REVERSE_COMPARISONS;
3157c478bd9Sstevel@tonic-gate break;
3167c478bd9Sstevel@tonic-gate case '?':
3177c478bd9Sstevel@tonic-gate case 'c':
3187c478bd9Sstevel@tonic-gate case 'm':
3197c478bd9Sstevel@tonic-gate case 'u':
3207c478bd9Sstevel@tonic-gate /*
3217c478bd9Sstevel@tonic-gate * Options without arguments.
3227c478bd9Sstevel@tonic-gate */
3237c478bd9Sstevel@tonic-gate optind -= 1;
3247c478bd9Sstevel@tonic-gate field_add_to_chain(&(S->m_fields_head), nF);
3257c478bd9Sstevel@tonic-gate return (1);
3267c478bd9Sstevel@tonic-gate /*NOTREACHED*/
3277c478bd9Sstevel@tonic-gate case 'o':
3287c478bd9Sstevel@tonic-gate case 'T':
3297c478bd9Sstevel@tonic-gate case 'z':
3307c478bd9Sstevel@tonic-gate case 't':
3317c478bd9Sstevel@tonic-gate case 'k':
3327c478bd9Sstevel@tonic-gate case 'S':
3337c478bd9Sstevel@tonic-gate /*
3347c478bd9Sstevel@tonic-gate * Options with arguments.
3357c478bd9Sstevel@tonic-gate */
3367c478bd9Sstevel@tonic-gate if (optarg == argv[optind - 1] + 2) {
3377c478bd9Sstevel@tonic-gate optind -= 1;
3387c478bd9Sstevel@tonic-gate } else {
3397c478bd9Sstevel@tonic-gate optind -= 2;
3407c478bd9Sstevel@tonic-gate }
3417c478bd9Sstevel@tonic-gate field_add_to_chain(&(S->m_fields_head), nF);
3427c478bd9Sstevel@tonic-gate return (1);
3437c478bd9Sstevel@tonic-gate /*NOTREACHED*/
3447c478bd9Sstevel@tonic-gate default:
3457c478bd9Sstevel@tonic-gate die(EMSG_UNKN_OPTION);
3467c478bd9Sstevel@tonic-gate /*NOTREACHED*/
3477c478bd9Sstevel@tonic-gate }
3487c478bd9Sstevel@tonic-gate } else {
3497c478bd9Sstevel@tonic-gate break;
3507c478bd9Sstevel@tonic-gate }
3517c478bd9Sstevel@tonic-gate }
3527c478bd9Sstevel@tonic-gate
3537c478bd9Sstevel@tonic-gate field_add_to_chain(&(S->m_fields_head), nF);
3547c478bd9Sstevel@tonic-gate return (1);
3557c478bd9Sstevel@tonic-gate }
3567c478bd9Sstevel@tonic-gate
3577c478bd9Sstevel@tonic-gate int
options(sort_t * S,int argc,char * argv[])3587c478bd9Sstevel@tonic-gate options(sort_t *S, int argc, char *argv[])
3597c478bd9Sstevel@tonic-gate {
3607c478bd9Sstevel@tonic-gate int c;
3617c478bd9Sstevel@tonic-gate
3627c478bd9Sstevel@tonic-gate optind = 1;
3637c478bd9Sstevel@tonic-gate while (optind < argc) {
3647c478bd9Sstevel@tonic-gate if (strncmp("-y", argv[optind], strlen("-y")) == 0) {
3657c478bd9Sstevel@tonic-gate /*
3667c478bd9Sstevel@tonic-gate * The -y [kmem] option violates the standard syntax
3677c478bd9Sstevel@tonic-gate * outlined in intro(1). we have to be a little fancy
3687c478bd9Sstevel@tonic-gate * to determine if the next argument is a valid integer.
3697c478bd9Sstevel@tonic-gate * (note, of course, that the previous sort(1) had no
3707c478bd9Sstevel@tonic-gate * mechanism to resolve a final
3717c478bd9Sstevel@tonic-gate * -y 99999
3727c478bd9Sstevel@tonic-gate * into
3737c478bd9Sstevel@tonic-gate * -y, file 99999
3747c478bd9Sstevel@tonic-gate * or
3757c478bd9Sstevel@tonic-gate * -y 99999, file stdin
3767c478bd9Sstevel@tonic-gate *
3777c478bd9Sstevel@tonic-gate * Now one can unambiguously use
3787c478bd9Sstevel@tonic-gate * -y -- 99999
3797c478bd9Sstevel@tonic-gate * and
3807c478bd9Sstevel@tonic-gate * -y 99999 -
3817c478bd9Sstevel@tonic-gate * to distinguish these cases.
3827c478bd9Sstevel@tonic-gate *
3837c478bd9Sstevel@tonic-gate * That said, we do not use the information passed using
3847c478bd9Sstevel@tonic-gate * -y option in sort(1); we provide the argument to
3857c478bd9Sstevel@tonic-gate * preserve compatibility for existing scripts.
3867c478bd9Sstevel@tonic-gate */
3877c478bd9Sstevel@tonic-gate if (strlen(argv[optind]) == strlen("-y") &&
3887c478bd9Sstevel@tonic-gate optind + 1 < argc &&
3897c478bd9Sstevel@tonic-gate is_number(argv[optind + 1]))
3907c478bd9Sstevel@tonic-gate optind += 2;
3917c478bd9Sstevel@tonic-gate else
3927c478bd9Sstevel@tonic-gate optind += 1;
3937c478bd9Sstevel@tonic-gate }
3947c478bd9Sstevel@tonic-gate
3957c478bd9Sstevel@tonic-gate if ((c = getopt(argc, argv, OPTIONS_STRING)) != EOF) {
3967c478bd9Sstevel@tonic-gate switch (c) {
3977c478bd9Sstevel@tonic-gate case 'c':
3987c478bd9Sstevel@tonic-gate S->m_check_if_sorted_only = 1;
3997c478bd9Sstevel@tonic-gate break;
4007c478bd9Sstevel@tonic-gate
4017c478bd9Sstevel@tonic-gate case 'm':
4027c478bd9Sstevel@tonic-gate S->m_merge_only = 1;
4037c478bd9Sstevel@tonic-gate break;
4047c478bd9Sstevel@tonic-gate
4057c478bd9Sstevel@tonic-gate case 'u':
4067c478bd9Sstevel@tonic-gate S->m_unique_lines = 1;
4077c478bd9Sstevel@tonic-gate break;
4087c478bd9Sstevel@tonic-gate
4097c478bd9Sstevel@tonic-gate case 'o':
4107c478bd9Sstevel@tonic-gate S->m_output_filename = optarg;
4117c478bd9Sstevel@tonic-gate break;
4127c478bd9Sstevel@tonic-gate
4137c478bd9Sstevel@tonic-gate case 'T':
4147c478bd9Sstevel@tonic-gate S->m_tmpdir_template = optarg;
4157c478bd9Sstevel@tonic-gate break;
4167c478bd9Sstevel@tonic-gate
4177c478bd9Sstevel@tonic-gate case 'z':
4187c478bd9Sstevel@tonic-gate /*
4197c478bd9Sstevel@tonic-gate * ignore optarg -- obsolete
4207c478bd9Sstevel@tonic-gate */
4217c478bd9Sstevel@tonic-gate break;
4227c478bd9Sstevel@tonic-gate
4237c478bd9Sstevel@tonic-gate case 'd':
4247c478bd9Sstevel@tonic-gate S->m_field_options |= FIELD_DICTIONARY_ORDER;
4257c478bd9Sstevel@tonic-gate field_apply_all(S->m_fields_head,
4267c478bd9Sstevel@tonic-gate FIELD_DICTIONARY_ORDER);
4277c478bd9Sstevel@tonic-gate break;
4287c478bd9Sstevel@tonic-gate
4297c478bd9Sstevel@tonic-gate case 'f':
4307c478bd9Sstevel@tonic-gate S->m_field_options |= FIELD_FOLD_UPPERCASE;
4317c478bd9Sstevel@tonic-gate field_apply_all(S->m_fields_head,
4327c478bd9Sstevel@tonic-gate FIELD_FOLD_UPPERCASE);
4337c478bd9Sstevel@tonic-gate break;
4347c478bd9Sstevel@tonic-gate
4357c478bd9Sstevel@tonic-gate case 'i':
4367c478bd9Sstevel@tonic-gate S->m_field_options |=
4377c478bd9Sstevel@tonic-gate FIELD_IGNORE_NONPRINTABLES;
4387c478bd9Sstevel@tonic-gate field_apply_all(S->m_fields_head,
4397c478bd9Sstevel@tonic-gate FIELD_IGNORE_NONPRINTABLES);
4407c478bd9Sstevel@tonic-gate break;
4417c478bd9Sstevel@tonic-gate
4427c478bd9Sstevel@tonic-gate case 'M':
4437c478bd9Sstevel@tonic-gate S->m_default_species = MONTH;
4447c478bd9Sstevel@tonic-gate S->m_field_options &=
4457c478bd9Sstevel@tonic-gate ~FIELD_IGNORE_BLANKS_START;
4467c478bd9Sstevel@tonic-gate break;
4477c478bd9Sstevel@tonic-gate
4487c478bd9Sstevel@tonic-gate case 'n':
4497c478bd9Sstevel@tonic-gate S->m_default_species = NUMERIC;
4507c478bd9Sstevel@tonic-gate {
4517c478bd9Sstevel@tonic-gate field_t *f;
4527c478bd9Sstevel@tonic-gate
4537c478bd9Sstevel@tonic-gate for (f = S->m_fields_head; f;
4547c478bd9Sstevel@tonic-gate f = f->f_next)
4557c478bd9Sstevel@tonic-gate if ((f->f_options &
4567c478bd9Sstevel@tonic-gate FIELD_MODIFIERS_DEFINED) ==
4577c478bd9Sstevel@tonic-gate 0)
4587c478bd9Sstevel@tonic-gate f->f_species = NUMERIC;
4597c478bd9Sstevel@tonic-gate }
4607c478bd9Sstevel@tonic-gate break;
4617c478bd9Sstevel@tonic-gate
4627c478bd9Sstevel@tonic-gate case 'b':
4637c478bd9Sstevel@tonic-gate S->m_field_options |=
4647c478bd9Sstevel@tonic-gate FIELD_IGNORE_BLANKS_START |
4657c478bd9Sstevel@tonic-gate FIELD_IGNORE_BLANKS_END;
4667c478bd9Sstevel@tonic-gate break;
4677c478bd9Sstevel@tonic-gate
4687c478bd9Sstevel@tonic-gate case 'r':
4697c478bd9Sstevel@tonic-gate S->m_field_options |=
4707c478bd9Sstevel@tonic-gate FIELD_REVERSE_COMPARISONS;
4717c478bd9Sstevel@tonic-gate field_apply_all(S->m_fields_head,
4727c478bd9Sstevel@tonic-gate FIELD_REVERSE_COMPARISONS);
4737c478bd9Sstevel@tonic-gate break;
4747c478bd9Sstevel@tonic-gate
4757c478bd9Sstevel@tonic-gate case 't':
4767c478bd9Sstevel@tonic-gate /*
4777c478bd9Sstevel@tonic-gate * delimiter
4787c478bd9Sstevel@tonic-gate */
4797c478bd9Sstevel@tonic-gate if (S->m_single_byte_locale) {
4807c478bd9Sstevel@tonic-gate /*
4817c478bd9Sstevel@tonic-gate * Most debuggers can't take tabs as
4827c478bd9Sstevel@tonic-gate * input arguments, so we provide an
4837c478bd9Sstevel@tonic-gate * escape sequence to allow testing of
4847c478bd9Sstevel@tonic-gate * this special case for the DEBUG
4857c478bd9Sstevel@tonic-gate * version.
4867c478bd9Sstevel@tonic-gate */
4877c478bd9Sstevel@tonic-gate S->m_field_separator.sc =
4887c478bd9Sstevel@tonic-gate #ifdef DEBUG
4897c478bd9Sstevel@tonic-gate xstreql(optarg, "\\t") ? '\t' :
4907c478bd9Sstevel@tonic-gate #endif
4917c478bd9Sstevel@tonic-gate optarg[0];
4927c478bd9Sstevel@tonic-gate } else
4937c478bd9Sstevel@tonic-gate (void) mbtowc(&S->m_field_separator.wc,
4947c478bd9Sstevel@tonic-gate optarg, MB_CUR_MAX);
4957c478bd9Sstevel@tonic-gate break;
4967c478bd9Sstevel@tonic-gate
4977c478bd9Sstevel@tonic-gate case 'k':
4987c478bd9Sstevel@tonic-gate /*
4997c478bd9Sstevel@tonic-gate * key
5007c478bd9Sstevel@tonic-gate */
5017c478bd9Sstevel@tonic-gate (void) parse_new_field_spec(S, optarg);
5027c478bd9Sstevel@tonic-gate break;
5037c478bd9Sstevel@tonic-gate
5047c478bd9Sstevel@tonic-gate case 'S':
5057c478bd9Sstevel@tonic-gate S->m_memory_limit = strtomem(optarg);
5067c478bd9Sstevel@tonic-gate #ifdef DEBUG
5077c478bd9Sstevel@tonic-gate (void) fprintf(stderr, CMDNAME
5087c478bd9Sstevel@tonic-gate ": limiting size to %d bytes\n",
5097c478bd9Sstevel@tonic-gate S->m_memory_limit);
5107c478bd9Sstevel@tonic-gate #endif /* DEBUG */
5117c478bd9Sstevel@tonic-gate break;
5127c478bd9Sstevel@tonic-gate
5137c478bd9Sstevel@tonic-gate /*
5147c478bd9Sstevel@tonic-gate * We never take a naked -999; these should always be
5157c478bd9Sstevel@tonic-gate * associated with a preceding +000.
5167c478bd9Sstevel@tonic-gate */
5177c478bd9Sstevel@tonic-gate case '0':
5187c478bd9Sstevel@tonic-gate case '1':
5197c478bd9Sstevel@tonic-gate case '2':
5207c478bd9Sstevel@tonic-gate case '3':
5217c478bd9Sstevel@tonic-gate case '4':
5227c478bd9Sstevel@tonic-gate case '5':
5237c478bd9Sstevel@tonic-gate case '6':
5247c478bd9Sstevel@tonic-gate case '7':
5257c478bd9Sstevel@tonic-gate case '8':
5267c478bd9Sstevel@tonic-gate case '9':
5277c478bd9Sstevel@tonic-gate usage();
5287c478bd9Sstevel@tonic-gate break;
5297c478bd9Sstevel@tonic-gate case '?':
5307c478bd9Sstevel@tonic-gate /* error case */
5317c478bd9Sstevel@tonic-gate usage();
5327c478bd9Sstevel@tonic-gate break;
5337c478bd9Sstevel@tonic-gate }
5347c478bd9Sstevel@tonic-gate
5357c478bd9Sstevel@tonic-gate /*
5367c478bd9Sstevel@tonic-gate * Go back for next argument.
5377c478bd9Sstevel@tonic-gate */
5387c478bd9Sstevel@tonic-gate continue;
5397c478bd9Sstevel@tonic-gate }
5407c478bd9Sstevel@tonic-gate
5417c478bd9Sstevel@tonic-gate /*
5427c478bd9Sstevel@tonic-gate * There are three (interpretable) possibilities for getopt() to
5437c478bd9Sstevel@tonic-gate * return EOF with arguments on the command line: we have seen
5447c478bd9Sstevel@tonic-gate * the "end-of-options" token, --, we have encountered the
5457c478bd9Sstevel@tonic-gate * old-style field definition, +NNN, or we have found a
5467c478bd9Sstevel@tonic-gate * filename.
5477c478bd9Sstevel@tonic-gate *
5487c478bd9Sstevel@tonic-gate * In the second case, we must also search for the optional -NNN
5497c478bd9Sstevel@tonic-gate * field terminal definition. (since "+joe", for instance, is
5507c478bd9Sstevel@tonic-gate * a valid filename, we must handle this pattern as well.) This
5517c478bd9Sstevel@tonic-gate * is performed by parse_old_field_spec().
5527c478bd9Sstevel@tonic-gate */
5537c478bd9Sstevel@tonic-gate if (xstreql(argv[optind - 1], "--")) {
5547c478bd9Sstevel@tonic-gate /*
5557c478bd9Sstevel@tonic-gate * Process all arguments following end-of-options token
5567c478bd9Sstevel@tonic-gate * as filenames.
5577c478bd9Sstevel@tonic-gate */
5587c478bd9Sstevel@tonic-gate while (optind < argc) {
5597c478bd9Sstevel@tonic-gate if (xstreql(argv[optind], "-"))
5607c478bd9Sstevel@tonic-gate S->m_input_from_stdin = 1;
5617c478bd9Sstevel@tonic-gate else
5627c478bd9Sstevel@tonic-gate stream_add_file_to_chain(
5637c478bd9Sstevel@tonic-gate &(S->m_input_streams),
5647c478bd9Sstevel@tonic-gate argv[optind]);
5657c478bd9Sstevel@tonic-gate optind++;
5667c478bd9Sstevel@tonic-gate }
5677c478bd9Sstevel@tonic-gate
5687c478bd9Sstevel@tonic-gate break;
5697c478bd9Sstevel@tonic-gate }
5707c478bd9Sstevel@tonic-gate
5717c478bd9Sstevel@tonic-gate if (optind < argc) {
5727c478bd9Sstevel@tonic-gate if (xstreql(argv[optind], "-")) {
5737c478bd9Sstevel@tonic-gate S->m_input_from_stdin = 1;
5747c478bd9Sstevel@tonic-gate optind++;
5757c478bd9Sstevel@tonic-gate } else if (*(argv[optind]) != '+' ||
5767c478bd9Sstevel@tonic-gate !parse_old_field_spec(S, argc, argv)) {
5777c478bd9Sstevel@tonic-gate /*
5787c478bd9Sstevel@tonic-gate * It's a filename, because it either doesn't
5797c478bd9Sstevel@tonic-gate * start with '+', or if it did, it wasn't an
5807c478bd9Sstevel@tonic-gate * actual field specifier.
5817c478bd9Sstevel@tonic-gate */
5827c478bd9Sstevel@tonic-gate stream_add_file_to_chain(&(S->m_input_streams),
5837c478bd9Sstevel@tonic-gate argv[optind]);
5847c478bd9Sstevel@tonic-gate optind++;
5857c478bd9Sstevel@tonic-gate }
5867c478bd9Sstevel@tonic-gate }
5877c478bd9Sstevel@tonic-gate }
5887c478bd9Sstevel@tonic-gate
5897c478bd9Sstevel@tonic-gate if (S->m_input_streams == NULL)
5907c478bd9Sstevel@tonic-gate S->m_input_from_stdin = 1;
5917c478bd9Sstevel@tonic-gate
5927c478bd9Sstevel@tonic-gate if (S->m_output_filename == NULL)
5937c478bd9Sstevel@tonic-gate S->m_output_to_stdout = 1;
5947c478bd9Sstevel@tonic-gate
5957c478bd9Sstevel@tonic-gate /*
5967c478bd9Sstevel@tonic-gate * If no fields, then one great field. However, if the -b option was
5977c478bd9Sstevel@tonic-gate * set globally, be sure to ignore it, as per UNIX98.
5987c478bd9Sstevel@tonic-gate */
5997c478bd9Sstevel@tonic-gate if (S->m_fields_head == NULL) {
6007c478bd9Sstevel@tonic-gate S->m_field_options &= ~FIELD_IGNORE_BLANKS_START;
6017c478bd9Sstevel@tonic-gate
6027c478bd9Sstevel@tonic-gate (void) parse_new_field_spec(S, "1");
6037c478bd9Sstevel@tonic-gate /*
6047c478bd9Sstevel@tonic-gate * "Entire line" fast path is only valid if no delimiter has
6057c478bd9Sstevel@tonic-gate * been set and no modifiers have been applied.
6067c478bd9Sstevel@tonic-gate */
6077c478bd9Sstevel@tonic-gate if (S->m_field_separator.wc == 0 &&
6087c478bd9Sstevel@tonic-gate S->m_default_species == ALPHA &&
6097c478bd9Sstevel@tonic-gate S->m_field_options == 0)
6107c478bd9Sstevel@tonic-gate S->m_entire_line = 1;
6117c478bd9Sstevel@tonic-gate }
6127c478bd9Sstevel@tonic-gate
6137c478bd9Sstevel@tonic-gate return (0);
6147c478bd9Sstevel@tonic-gate }
615