xref: /illumos-gate/usr/src/cmd/xargs/xargs.c (revision ef69670d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved  	*/
28 
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 #include <stdio.h>
33 #include <sys/types.h>
34 #include <sys/wait.h>
35 #include <unistd.h>
36 #include <fcntl.h>
37 #include <string.h>
38 #include <stdarg.h>
39 #include <libgen.h>
40 #include <stdlib.h>
41 #include <limits.h>
42 #include <wchar.h>
43 #include <locale.h>
44 #include <langinfo.h>
45 #include <stropts.h>
46 #include <poll.h>
47 #include <errno.h>
48 #include <stdarg.h>
49 
/* Position selectors for queue(), and the file's boolean constants. */
50 #define	HEAD	0	/* queue(): link the new buffer at the front	*/
51 #define	TAIL	1	/* queue(): link the new buffer at the end	*/
52 #define	FALSE 0
53 #define	TRUE 1
54 #define	MAXSBUF 255	/* insert(): result buffer holds MAXSBUF+1 bytes */
55 #define	MAXIBUF 512	/* size of ins_buf, checked in addibuf()	*/
56 #define	MAXINSERTS 5	/* number of entries in saveargv[]		*/
57 #define	BUFSIZE LINE_MAX	/* argbuf size; also getchr()'s read size */
58 #define	MAXARGS 255	/* arglist[] has MAXARGS+1 slots		*/
59 #define	INSPAT_STR	"{}"	/* default replstr string for -[Ii]	*/
60 #define	FORK_RETRY	5	/* retry limit consulted by lcall()	*/
61 
62 #define	QBUF_STARTLEN 255  /* start size of growable string buffer */
63 #define	QBUF_INC 100	   /* how much to grow a growable string by */
64 
/*
 * File-scope state shared by main() and the helpers below.
 */
65 static wctype_t	blank;		/* wctype("blank"), set in main()	*/
66 static char	*arglist[MAXARGS+1];	/* argv vector handed to lcall()	*/
67 static char	argbuf[BUFSIZE+1];	/* storage for the argument strings	*/
68 static char	*next = argbuf;	/* next free byte in argbuf		*/
69 static char	*lastarg = "";	/* arg rejected by checklen(); main() re-adds it */
70 static char	**ARGV = arglist;	/* next free slot in arglist		*/
71 static char	*LEOF = "_";	/* logical end-of-file string (-e / -E)	*/
72 static char	*INSPAT = INSPAT_STR;	/* replstr for -I / -i		*/
73 static char	ins_buf[MAXIBUF];	/* holds args built by addibuf()	*/
74 static char	*p_ibuf;	/* next free byte in ins_buf		*/
75 
/* One entry per command argument that contains INSPAT (filled in main()). */
76 static struct inserts {
77 	char	**p_ARGV;	/* where to put newarg ptr in arg list */
78 	char	*p_skel;	/* ptr to arg template */
79 } saveargv[MAXINSERTS];
80 
81 static off_t	file_offset = 0;	/* bytes consumed by getchr(); used for lseek() */
82 static int	PROMPT = -1;	/* fd of /dev/tty when -p is given, else -1 */
83 static int	BUFLIM = BUFSIZE;	/* command line size limit (-s)		*/
84 static int	N_ARGS = 0;	/* value of -n; 0 when not in effect	*/
85 static int	N_args = 0;	/* args read so far for the current command */
86 static int	N_lines = 0;	/* lines read so far for the current command */
87 static int	DASHX = FALSE;	/* -x given				*/
88 static int	MORE = TRUE;	/* cleared by getarg() at end of input	*/
89 static int	PER_LINE = FALSE;	/* line count for -L / -l (TRUE for -I / -i) */
90 static int	ERR = FALSE;	/* an error was detected		*/
91 static int	OK = TRUE;	/* cleared by ermsg() and when a batch is complete */
92 static int	LEGAL = FALSE;	/* set by -I, -i, -l, -x and by -n 1	*/
93 static int	TRACE = FALSE;	/* -t, or -p with a usable tty		*/
94 static int	INSERT = FALSE;	/* insert mode (-I / -i)		*/
95 static int	linesize = 0;	/* running size total kept by checklen() */
96 static int	ibufsize = 0;	/* bytes of ins_buf used, see addibuf()	*/
97 static char	*yesstr;	/* the string contains int'l for "yes"	*/
98 static int	exitstat = 0;	/* our exit status			*/
99 static int	mac;		/* modified argc, after parsing		*/
100 static char	**mav;		/* modified argv, after parsing		*/
101 static int	n_inserts;	/* # of insertions.			*/
102 static int	inquote = 0;	/* getarg(): 0 = unquoted, 1 = in '...', 2 = in "..." */
103 
104 /*
105  * the pio structure is used to save any pending input before the
106  * user replies to a prompt. the pending input is saved here,
107  * for the appropriate processing later.
108  *
 * queue() links new entries at the head or the tail of the list that
 * starts at queued_data; getchr() takes bytes from the first entry and
 * frees both the entry and its buffer once length reaches zero.
 */
109 typedef struct pio {
110 	struct pio *next;	/* next in stack			*/
111 	char *start;		/* starting addr of the buffer		*/
112 	char *cur;		/* ptr to current char in buf		*/
113 	size_t length;		/* number of bytes remaining		*/
114 } pio;
115 
/* first buffer getchr() reads from; NULL when nothing is queued */
116 static pio *queued_data = NULL;
117 
/* our usage message:							*/
#define	USAGEMSG "Usage: xargs: [-t] [-p] [-e[eofstr]] [-E eofstr] "\
	"[-I replstr] [-i[replstr]] [-L #] [-l[#]] [-n # [-x]] [-s size] "\
	"[cmd [args ...]]\n"

/*
 * Forward declarations.  Every declaration is a full prototype so that
 * the compiler checks the argument lists at each call site; an empty
 * "()" list disables that checking in C90..C17 and means "(void)" in C23.
 */
static int	echoargs(void);
static int	getchr(void);
static wchar_t	getwchr(void);
static void	ungetwchr(wchar_t);
static int	lcall(char *sub, char **subargs);
static int	xindex(char *as1, char *as2);
static void	addibuf(struct inserts *p);
static void	ermsg(char *messages, ...);
static char	*addarg(char *arg);
static char	*checklen(char *arg);
static size_t	store_wchr(char **, size_t *, size_t, wchar_t);
static char	*getarg(void);
static char	*insert(char *pattern, char *subst);
static void	usage(void);
static void	parseargs(int ac, char **av);
static void	saveinput(void);
139 
140 
141 int
142 main(int argc, char **argv)
143 {
144 	int	j;
145 	struct inserts *psave;
146 	int c;
147 	int	initsize;
148 	char	*cmdname, *initbuf, **initlist;
149 
150 
151 	/* initialization */
152 
153 	blank = wctype("blank");
154 	n_inserts = 0;
155 	psave = saveargv;
156 	(void) setlocale(LC_ALL, "");
157 #if !defined(TEXT_DOMAIN)	/* Should be defined by cc -D 		*/
158 #define	TEXT_DOMAIN "SYS_TEST"	/* Use this only if it weren't 		*/
159 #endif
160 	(void) textdomain(TEXT_DOMAIN);
161 
162 	/*
163 	 * now we get the appropriate "yes" string for our locale.
164 	 * since this may be a multibyte character, we store the
165 	 * string which is returned. later on, when we're looking for
166 	 * a "y" in response to our prompt, we'll use the first
167 	 * multibyte character of yesstr as a comparision.
168 	 */
169 	initbuf = nl_langinfo(YESSTR);	/* initbuf is a tmp placeholder here */
170 	if ((yesstr = malloc(strlen(initbuf) + 1)) == NULL) {
171 		perror(gettext("xargs: Memory allocation failure"));
172 		exit(1);
173 	}
174 	(void) strcpy(yesstr, initbuf);
175 
176 	parseargs(argc, argv);
177 
178 	/* handling all of xargs arguments:				*/
179 	while ((c = getopt(mac, mav, "tpe:E:I:i:L:l:n:s:x")) != EOF) {
180 		switch (c) {
181 		case 't':	/* -t: turn trace mode on		*/
182 			TRACE = TRUE;
183 			break;
184 
185 		case 'p':	/* -p: turn on prompt mode.		*/
186 			if ((PROMPT = open("/dev/tty", O_RDONLY)) == -1) {
187 				perror(gettext("can't read from tty for -p"));
188 			} else {
189 				TRACE = TRUE;
190 			}
191 			break;
192 
193 		case 'e':
194 			/*
195 			 * -e[eofstr]: set/disable end-of-file.
196 			 * N.B. that an argument *isn't* required here; but
197 			 * parseargs forced an argument if not was given.  The
198 			 * forced argument is the default...
199 			 */
200 			LEOF = optarg; /* can be empty */
201 			break;
202 
203 		case 'E':
204 			/*
205 			 * -E eofstr: change end-of-file string.
206 			 * eofstr *is* required here:
207 			 */
208 			LEOF = optarg;
209 #ifdef XPG6
210 			if (LEOF == NULL) {
211 #else
212 			if ((LEOF == NULL) || (*LEOF == NULL)) {
213 #endif
214 				ermsg(gettext(
215 				    "Option requires an argument: -%c\n"), c);
216 			}
217 			break;
218 
219 		case 'I':
220 			/* -I replstr: Insert mode. replstr *is* required. */
221 			INSERT = PER_LINE = LEGAL = TRUE;
222 			N_ARGS = 0;
223 			if ((optarg != NULL) && (*optarg != '\0')) {
224 				INSPAT = optarg;
225 			} else {
226 				ermsg(gettext(
227 				    "Option requires an argument: -%c\n"), c);
228 			}
229 			break;
230 
231 		case 'i':
232 			/*
233 			 * -i [replstr]: insert mode, with *optional* replstr.
234 			 * N.B. that an argument *isn't* required here; if
235 			 * it's not given, then the string INSPAT_STR will
236 			 * be assumed.
237 			 *
238 			 * Since getopts(3C) doesn't handle the case of an
239 			 * optional variable argument at all, we have to
240 			 * parse this by hand:
241 			 */
242 
243 			INSERT = PER_LINE = LEGAL = TRUE;
244 			N_ARGS = 0;
245 			if ((optarg != NULL) && (*optarg != '\0')) {
246 				INSPAT = optarg;
247 			} else {
248 				/*
249 				 * here, there is no next argument. so
250 				 * we reset INSPAT to the INSPAT_STR.
251 				 * we *have* to do this, as -i/I may have
252 				 * been given previously, and XCU4 requires
253 				 * that only "the last one specified takes
254 				 * effect".
255 				 */
256 				INSPAT = INSPAT_STR;
257 			}
258 			break;
259 
260 		case 'L':
261 			/*
262 			 * -L number: # of times cmd is executed
263 			 * number *is* required here:
264 			 */
265 			PER_LINE = TRUE;
266 			N_ARGS = 0;
267 			INSERT = FALSE;
268 			if ((optarg == NULL) || (*optarg == '\0')) {
269 				ermsg(gettext(
270 				    "Option requires an argument: -%c\n"), c);
271 			} else if ((PER_LINE = atoi(optarg)) <= 0) {
272 				ermsg(gettext("#lines must be positive "
273 				    "int: %s\n"), optarg);
274 			}
275 			break;
276 
277 		case 'l':
278 			/*
279 			 * -l [number]: # of times cmd is executed
280 			 * N.B. that an argument *isn't* required here; if
281 			 * it's not given, then 1 is assumed.
282 			 *
283 			 * parseargs handles the optional arg processing.
284 			 */
285 
286 			PER_LINE = LEGAL = TRUE;  /* initialization	*/
287 			N_ARGS = 0;
288 			INSERT = FALSE;
289 
290 			if ((optarg != NULL) && (*optarg != '\0')) {
291 				if ((PER_LINE = atoi(optarg)) <= 0)
292 					PER_LINE = 1;
293 			}
294 			break;
295 
296 		case 'n':	/* -n number: # stdin args		*/
297 			/*
298 			 * -n number: # stdin args.
299 			 * number *is* required here:
300 			 */
301 			if ((optarg == NULL) || (*optarg == '\0')) {
302 				ermsg(gettext(
303 				    "Option requires an argument: -%c\n"), c);
304 			} else if ((N_ARGS = atoi(optarg)) <= 0) {
305 				ermsg(gettext("#args must be positive "
306 				    "int: %s\n"), optarg);
307 			} else {
308 				LEGAL = DASHX || N_ARGS == 1;
309 				INSERT = PER_LINE = FALSE;
310 			}
311 			break;
312 
313 		case 's':	/* -s size: set max size of each arg list */
314 			if ((optarg == NULL) || (*optarg == '\0')) {
315 				ermsg(gettext(
316 				    "Option requires an argument: -%c\n"), c);
317 			} else {
318 				BUFLIM = atoi(optarg);
319 				if (BUFLIM > BUFSIZE || BUFLIM <= 0) {
320 					ermsg(gettext(
321 					    "0 < max-cmd-line-size <= %d: "
322 					    "%s\n"), BUFSIZE, optarg);
323 				}
324 			}
325 			break;
326 
327 		case 'x':	/* -x: terminate if args > size limit	*/
328 			DASHX = LEGAL = TRUE;
329 			break;
330 
331 		default:
332 			/*
333 			 * bad argument. complain and get ready to die.
334 			 */
335 			ERR = TRUE;
336 			usage();
337 
338 			exit(2);
339 			break;
340 		}
341 	}
342 
343 	/*
344 	 * if anything called ermsg(), something screwed up, so
345 	 * we exit early.
346 	 */
347 	if (OK == FALSE) {
348 		ERR = TRUE;
349 		usage();
350 		exit(2);
351 	}
352 
353 	/*
354 	 * we're finished handling xargs's options, so now pick up
355 	 * the command name (if any), and it's options.
356 	 */
357 
358 
359 	mac -= optind;	/* dec arg count by what we've processed 	*/
360 	mav += optind;	/* inc to current mav				*/
361 
362 	if (mac <= 0) {	/* if there're no more args to process,	*/
363 		cmdname = "/usr/bin/echo";	/* our default command	*/
364 		*ARGV++ = addarg(cmdname);	/* use the default cmd.	*/
365 	} else {	/* otherwise keep parsing rest of the string.	*/
366 		/*
367 		 * note that we can't use getopts(3C), and *must* parse
368 		 * this by hand, as we don't know apriori what options the
369 		 * command will take.
370 		 */
371 		cmdname = *mav;	/* get the command name	*/
372 
373 
374 		/* pick up the remaining args from the command line:	*/
375 		while ((OK == TRUE) && (mac-- > 0)) {
376 			/*
377 			 * while we haven't crapped out, and there's
378 			 * work to do:
379 			 */
380 			if (INSERT && ! ERR) {
381 				if (xindex(*mav, INSPAT) != -1) {
382 					if (++n_inserts > MAXINSERTS) {
383 						ermsg(gettext("too many args "
384 						    "with %s\n"), INSPAT);
385 						ERR = TRUE;
386 					}
387 					psave->p_ARGV = ARGV;
388 					(psave++)->p_skel = *mav;
389 				}
390 			}
391 			*ARGV++ = addarg(*mav++);
392 		}
393 	}
394 
395 	/* pick up args from standard input */
396 
397 	initbuf = next;
398 	initlist = ARGV;
399 	initsize = linesize;
400 
401 	while (OK && MORE) {
402 		N_args = 0;
403 		N_lines = 0;
404 		next = initbuf;
405 		ARGV = initlist;
406 		linesize = initsize;
407 		if (*lastarg) {
408 			*ARGV++ = addarg(lastarg);
409 			lastarg = "";
410 		}
411 
412 		while (((ARGV - arglist) < MAXARGS) &&
413 		    ((*ARGV++ = getarg()) != NULL) && OK)
414 			;
415 
416 		/* insert arg if requested */
417 
418 		if (!ERR && INSERT) {
419 			if ((!MORE) && (N_lines == 0)) {
420 				exit(exitstat);
421 			}
422 					/* no more input lines */
423 			p_ibuf = ins_buf;
424 			ARGV--;
425 			j = ibufsize = 0;
426 			for (psave = saveargv; ++j <= n_inserts; ++psave) {
427 				addibuf(psave);
428 				if (ERR)
429 					break;
430 			}
431 		}
432 		*ARGV = 0;
433 
434 		if (n_inserts > 0) {
435 			int t_ninserts;
436 
437 			/*
438 			 * if we've done any insertions, re-calculate the
439 			 * linesize. bomb out if we've exceeded our length.
440 			 */
441 			t_ninserts = n_inserts;
442 			n_inserts = 0;	/* inserts have been done 	*/
443 			linesize = 0;	/* recalculate this		*/
444 
445 			/* for each current argument in the list:	*/
446 			for (ARGV = arglist; *ARGV != NULL; ARGV++) {
447 				/* recalculate everything.		*/
448 				if (checklen(*ARGV) != 0) {
449 					if (N_ARGS && (N_args >= N_ARGS)) {
450 						N_lines = N_args = 0;
451 						OK = FALSE;
452 						ERR = TRUE;
453 					}
454 				}
455 			}
456 			n_inserts = t_ninserts;
457 		}
458 
459 		/* exec command */
460 
461 		if (!ERR) {
462 			if (!MORE &&
463 			    (PER_LINE && N_lines == 0 || N_ARGS && N_args == 0))
464 				exit(exitstat);
465 			OK = TRUE;
466 			j = TRACE ? echoargs() : TRUE;
467 			if (j) {
468 				/*
469 				 * for xcu4, all invocations of cmdname must
470 				 * return 0, in order for us to return 0.
471 				 * so if we have a non-zero status here,
472 				 * quit immediately.
473 				 */
474 				if ((exitstat |= lcall(cmdname, arglist)) == 0)
475 					continue;
476 			}
477 		}
478 	}
479 
480 	(void) lseek(0, file_offset, SEEK_SET);
481 	if (OK) {
482 		return (exitstat);
483 	} else {
484 		/*
485 		 * if exitstat was set, to match XCU4 complience,
486 		 * return that value, otherwise, return 1.
487 		 */
488 		return (exitstat ? exitstat : 1);
489 	}
490 }
491 
492 static void
493 queue(char *buffer, int len, int where)
494 {
495 	pio *new, *element;
496 
497 	if ((new = malloc(sizeof (pio))) == NULL) {
498 		perror(gettext("xargs: Memory allocation failure"));
499 		exit(1);
500 	}
501 	new->cur = new->start = buffer;
502 	new->length = len;
503 
504 	if (where == TAIL) {
505 		new->next = NULL;
506 		if (queued_data == NULL) {
507 			queued_data = new;
508 		} else {
509 			element = queued_data;
510 			while (element->next != NULL) {
511 				element = element->next;
512 			}
513 			element->next = new;
514 		}
515 	} else {
516 		file_offset -= len;
517 		new->next = queued_data;
518 		queued_data = new;
519 	}
520 }
521 
522 static char *
523 checklen(char *arg)
524 {
525 	int	oklen;
526 
527 	oklen = TRUE;
528 	linesize += strlen(arg) + 1;
529 	if (linesize >= BUFLIM) {
530 		/*
531 		 * we skip this if there're inserts. we'll handle the
532 		 * argument counting after all the insertions have
533 		 * been done.
534 		 */
535 		if (n_inserts == 0) {
536 			lastarg = arg;
537 			oklen = OK = FALSE;
538 
539 			if (LEGAL) {
540 				ERR = TRUE;
541 				ermsg(gettext("arg list too long\n"));
542 			} else if (N_args > 1) {
543 				N_args = 1;
544 			} else {
545 				ermsg(gettext("a single arg was greater than "
546 				    "the max arglist size of %d characters\n"),
547 				    BUFLIM);
548 				ERR = TRUE;
549 			}
550 		}
551 	}
552 	return (oklen ? arg : 0);
553 }
554 
555 static char *
556 addarg(char *arg)
557 {
558 	if (checklen(arg) != 0) {
559 		(void) strcpy(next, arg);
560 		arg = next;
561 		next += strlen(arg) + 1;
562 		return (arg);
563 	}
564 	return ((char *)0);
565 }
566 
567 /*
568  * store_wchr() : append a wchar_t to a char buffer, resize buffer if required.
569  *
570  *     Given a pointer to the beginning of a string buffer, the length of the
571  *     buffer and an offset indicating the next place to write within that
572  *     buffer, the passed wchar_t will be appended to the buffer if there is
573  *     enough space. If there is not enough space, an attempt to reallocate the
574  *     buffer will be made and if successful the passed pointer and size will be
575  *     updated to describe the reallocated block. Returns the new value for
576  *     'offset' (it will be incremented by the number of bytes written).
577  */
578 static size_t
579 store_wchr(char **buffer, size_t *buflen, size_t offset, wchar_t c)
580 {
581 	int bytes;
582 
583 	/*
584 	 * Make sure that there is enough room in the buffer to store the
585 	 * maximum length of c.
586 	 */
587 	if ((offset + MB_CUR_MAX) > *buflen) {
588 		/*
589 		 * Not enough room so attempt to reallocate. Add 'MB_CUR_MAX' to
590 		 * buffer length to ensure that there is always enough room to
591 		 * store 'c' if realloc succeeds, no matter what QBUF_INC is
592 		 * defined as.
593 		 */
594 		*buflen += (QBUF_INC + MB_CUR_MAX);
595 		if ((*buffer = realloc(*buffer, *buflen)) == NULL) {
596 			perror(gettext("xargs: Memory allocation failure"));
597 			exit(1);
598 		}
599 	}
600 	/* store bytes from wchar into buffer */
601 	bytes = wctomb(*buffer + offset, c);
602 	if (bytes == -1) {
603 		/* char was invalid */
604 		bytes = 1;
605 		*(*buffer + offset) = (char)c;
606 	}
607 
608 	/* return new value for offset */
609 	return (offset + bytes);
610 }
611 
612 static char *
613 getarg()
614 {
615 	int	bytes;
616 	wchar_t	c;
617 	char	*arg;
618 	char	*retarg, *requeue_buf;
619 	size_t  requeue_offset = 0, requeue_len;
620 	char	mbc[MB_LEN_MAX];
621 
622 	while (iswspace(c = getwchr()) || c == '\n')
623 		;
624 
625 	if (c == '\0') {
626 		MORE = FALSE;
627 		return (0);
628 	}
629 
630 	/*
631 	 * While we are reading in an argument, it is possible that we will
632 	 * reach the maximum length of the overflow buffer and we'll have to
633 	 * requeue what we have read so far. To handle this we allocate an
634 	 * initial buffer here which will keep an unprocessed copy of the data
635 	 * that we read in (this buffer will grow as required).
636 	 */
637 	requeue_len = (size_t)QBUF_STARTLEN;
638 	if ((requeue_buf = (char *)malloc(requeue_len)) == NULL) {
639 		perror(gettext("xargs: Memory allocation failure"));
640 		exit(1);
641 	}
642 
643 	for (arg = next; ; c = getwchr()) {
644 		bytes = wctomb(mbc, c);
645 
646 		/*
647 		 * Store the char that we have read before processing it in case
648 		 * the current argument needs to be requeued.
649 		 */
650 		requeue_offset = store_wchr(&requeue_buf, &requeue_len,
651 		    requeue_offset, c);
652 
653 		/* Check for overflow the input buffer */
654 		if ((next + ((bytes == -1) ? 1 : bytes)) >= &argbuf[BUFLIM]) {
655 			/*
656 			 * It's only an error if there are no Args in buffer
657 			 * already.
658 			 */
659 			if ((N_ARGS || PER_LINE) && LEGAL) {
660 				ERR = TRUE;
661 				ermsg(gettext("Argument list too long\n"));
662 				free(requeue_buf);
663 				return (0);
664 			} else if (N_args == 0) {
665 				lastarg = "";
666 				ERR = TRUE;
667 				ermsg(gettext("A single arg was greater than "
668 				    "the max arglist size of %d characters\n"),
669 				    BUFSIZE);
670 				free(requeue_buf);
671 				return (0);
672 			}
673 			/*
674 			 * Otherwise we put back the current argument
675 			 * and use what we have collected so far...
676 			 */
677 			queue(requeue_buf, requeue_offset, HEAD);
678 			/* reset inquote because we have requeued the quotes */
679 			inquote = 0;
680 			return (NULL);
681 		}
682 
683 
684 		if (iswctype(c, blank) && inquote == 0) {
685 			if (INSERT) {
686 				if (bytes == -1) {
687 					*next++ = (char)c;
688 				} else {
689 					(void) wctomb(next, c);
690 					next += bytes;
691 				}
692 				continue;
693 			}
694 
695 			/* skip over trailing whitespace till next arg */
696 			while (iswctype((c = getwchr()), blank) &&
697 			    (c != '\n') && (c != '\0'))
698 				;
699 
700 			/*
701 			 * if there was space till end of line then the last
702 			 * character was really a newline...
703 			 */
704 			if (c == L'\n' || c == L'\0') {
705 				ungetwchr(L'\n');
706 			} else {
707 				/* later code needs to know this was a space */
708 				ungetwchr(c);
709 				c = L' ';
710 			}
711 			goto end_arg;
712 		}
713 		switch (c) {
714 		case L'\0':
715 		case L'\n':
716 			if (inquote) {
717 				*next++ = '\0';
718 				ermsg(gettext("Missing quote: %s\n"), arg);
719 				ERR = TRUE;
720 				free(requeue_buf);
721 				return (0);
722 			}
723 
724 			N_lines++;
725 end_arg:		*next++ = '\0';
726 			/* we finished without requeuing so free requeue_buf */
727 			free(requeue_buf);
728 			if ((strcmp(arg, LEOF) == 0 && *LEOF != '\0') ||
729 			    (c == '\0' && strlen(arg) == 0)) {
730 				MORE = FALSE;
731 				/* absorb the rest of the line */
732 				if ((c != '\n') && (c != '\0'))
733 					while (c = getwchr())
734 						if ((c == '\n') || (c == '\0'))
735 							break;
736 				return (0);
737 			} else {
738 				++N_args;
739 				if (retarg = checklen(arg)) {
740 					if ((PER_LINE &&
741 					    N_lines >= PER_LINE &&
742 					    (c == '\0' || c == '\n')) ||
743 					    (N_ARGS && N_args >= N_ARGS)) {
744 						N_lines = N_args = 0;
745 						lastarg = "";
746 						OK = FALSE;
747 					}
748 				}
749 				return (retarg);
750 			}
751 
752 		case '"':
753 			if (inquote == 1)	/* in single quoted string */
754 				goto is_default;
755 			if (inquote == 2)	/* terminating double quote */
756 				inquote = 0;
757 			else			/* starting quoted string */
758 				inquote = 2;
759 			break;
760 
761 		case '\'':
762 			if (inquote == 2)	/* in double quoted string */
763 				goto is_default;
764 			if (inquote == 1)	/* terminating single quote */
765 				inquote = 0;
766 			else			/* starting quoted string */
767 				inquote = 1;
768 			break;
769 
770 		case L'\\':
771 			c = getwchr();
772 			/* store quoted char for potential requeueing */
773 			requeue_offset = store_wchr(&requeue_buf, &requeue_len,
774 			    requeue_offset, c);
775 
776 		default:
777 is_default:		if (bytes == -1) {
778 				*next++ = (char)c;
779 			} else {
780 				(void) wctomb(next, c);
781 				next += bytes;
782 			}
783 			break;
784 		}
785 	}
786 }
787 
788 
789 /*
790  * ermsg():	print out an error message, and indicate failure globally.
791  *
792  *	Assumes that message has already been gettext()'d. It would be
793  *	nice if we could just do the gettext() here, but we can't, since
794  *	since xgettext(1M) wouldn't be able to pick up our error message.
795  */
796 /* PRINTFLIKE1 */
797 static void
798 ermsg(char *messages, ...)
799 {
800 	va_list	ap;
801 
802 	va_start(ap, messages);
803 
804 	(void) fprintf(stderr, "xargs: ");
805 	(void) vfprintf(stderr, messages, ap);
806 
807 	va_end(ap);
808 	OK = FALSE;
809 }
810 
811 
812 /*
813  * Function: int rpmatch(char *)
814  *
815  * Description:
816  *
817  *	Internationalized get yes / no answer.
818  *
819  * Inputs:
820  *	s	-> Pointer to answer to compare against.
821  *
822  * Returns:
823  *	TRUE	-> Answer was affirmative
824  *	FALSE	-> Answer was negative
825  */
826 
827 static int
828 rpmatch(char *s)
829 {
830 	static char	*default_yesexpr = "^[Yy].*";
831 	static char	*compiled_yesexpr = (char *)NULL;
832 
833 	/* Execute once to initialize */
834 	if (compiled_yesexpr == (char *)NULL) {
835 		char	*yesexpr;
836 
837 		/* get yes expression according to current locale */
838 		yesexpr = nl_langinfo(YESEXPR);
839 		/*
840 		 * If the was no expression or if there is a compile error
841 		 * use default yes expression.  Anchor
842 		 */
843 		if ((yesexpr == (char *)NULL) || (*yesexpr == (char)NULL) ||
844 		    ((compiled_yesexpr =
845 		    regcmp(yesexpr, 0)) == NULL))
846 			compiled_yesexpr = regcmp(default_yesexpr, 0);
847 	}
848 
849 	/* match yesexpr */
850 	if (regex(compiled_yesexpr, s) == NULL) {
851 		return (FALSE);
852 	}
853 	return (TRUE);
854 }
855 
856 static int
857 echoargs()
858 {
859 	char	**anarg;
860 	char	**tanarg;	/* tmp ptr			*/
861 	int		i;
862 	char		reply[LINE_MAX];
863 
864 	tanarg = anarg = arglist-1;
865 
866 	/*
867 	 * write out each argument, separated by a space. the tanarg
868 	 * nonsense is for xcu4 testsuite compliance - so that an
869 	 * extra space isn't echoed after the last argument.
870 	 */
871 	while (*++anarg) {		/* while there's an argument	*/
872 		++tanarg;		/* follow anarg			*/
873 		(void) write(2, *anarg, strlen(*anarg));
874 
875 		if (*++tanarg) {	/* if there's another argument:	*/
876 			(void) write(2, " ", 1); /* add a space		*/
877 			--tanarg;	/* reset back to anarg		*/
878 		}
879 	}
880 	if (PROMPT == -1) {
881 		(void) write(2, "\n", 1);
882 		return (TRUE);
883 	}
884 
885 	/*
886 	 * at this point, there may be unexpected input pending on stdin,
887 	 * if one has used the -n flag. this presents a problem, because
888 	 * if we simply do a read(), we'll get the extra input, instead
889 	 * of our desired y/n input. so, we see if there's any extra
890 	 * input, and if there is, then we will store it.
891 	 */
892 
893 	saveinput();
894 
895 	(void) write(2, "?...", 4);	/* ask the user for input	*/
896 
897 	for (i = 0; i < LINE_MAX && read(PROMPT, &reply[i], 1) > 0; i++) {
898 		if (reply[i] == '\n') {
899 			if (i == 0)
900 				return (FALSE);
901 			break;
902 		}
903 	}
904 	reply[i] = 0;
905 
906 	/* flush remainder of line if necessary */
907 	if (i == LINE_MAX) {
908 		char	bitbucket;
909 
910 		while ((read(PROMPT, &bitbucket, 1) > 0) && (bitbucket != '\n'))
911 			;
912 	}
913 
914 	/*
915 	 * now we have to figure out whether the user typed an
916 	 * internationalized version of 'y' for yes. note that in some
917 	 * countries, they've gotten used to typing an ASCII 'y'! so
918 	 * even if our int'l version fails, we will check for an ASCII
919 	 * 'y', in order to be backwards compatible.
920 	 */
921 	return (rpmatch(reply));
922 }
923 
924 
925 static char *
926 insert(char *pattern, char *subst)
927 {
928 	static char	buffer[MAXSBUF+1];
929 	int		len, ipatlen;
930 	char	*pat;
931 	char	*bufend;
932 	char	*pbuf;
933 
934 	len = strlen(subst);
935 	ipatlen = strlen(INSPAT) - 1;
936 	pat = pattern - 1;
937 	pbuf = buffer;
938 	bufend = &buffer[MAXSBUF];
939 
940 	while (*++pat) {
941 		if (xindex(pat, INSPAT) == 0) {
942 			if (pbuf + len >= bufend) {
943 				break;
944 			} else {
945 				(void) strcpy(pbuf, subst);
946 				pat += ipatlen;
947 				pbuf += len;
948 			}
949 		} else {
950 			*pbuf++ = *pat;
951 			if (pbuf >= bufend)
952 				break;
953 		}
954 	}
955 
956 	if (!*pat) {
957 		*pbuf = '\0';
958 		return (buffer);
959 	} else {
960 		ermsg(gettext("Maximum argument size with insertion via %s's "
961 		    "exceeded\n"), INSPAT);
962 		ERR = TRUE;
963 		return (0);
964 	}
965 }
966 
967 
968 static void
969 addibuf(struct inserts	*p)
970 {
971 	char	*newarg, *skel, *sub;
972 	int		l;
973 
974 	skel = p->p_skel;
975 	sub = *ARGV;
976 	linesize -= strlen(skel) + 1;
977 	newarg = insert(skel, sub);
978 	if (ERR)
979 		return;
980 
981 	if (checklen(newarg)) {
982 		if ((ibufsize += (l = strlen(newarg) + 1)) > MAXIBUF) {
983 			ermsg(gettext("Insert buffer overflow\n"));
984 			ERR = TRUE;
985 		}
986 		(void) strcpy(p_ibuf, newarg);
987 		*(p->p_ARGV) = p_ibuf;
988 		p_ibuf += l;
989 	}
990 }
991 
992 
993 /*
994  * getchr():	get the next character.
995  * description:
996  *	we get the next character from pio.structure, if there's a character
997  *	to get. this may happen when we've had to flush stdin=/dev/tty,
998  *	but still wanted to preserve the characters for later processing.
999  *
1000  *	otherwise we just get the character from stdin.
1001  */
1002 static int
1003 getchr(void)
1004 {
1005 	char	c;
1006 
1007 	do {
1008 		if (queued_data == NULL) {
1009 			char	*buffer;
1010 			int	len;
1011 
1012 			if ((buffer = malloc(BUFSIZE)) == NULL) {
1013 				perror(gettext(
1014 				    "xargs: Memory allocation failure"));
1015 				exit(1);
1016 			}
1017 
1018 			if ((len = read(0, buffer, BUFSIZE)) == 0)
1019 				return (0);
1020 			if (len == -1) {
1021 				perror(gettext("xargs: Read failure"));
1022 				exit(1);
1023 			}
1024 
1025 			queue(buffer, len, TAIL);
1026 		}
1027 
1028 		file_offset++;
1029 		c = *queued_data->cur++;	 /* get the next character */
1030 		if (--queued_data->length == 0) { /* at the end of buffer? */
1031 			pio	*nxt = queued_data->next;
1032 
1033 			free(queued_data->start);
1034 			free(queued_data);
1035 			queued_data = nxt;
1036 		}
1037 	} while (c == '\0');
1038 	return (c);
1039 }
1040 
1041 
/*
 * getwchr():	get the next wide character.
 *
 *	Bytes are fetched one at a time with getchr() and handed to
 *	mbtowc() until they form a complete character, up to MB_CUR_MAX
 *	bytes.
 *
 *	Returns the character, or 0 at end of input.  If the bytes do
 *	not form a valid character, the program reports "Corrupt input
 *	file" (errno is set to EILSEQ) and exits with status 1.
 */
static wchar_t
getwchr(void)
{
	int		i;
	wchar_t		wch;
	unsigned char	buffer[MB_LEN_MAX + 1];

	for (i = 0; i < (int)MB_CUR_MAX; ) {
		if ((buffer[i++] = getchr()) == '\0') {
			/* We have reached  EOF */
			if (i == 1) {
				/* TRUE EOF has been reached */
				return (L'\0');
			}
			/*
			 * We have some characters in our buffer still so it
			 * must be an invalid character right before EOF.
			 */
			break;
		}

		/* If this succeeds then we are done */
		if (mbtowc(&wch, (char *)buffer, i) != -1)
			return (wch);
	}

	/*
	 * We have now encountered an illegal character sequence.
	 * There is nothing much we can do at this point but
	 * return an error.  If we attempt to recover we may in fact
	 * return garbage as arguments, from the customer's point
	 * of view.  After all what if they are feeding us a file
	 * generated in another locale?
	 */
	errno = EILSEQ;
	perror(gettext("xargs: Corrupt input file"));
	exit(1);
	/* NOTREACHED */
}
1081 
1082 
1083 static void
1084 ungetwchr(wchar_t wch)
1085 {
1086 	char	*buffer;
1087 	int	bytes;
1088 
1089 	if ((buffer = malloc(MB_LEN_MAX)) == NULL) {
1090 		perror(gettext("xargs: Memory allocation failure"));
1091 		exit(1);
1092 	}
1093 	bytes = wctomb(buffer, wch);
1094 	queue(buffer, bytes, HEAD);
1095 }
1096 
1097 
1098 static int
1099 lcall(char *sub, char **subargs)
1100 {
1101 	int retcode, retry = 0;
1102 	pid_t iwait, child;
1103 
1104 	for (; ; ) {
1105 		switch (child = fork()) {
1106 		default:
1107 			while ((iwait = wait(&retcode)) != child &&
1108 			    iwait != (pid_t)-1)
1109 				;
1110 			if (iwait == (pid_t)-1) {
1111 				perror(gettext("xargs: Wait failure"));
1112 				exit(122);
1113 				/* NOTREACHED */
1114 			}
1115 			if (WIFSIGNALED(retcode)) {
1116 				ermsg(gettext("Child killed with signal %d\n"),
1117 				    WTERMSIG(retcode));
1118 				exit(125);
1119 				/* NOTREACHED */
1120 			}
1121 			if ((WEXITSTATUS(retcode) & 0377) == 0377) {
1122 				ermsg(gettext("Command could not continue "
1123 				    "processing data\n"));
1124 				exit(124);
1125 				/* NOTREACHED */
1126 			}
1127 			return (WEXITSTATUS(retcode));
1128 		case 0:
1129 			(void) execvp(sub, subargs);
1130 			perror(gettext("xargs: Could not exec command"));
1131 			if (errno == EACCES)
1132 				exit(126);
1133 			exit(127);
1134 			/* NOTREACHED */
1135 		case -1:
1136 			if (errno != EAGAIN && retry++ < FORK_RETRY) {
1137 				perror(gettext("xargs: Could not fork child"));
1138 				exit(123);
1139 			}
1140 			(void) sleep(1);
1141 		}
1142 	}
1143 }
1144 
1145 
/*
 * xindex():	locate a substring.
 *
 *	as1	- the string to search
 *	as2	- the string to look for
 *
 *	Returns the offset of the first occurrence of as2 in as1, or -1
 *	if there is none.  An empty as2 is never found.
 */
static int
xindex(char *as1, char *as2)
{
	char	*hit;

	/* strstr() would report an empty needle at offset 0 */
	if (*as2 == '\0')
		return (-1);

	hit = strstr(as1, as2);
	if (hit == NULL)
		return (-1);
	return ((int)(hit - as1));
}
1175 
1176 
1177 static void
1178 usage()
1179 {
1180 	ermsg(gettext(USAGEMSG));
1181 	OK = FALSE;
1182 }
1183 
1184 
1185 
1186 /*
1187  * parseargs():		modify the args
1188  *	since the -e, -i and -l flags all take optional subarguments,
1189  *	and getopts(3C) is clueless about this nonsense, we change the
1190  *	our local argument count and strings to separate this out,
1191  *	and make it easier to handle via getopts(3c).
1192  *
1193  *	-e	-> "-e ""
1194  *	-e3	-> "-e "3"
1195  *	-Estr	-> "-E "str"
1196  *	-i	-> "-i "{}"
1197  *	-irep	-> "-i "rep"
1198  *	-l	-> "-i "1"
1199  *	-l10	-> "-i "10"
1200  *
1201  *	since the -e, -i and -l flags all take optional subarguments,
1202  */
1203 static void
1204 parseargs(int ac, char **av)
1205 {
1206 	int i;			/* current argument			*/
1207 	int cflag;		/* 0 = not processing cmd arg		*/
1208 
1209 	if ((mav = malloc((ac * 2 + 1) * sizeof (char *))) == NULL) {
1210 		perror(gettext("xargs: Memory allocation failure"));
1211 		exit(1);
1212 	}
1213 
1214 	/* for each argument, see if we need to change things:		*/
1215 	for (i = mac = cflag = 0; (av[i] != NULL) && i < ac; i++, mac++) {
1216 		if ((mav[mac] = strdup(av[i])) == NULL) {
1217 			perror(gettext("xargs: Memory allocation failure"));
1218 			exit(1);
1219 		}
1220 
1221 		/* -- has been found or argument list is fully processes */
1222 		if (cflag)
1223 			continue;
1224 
1225 		/*
1226 		 * if we're doing special processing, and we've got a flag
1227 		 */
1228 		else if ((av[i][0] == '-') && (av[i][1] != NULL)) {
1229 			char	*def;
1230 
1231 			switch (av[i][1]) {
1232 			case	'e':
1233 				def = ""; /* -e with no arg turns off eof */
1234 				goto process_special;
1235 			case	'i':
1236 				def = INSPAT_STR;
1237 				goto process_special;
1238 			case	'l':
1239 				def = "1";
1240 process_special:
1241 				/*
1242 				 * if there's no sub-option, we *must* add
1243 				 * a default one. this is because xargs must
1244 				 * be able to distinguish between a valid
1245 				 * suboption, and a command name.
1246 				 */
1247 				if (av[i][2] == NULL) {
1248 					mav[++mac] = strdup(def);
1249 				} else {
1250 					/* clear out our version: */
1251 					mav[mac][2] = NULL;
1252 					mav[++mac] = strdup(&av[i][2]);
1253 				}
1254 				if (mav[mac] == NULL) {
1255 					perror(gettext("xargs: Memory"
1256 					    " allocation failure"));
1257 					exit(1);
1258 				}
1259 				break;
1260 
1261 			/* flags with required subarguments:		*/
1262 
1263 			/*
1264 			 * there are two separate cases here. either the
1265 			 * flag can have the normal XCU4 handling
1266 			 * (of the form: -X subargument); or it can have
1267 			 * the old solaris 2.[0-4] handling (of the
1268 			 * form: -Xsubargument). in order to maintain
1269 			 * backwards compatibility, we must support the
1270 			 * latter case. we handle the latter possibility
1271 			 * first so both the old solaris way of handling
1272 			 * and the new XCU4 way of handling things are allowed.
1273 			 */
1274 			case	'n':	/* FALLTHROUGH			*/
1275 			case	's':	/* FALLTHROUGH			*/
1276 			case	'E':	/* FALLTHROUGH			*/
1277 			case	'I':	/* FALLTHROUGH			*/
1278 			case	'L':
1279 				/*
1280 				 * if the second character isn't null, then
1281 				 * the user has specified the old syntax.
1282 				 * we move the subargument into our
1283 				 * mod'd argument list.
1284 				 */
1285 				if (av[i][2] != NULL) {
1286 					/* first clean things up:	*/
1287 					mav[mac][2] = NULL;
1288 
1289 					/* now add the separation:	*/
1290 					++mac;	/* inc to next mod'd arg */
1291 					if ((mav[mac] = strdup(&av[i][2])) ==
1292 					    NULL) {
1293 						perror(gettext("xargs: Memory"
1294 						    " allocation failure"));
1295 						exit(1);
1296 					}
1297 					break;
1298 				}
1299 				i++;
1300 				mac++;
1301 #ifdef XPG6
1302 				if (av[i] != NULL) {
1303 					if ((mav[mac] = strdup(av[i]))
1304 					    == NULL) {
1305 						perror(gettext("xargs: Memory"
1306 						    " allocation failure"));
1307 						exit(1);
1308 					}
1309 				}
1310 #else
1311 				if (av[i] == NULL) {
1312 					if ((mav[mac++] = strdup("")) == NULL) {
1313 						perror(gettext("xargs: Memory "
1314 						    " allocation failure"));
1315 						exit(1);
1316 					}
1317 					mav[mac] = NULL;
1318 					return;
1319 				}
1320 				if ((mav[mac] = strdup(av[i])) == NULL) {
1321 					perror(gettext("xargs: Memory"
1322 					    " allocation failure"));
1323 					exit(1);
1324 				}
1325 
1326 #endif
1327 				break;
1328 
1329 			/* flags */
1330 			case 'p' :
1331 			case 't' :
1332 			case 'x' :
1333 				break;
1334 
1335 			case '-' :
1336 			default:
1337 				/*
1338 				 * here we've hit the cmd argument. so
1339 				 * we'll stop special processing, as the
1340 				 * cmd may have a "-i" etc., argument,
1341 				 * and we don't want to add a "" to it.
1342 				 */
1343 				cflag = 1;
1344 				break;
1345 			}
1346 		} else if (i > 0) {	/* if we're not the 1st arg	*/
1347 			/*
1348 			 * if it's not a flag, then it *must* be the cmd.
1349 			 * set cflag, so we don't mishandle the -[eil] flags.
1350 			 */
1351 			cflag = 1;
1352 		}
1353 	}
1354 
1355 	mav[mac] = NULL;
1356 }
1357 
1358 
/*
 * saveinput(): pick up any pending input, so it can be processed later.
 *
 * description:
 *	the purpose of this routine is to allow us to handle the user
 *	typing in a 'y' or 'n', when there are existing characters already
 *	in stdin. this happens when one gives the "-n" option along with
 *	"-p". the problem occurs when the user first types in more arguments
 *	than specified by the -n number. echoargs() wants to read stdin
 *	in order to get the user's response, but if there's already stuff
 *	there, echoargs() won't read the proper character.
 *
 *	the solution provided by this routine is to pick up all characters
 *	(if any), and store them for later processing: the PROMPT
 *	descriptor is peeked at with I_PEEK, and whatever data is pending
 *	is read and appended to the input queue with queue(..., TAIL).
 *
 *	does nothing when PROMPT is -1 (not in -p mode).  exits with
 *	status 1 if memory allocation, the I_PEEK ioctl or the read fails.
 */

void
saveinput()
{
	char *buffer;		/* ptr to the floating data buffer	*/
	struct strpeek speek;	/* to see what's on the queue		*/
	int len;		/* number of bytes actually read	*/

	/* if we're not in -p mode, skip				*/
	if (PROMPT == -1) {
		return;
	}

	if ((buffer = malloc((size_t)MAX_INPUT)) == NULL) {
		perror(gettext("xargs: Memory allocation failure"));
		exit(1);
	}

	/* now see if there's any activity pending:			*/
	speek.ctlbuf.maxlen = 0;	/* not interested in control data */
	speek.ctlbuf.len = 0;
	speek.ctlbuf.buf = NULL;
	speek.flags = 0;
	speek.databuf.maxlen = MAX_INPUT;
	speek.databuf.len = 0;
	speek.databuf.buf = buffer;

	if (ioctl(PROMPT, I_PEEK, &speek) == -1) {
		perror(gettext("xargs: I_PEEK failure"));
		exit(1);
	}

	/* nothing pending: the buffer is not queued, so release it	*/
	if (speek.databuf.len <= 0) {
		free(buffer);
		return;
	}

	if ((len = read(PROMPT, buffer, speek.databuf.len)) == -1) {
		perror(gettext("xargs: read failure"));
		exit(1);
	}

	/*
	 * hand the data to the input queue.  NOTE(review): buffer is not
	 * freed here on the assumption that queue() keeps the pointer --
	 * confirm against queue().
	 */
	queue(buffer, len, TAIL);
}
1417