xref: /illumos-gate/usr/src/cmd/csh/sh.dol.c (revision 7c478bd9)
1 /*
2  * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
3  * Use is subject to license terms.
4  */
5 
6 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
7 /*	  All Rights Reserved  	*/
8 
9 /*
10  * Copyright (c) 1980 Regents of the University of California.
11  * All rights reserved.  The Berkeley Software License Agreement
12  * specifies the terms and conditions for redistribution.
13  */
14 
15 #pragma ident	"%Z%%M%	%I%	%E% SMI"
16 
17 #include <unistd.h>		/* for lseek prototype */
18 #include "sh.h"
19 #include "sh.tconst.h"
20 
21 /*
22  * C shell
23  */
24 
25 /*
26  * These routines perform variable substitution and quoting via ' and ".
27  * To this point these constructs have been preserved in the divided
28  * input words.  Here we expand variables and turn quoting via ' and " into
29  * QUOTE bits on characters (which prevent further interpretation).
30  * If the `:q' modifier was applied during history expansion, then
31  * some QUOTEing may have occurred already, so we don't "trim()" here.
32  */
33 
34 int	Dpeekc, Dpeekrd;		/* Peeks for DgetC and Dredc */
35 tchar	*Dcp, **Dvp;			/* Input vector for Dredc */
36 
/* Returned by Dredc (and passed on by DgetC) when the input vector is used up */
37 #define	DEOF	-1
38 
/* Push back one character; DgetC hands back a nonzero Dpeekc before anything else */
39 #define	unDgetC(c)	Dpeekc = c
40 
41 #define QUOTES		(_Q|_Q1|_ESC)	/* \ ' " ` */
42 
43 /*
44  * The following variables give the information about the current
45  * $ expansion, recording the current word position, the remaining
46  * words within this expansion, the count of remaining words, and the
47  * information about any : modifier which is being applied.
48  */
49 tchar	*dolp;			/* Remaining chars from this word */
50 tchar	**dolnxt;		/* Further words */
51 int	dolcnt;			/* Count of further words */
52 tchar	dolmod;			/* : modifier character */
53 int	dolmcnt;		/* :gx -> 10000, else 1 */
54 
55 /*
56  * Fix up the $ expansions and quotations in the
57  * argument list to command t.
58  */
59 Dfix(t)
60 	register struct command *t;
61 {
62 	register tchar **pp;
63 	register tchar *p;
64 
65 #ifdef TRACE
66 	tprintf("TRACE- Dfix()\n");
67 #endif
68 	if (noexec)
69 		return;
70 	/* Note that t_dcom isn't trimmed thus !...:q's aren't lost */
71 	for (pp = t->t_dcom; p = *pp++;)
72 		while (*p)
73 			if (cmap(*p++, _DOL|QUOTES)) {	/* $, \, ', ", ` */
74 				Dfix2(t->t_dcom);	/* found one */
75 				blkfree(t->t_dcom);
76 				t->t_dcom = gargv;
77 				gargv = 0;
78 				return;
79 			}
80 }
81 
82 /*
83  * $ substitute one word, for i/o redirection
84  */
85 tchar *
86 Dfix1(cp)
87 	register tchar *cp;
88 {
89 	tchar *Dv[2];
90 
91 #ifdef TRACE
92 	tprintf("TRACE- Dfix1()\n");
93 #endif
94 	if (noexec)
95 		return (0);
96 	Dv[0] = cp; Dv[1] = NOSTR;
97 	Dfix2(Dv);
98 	if (gargc != 1) {
99 		setname(cp);
100 		bferr("Ambiguous");
101 	}
102 	cp = savestr(gargv[0]);
103 	blkfree(gargv), gargv = 0;
104 	return (cp);
105 }
106 
107 /*
108  * Subroutine to do actual fixing after state initialization.
109  */
110 Dfix2(v)
111      tchar **v;
112 {
113 	tchar *agargv[GAVSIZ];
114 
115 #ifdef TRACE
116 	tprintf("TRACE- Dfix2()\n");
117 #endif
118 	ginit(agargv);			/* Initialize glob's area pointers */
119 	Dvp = v; Dcp = S_ /* "" */;/* Setup input vector for Dreadc */
120 	unDgetC(0); unDredc(0);		/* Clear out any old peeks (at error) */
121 	dolp = 0; dolcnt = 0;		/* Clear out residual $ expands (...) */
122 	while (Dword())
123 		continue;
124 	gargv = copyblk(gargv);
125 }
126 
127 /*
128  * Get a word.  This routine is analogous to the routine
129  * word() in sh.lex.c for the main lexical input.  One difference
130  * here is that we don't get a newline to terminate our expansion.
131  * Rather, DgetC will return a DEOF when we hit the end-of-input.
 *
 * Returns 0 when DEOF is seen before any part of a word has been
 * collected; otherwise the collected word is handed to Gcat() and
 * 1 is returned.
132  */
133 Dword()
134 {
135 	register int c, c1;
	/* Word buffer, allocated on first use and kept across calls */
136 	static tchar *wbuf = NULL;
137 	static int wbufsiz = BUFSIZ;
138 	register int wp = 0;		/* next free index in wbuf */
139 	register bool dolflg;
140 	bool sofar = 0;			/* set once a word has been started */
/*
 * Grow wbuf by BUFSIZ when wp has reached wbufsiz.  The buffer always
 * holds wbufsiz+1 elements, so index wbufsiz itself is still valid.
 */
141 #define	DYNAMICBUFFER() \
142 	do { \
143 		if (wp >= wbufsiz) { \
144 			wbufsiz += BUFSIZ; \
145 			wbuf = xrealloc(wbuf, (wbufsiz+1) * sizeof (tchar)); \
146 		} \
147 	} while (0)
148 
149 #ifdef TRACE
150 	tprintf("TRACE- Dword()\n");
151 #endif
152 	if (wbuf == NULL)
153 		wbuf = xalloc((wbufsiz+1) * sizeof (tchar));
	/*
	 * loop: look at the first character of a word, or of the next
	 * piece of a word when re-entered from the packing code below
	 * after an opening quote was pushed back.
	 */
154 loop:
155 	c = DgetC(DODOL);
156 	switch (c) {
157 
158 	case DEOF:
159 deof:
160 		if (sofar == 0)
161 			return (0);
162 		/* finish this word and catch the code above the next time */
163 		unDredc(c);
164 		/* fall into ... */
165 
166 	case '\n':
167 		wbuf[wp] = 0;
168 		goto ret;
169 
	/* Blanks in front of a word are skipped */
170 	case ' ':
171 	case '\t':
172 		goto loop;
173 
174 	case '`':
175 		/* We preserve ` quotations which are done yet later */
176 		wbuf[wp++] = c;
		/* fall into the common quoted-string scanner */
177 	case '\'':
178 	case '"':
179 		/*
180 		 * Note that DgetC never returns a QUOTES character
181 		 * from an expansion, so only true input quotes will
182 		 * get us here or out.
183 		 */
184 		c1 = c;
		/* $ is expanded only inside "..." */
185 		dolflg = c1 == '"' ? DODOL : 0;
186 		for (;;) {
187 			c = DgetC(dolflg);
188 			if (c == c1)
189 				break;
190 			if (c == '\n' || c == DEOF)
191 				error("Unmatched %c", (tchar) c1);
			/*
			 * A QUOTEd newline backs wp up by one, so the
			 * store below overwrites the character stored
			 * just before it (presumably the backslash that
			 * escaped the newline -- confirm against the
			 * lexer).
			 */
192 			if ((c & (QUOTE|TRIM)) == ('\n' | QUOTE))
193 				--wp;
194 			DYNAMICBUFFER();
195 			switch (c1) {
196 
197 			case '"':
198 				/*
199 				 * Leave any `s alone for later.
200 				 * Other chars are all quoted, thus `...`
201 				 * can tell it was within "...".
202 				 */
203 				wbuf[wp++] = c == '`' ? '`' : c | QUOTE;
204 				break;
205 
206 			case '\'':
207 				/* Prevent all further interpretation */
208 				wbuf[wp++] = c | QUOTE;
209 				break;
210 
211 			case '`':
212 				/* Leave all text alone for later */
213 				wbuf[wp++] = c;
214 				break;
215 			}
216 		}
		/* Keep the closing ` as well; ' and " themselves are dropped */
217 		if (c1 == '`') {
218 			DYNAMICBUFFER();
219 			wbuf[wp++] = '`';
220 		}
221 		goto pack;		/* continue the word */
222 
223 	case '\\':
224 		c = DgetC(0);		/* No $ subst! */
		/* Backslash before newline or end of input is discarded */
225 		if (c == '\n' || c == DEOF)
226 			goto loop;
227 		c |= QUOTE;
228 		break;
229 #ifdef MBCHAR /* Could be a space char from aux. codeset. */
230 	default:
231 		if (isauxsp(c)) goto loop;
232 #endif /* MBCHAR */
233 	}
	/* Ordinary first character: push it back for the packing loop */
234 	unDgetC(c);
235 pack:
236 	sofar = 1;
237 	/* pack up more characters in this word */
238 	for (;;) {
239 		c = DgetC(DODOL);
240 		if (c == '\\') {
241 			c = DgetC(0);
242 			if (c == DEOF)
243 				goto deof;
			/* Escaped newline becomes a blank; anything else is QUOTEd */
244 			if (c == '\n')
245 				c = ' ';
246 			else
247 				c |= QUOTE;
248 		}
249 		if (c == DEOF)
250 			goto deof;
251 		if (cmap(c, _SP|_NL|_Q|_Q1) ||
252 		    isauxsp(c)) {		/* sp \t\n'"` or aux. sp */
253 			unDgetC(c);
			/* A quote continues the same word; go scan it at loop */
254 			if (cmap(c, QUOTES))
255 				goto loop;
			/* White space or newline ends the word */
256 			DYNAMICBUFFER();
257 			wbuf[wp++] = 0;
258 			goto ret;
259 		}
260 		DYNAMICBUFFER();
261 		wbuf[wp++] = c;
262 	}
263 ret:
	/* Hand the finished word to Gcat() */
264 	Gcat(S_ /* "" */, wbuf);
265 	return (1);
266 }
267 
268 /*
269  * Get a character, performing $ substitution unless flag is 0.
270  * Any QUOTES character which is returned from a $ expansion is
271  * QUOTEd so that it will not be recognized above.
 *
 * Characters are taken from the first of these that has one:
 * the Dpeekc push-back, the lap string, the current expansion word
 * (dolp), the remaining expansion words (dolnxt/dolcnt), and finally
 * the raw input through Dredc().
272  */
273 DgetC(flag)
274 	register int flag;
275 {
276 	register int c;
277 
278 top:
	/* A character pushed back with unDgetC() comes first */
279 	if (c = Dpeekc) {
280 		Dpeekc = 0;
281 		return (c);
282 	}
	/* Then text pending in lap (presumably queued by addla() -- confirm) */
283 	if (lap) {
284 		c = *lap++ & (QUOTE|TRIM);
285 		if (c == 0) {
			/* lap is used up; try the other sources */
286 			lap = 0;
287 			goto top;
288 		}
	/* Also reached by goto from the dolp branch below */
289 quotspec:
290 		/*
291 		 *	don't quote things if there was an error (err!=0)
292 		 * 	the input is original, not from a substitution and
293 		 *	therefore should not be quoted
294 		 */
295 		if (!err && cmap(c, QUOTES))
296 			return (c | QUOTE);
297 		return (c);
298 	}
	/* Then the word of a $ expansion currently being delivered */
299 	if (dolp) {
300 		if (c = *dolp++ & (QUOTE|TRIM))
301 			goto quotspec;
		/* Word finished: move to the next one, separated by a blank */
302 		if (dolcnt > 0) {
303 			setDolp(*dolnxt++);
304 			--dolcnt;
305 			return (' ');
306 		}
307 		dolp = 0;
308 	}
	/* An expansion set up by Dgetdol() whose first word is not started yet */
309 	if (dolcnt > 0) {
310 		setDolp(*dolnxt++);
311 		--dolcnt;
312 		goto top;
313 	}
	/* Nothing pending: read the input vector itself */
314 	c = Dredc();
315 	if (c == '$' && flag) {
		/* Dgetdol() sets up the expansion; go back and deliver it */
316 		Dgetdol();
317 		goto top;
318 	}
319 	return (c);
320 }
321 
/*
 * Empty stand-in for argv: Dgetdol() points vp at nulargv for $N
 * when adrof(S_argv) finds no argv variable.
 */
322 tchar *nulvec[] = { 0 };
323 struct	varent nulargv = { nulvec, S_argv, 0 };
324 
325 /*
326  * Handle the multitudinous $ expansion forms.
327  * Ugh.
328  */
329 Dgetdol()
330 {
331 	register tchar *np;
332 	register struct varent *vp;
333 	tchar name[MAX_VREF_LEN];
334 	int c, sc;
335 	int subscr = 0, lwb = 1, upb = 0;
336 	bool dimen = 0, bitset = 0;
337 	tchar wbuf[BUFSIZ];
338 
339 #ifdef TRACE
340 	tprintf("TRACE- Dgetdol()\n");
341 #endif
342 	dolmod = dolmcnt = 0;
343 	c = sc = DgetC(0);
344 	if (c == '{')
345 		c = DgetC(0);		/* sc is { to take } later */
346 	if ((c & TRIM) == '#')
347 		dimen++, c = DgetC(0);		/* $# takes dimension */
348 	else if (c == '?')
349 		bitset++, c = DgetC(0);		/* $? tests existence */
350 	switch (c) {
351 
352 	case '$':
353 		if (dimen || bitset)
354 syntax:
355 		error("Variable syntax");  /* No $?$, $#$ */
356 		setDolp(doldol);
357 		goto eatbrac;
358 
359 	case '<'|QUOTE:
360 		if (dimen || bitset)
361 			goto syntax;		/* No $?<, $#< */
362 		for (np = wbuf; read_(OLDSTD, np, 1) == 1; np++) {
363 			if (np >= &wbuf[BUFSIZ-1])
364 				error("$< line too long");
365 			if (*np <= 0 || *np == '\n')
366 				break;
367 		}
368 		*np = 0;
369 		/*
370 		 * KLUDGE: dolmod is set here because it will
371 		 * cause setDolp to call domod and thus to copy wbuf.
372 		 * Otherwise setDolp would use it directly. If we saved
373 		 * it ourselves, no one would know when to free it.
374 		 * The actual function of the 'q' causes filename
375 		 * expansion not to be done on the interpolated value.
376 		 */
377 		dolmod = 'q';
378 		dolmcnt = 10000;
379 		setDolp(wbuf);
380 		goto eatbrac;
381 
382 	case DEOF:
383 	case '\n':
384 		goto syntax;
385 
386 	case '*':
387 		(void) strcpy_(name, S_argv);
388 		vp = adrof(S_argv);
389 		subscr = -1;			/* Prevent eating [...] */
390 		break;
391 
392 	default:
393 		np = name;
394 		if (digit(c)) {
395 			if (dimen)
396 				goto syntax;	/* No $#1, e.g. */
397 			subscr = 0;
398 			do {
399 				subscr = subscr * 10 + c - '0';
400 				c = DgetC(0);
401 			} while (digit(c));
402 			unDredc(c);
403 			if (subscr < 0)
404 				error("Subscript out of range");
405 			if (subscr == 0) {
406 				if (bitset) {
407 					dolp = file ? S_1/*"1"*/ : S_0/*"0"*/;
408 					goto eatbrac;
409 				}
410 				if (file == 0)
411 					error("No file for $0");
412 				setDolp(file);
413 				goto eatbrac;
414 			}
415 			if (bitset)
416 				goto syntax;
417 			vp = adrof(S_argv);
418 			if (vp == 0) {
419 				vp = &nulargv;
420 				goto eatmod;
421 			}
422 			break;
423 		}
424 		if (!alnum(c))
425 			goto syntax;
426 		for (;;) {
427 			*np++ = c;
428 			c = DgetC(0);
429 			if (!alnum(c))
430 				break;
431 			/* if variable name is > 20, complain */
432 			if (np >= &name[MAX_VAR_LEN])
433 				error("Variable name too long");
434 
435 		}
436 		*np++ = 0;
437 		unDredc(c);
438 		vp = adrof(name);
439 	}
440 	if (bitset) {
441 		/* getenv() to getenv_(), because 'name''s type is now tchar * */
442 		/* no need to xalloc */
443 		dolp = (vp || getenv_(name)) ? S_1 /*"1"*/ : S_0/*"0"*/;
444 		goto eatbrac;
445 	}
446 	if (vp == 0) {
447 		/* getenv() to getenv_(), because 'name''s type is now tchar * */
448 		/* no need to xalloc */
449 		np = getenv_(name);
450 		if (np) {
451 			addla(np);
452 			goto eatbrac;
453 		}
454 		udvar(name);
455 		/*NOTREACHED*/
456 	}
457 	c = DgetC(0);
458 	upb = blklen(vp->vec);
459 	if (dimen == 0 && subscr == 0 && c == '[') {
460 		np = name;
461 		for (;;) {
462 			c = DgetC(DODOL);	/* Allow $ expand within [ ] */
463 			if (c == ']')
464 				break;
465 			if (c == '\n' || c == DEOF)
466 				goto syntax;
467 			if (np >= &name[MAX_VREF_LEN])
468 				error("Variable reference too long");
469 			*np++ = c;
470 		}
471 		*np = 0, np = name;
472 		if (dolp || dolcnt)		/* $ exp must end before ] */
473 			goto syntax;
474 		if (!*np)
475 			goto syntax;
476 		if (digit(*np)) {
477 			register int i = 0;
478 
479 			while (digit(*np))
480 				i = i * 10 + *np++ - '0';
481 /*			if ((i < 0 || i > upb) && !any(*np, "-*")) {*/
482 			if ((i < 0 || i > upb) && (*np!='-') && (*np!='*')) {
483 oob:
484 				setname(vp->v_name);
485 				error("Subscript out of range");
486 			}
487 			lwb = i;
488 			if (!*np)
489 				upb = lwb, np = S_AST/*"*"*/;
490 		}
491 		if (*np == '*')
492 			np++;
493 		else if (*np != '-')
494 			goto syntax;
495 		else {
496 			register int i = upb;
497 
498 			np++;
499 			if (digit(*np)) {
500 				i = 0;
501 				while (digit(*np))
502 					i = i * 10 + *np++ - '0';
503 				if (i < 0 || i > upb)
504 					goto oob;
505 			}
506 			if (i < lwb)
507 				upb = lwb - 1;
508 			else
509 				upb = i;
510 		}
511 		if (lwb == 0) {
512 			if (upb != 0)
513 				goto oob;
514 			upb = -1;
515 		}
516 		if (*np)
517 			goto syntax;
518 	} else {
519 		if (subscr > 0)
520 			if (subscr > upb)
521 				lwb = 1, upb = 0;
522 			else
523 				lwb = upb = subscr;
524 		unDredc(c);
525 	}
526 	if (dimen) {
527 		tchar *cp = putn(upb - lwb + 1);
528 
529 		addla(cp);
530 		xfree(cp);
531 	} else {
532 eatmod:
533 		c = DgetC(0);
534 		if (c == ':') {
535 			c = DgetC(0), dolmcnt = 1;
536 			if (c == 'g')
537 				c = DgetC(0), dolmcnt = 10000;
538 			if (!any(c, S_htrqxe))
539 				error("Bad : mod in $");
540 			dolmod = c;
541 			if (c == 'q')
542 				dolmcnt = 10000;
543 		} else
544 			unDredc(c);
545 		dolnxt = &vp->vec[lwb - 1];
546 		dolcnt = upb - lwb + 1;
547 	}
548 eatbrac:
549 	if (sc == '{') {
550 		c = Dredc();
551 		if (c != '}')
552 			goto syntax;
553 	}
554 }
555 
556 setDolp(cp)
557 	register tchar *cp;
558 {
559 	register tchar *dp;
560 
561 #ifdef TRACE
562 	tprintf("TRACE- setDolp()\n");
563 #endif
564 	if (dolmod == 0 || dolmcnt == 0) {
565 		dolp = cp;
566 		return;
567 	}
568 	dp = domod(cp, dolmod);
569 	if (dp) {
570 		dolmcnt--;
571 		addla(dp);
572 		xfree(dp);
573 	} else
574 		addla(cp);
575 	dolp = S_/*""*/;
576 }
577 
/*
 * Push c back so that the next Dredc() call returns it.  Dredc()
 * tests Dpeekrd for nonzero, so pushing back 0 clears the peek.
 */
578 unDredc(c)
579 	int c;
580 {
581 
582 	Dpeekrd = c;
583 }
584 
585 Dredc()
586 {
587 	register int c;
588 
589 	if (c = Dpeekrd) {
590 		Dpeekrd = 0;
591 		return (c);
592 	}
593 	if (Dcp && (c = *Dcp++))
594 		return (c&(QUOTE|TRIM));
595 	if (*Dvp == 0) {
596 		Dcp = 0;
597 		return (DEOF);
598 	}
599 	Dcp = *Dvp++;
600 	return (' ');
601 }
602 
603 Dtestq(c)
604 	register int c;
605 {
606 
607 	if (cmap(c, QUOTES))
608 		gflag = 1;
609 }
610 
611 /*
612  * Form a shell temporary file (in unit 0) from the words
613  * of the shell input up to a line the same as "term".
614  * Unit 0 should have been closed before this call.
 *
 * Lines are copied unchanged when term contained a quoting character
 * or under noexec; otherwise each line is variable expanded and then
 * command substituted.  On reaching the terminator the buffered
 * output is written and unit 0 is rewound to offset 0.
615  */
616 heredoc(term)
617      tchar *term;
618 {
619 	register int c;
620 	tchar *Dv[2];
	/* obuf: pending output; lbuf: raw input line; mbuf: expanded line */
621 	tchar obuf[BUFSIZ], lbuf[BUFSIZ], mbuf[BUFSIZ];
	/* Room left in each of the buffers above */
622 	int ocnt, lcnt, mcnt;
623 	register tchar *lbp, *obp, *mbp;
624 	tchar **vp;
625 	bool quoted;
626 	tchar shtemp[] = {'/', 't', 'm', 'p', '/', 's', 'h', 'X', 'X', 'X',
627 'X', 'X', 'X', 0};
628 	int fd1;
629 
630 #ifdef TRACE
631 	tprintf("TRACE- heredoc()\n");
632 #endif
	/* Create the temporary file and drop its name straight away */
633 	if ((fd1 = mkstemp_(shtemp)) < 0)
634 		Perror(shtemp);
635 	(void) unlink_(shtemp);			/* 0 0 inode! */
636 	unsetfd(fd1);
	/* quoted: after trim(), does term still hold a QUOTES character? */
637 	Dv[0] = term; Dv[1] = NOSTR; gflag = 0;
638 	trim(Dv); rscan(Dv, Dtestq); quoted = gflag;
639 	ocnt = BUFSIZ; obp = obuf;
640 	for (;;) {
641 		/*
642 		 * Read up a line
643 		 */
		/* The 4 spare slots leave room for the '\n' and 0 appended below */
644 		lbp = lbuf; lcnt = BUFSIZ - 4;
645 		for (;;) {
646 			c = readc(1);		/* 1 -> Want EOF returns */
647 			if (c < 0) {
648 				setname(term);
649 				bferr("<< terminator not found");
650 			}
651 			if (c == '\n')
652 				break;
			/* Characters that are 0 after masking with TRIM are dropped */
653 			if (c &= TRIM) {
654 				*lbp++ = c;
655 				if (--lcnt < 0) {
656 					setname(S_LESLES/*"<<"*/);
657 					error("Line overflow");
658 				}
659 			}
660 		}
661 		*lbp = 0;
662 
663 		/*
664 		 * Compare to terminator -- before expansion
665 		 */
666 		if (eq(lbuf, term)) {
			/* Flush what is buffered and rewind unit 0 */
667 			(void) write_(0, obuf, BUFSIZ - ocnt);
668 			(void) lseek(0, (off_t)0, 0);
669 			return;
670 		}
671 
672 		/*
673 		 * If term was quoted or -n just pass it on
674 		 */
675 		if (quoted || noexec) {
676 			*lbp++ = '\n'; *lbp = 0;
677 			for (lbp = lbuf; c = *lbp++;) {
678 				*obp++ = c;
				/* Write obuf out each time it fills */
679 				if (--ocnt == 0) {
680 					(void) write_(0, obuf, BUFSIZ);
681 					obp = obuf; ocnt = BUFSIZ;
682 				}
683 			}
684 			continue;
685 		}
686 
687 		/*
688 		 * Term wasn't quoted so variable and then command
689 		 * expand the input line
690 		 */
		/*
		 * Dvp is aimed at Dv[1] (NOSTR), so Dredc() returns
		 * DEOF as soon as lbuf has been consumed.
		 */
691 		Dcp = lbuf; Dvp = Dv + 1; mbp = mbuf; mcnt = BUFSIZ - 4;
692 		for (;;) {
693 			c = DgetC(DODOL);
694 			if (c == DEOF)
695 				break;
696 			if ((c &= TRIM) == 0)
697 				continue;
698 			/* \ quotes \ $ ` here */
699 			if (c =='\\') {
700 				c = DgetC(0);
701 /*				if (!any(c, "$\\`"))*/
				/*
				 * Before any other character the backslash is
				 * kept and that character is pushed back QUOTEd.
				 */
702 				if ((c!='$')&&(c!='\\')&&(c!='`'))
703 					unDgetC(c | QUOTE), c = '\\';
704 				else
705 					c |= QUOTE;
706 			}
707 			*mbp++ = c;
708 			if (--mcnt == 0) {
709 				setname(S_LESLES/*"<<"*/);
710 				bferr("Line overflow");
711 			}
712 		}
713 		*mbp++ = 0;
714 
715 		/*
716 		 * If any ` in line do command substitution
717 		 */
718 		mbp = mbuf;
719 		if (any('`', mbp)) {
720 			/*
721 			 * 1 arg to dobackp causes substitution to be literal.
722 			 * Words are broken only at newlines so that all blanks
723 			 * and tabs are preserved.  Blank lines (null words)
724 			 * are not discarded.
725 			 */
726 			vp = dobackp(mbuf, 1);
727 		} else
728 			/* Setup trivial vector similar to return of dobackp */
729 			Dv[0] = mbp, Dv[1] = NOSTR, vp = Dv;
730 
731 		/*
732 		 * Resurrect the words from the command substitution
733 		 * each separated by a newline.  Note that the last
734 		 * newline of a command substitution will have been
735 		 * discarded, but we put a newline after the last word
736 		 * because this represents the newline after the last
737 		 * input line!
738 		 */
739 		for (; *vp; vp++) {
740 			for (mbp = *vp; *mbp; mbp++) {
				/* QUOTE bits are masked off on the way out */
741 				*obp++ = *mbp & TRIM;
742 				if (--ocnt == 0) {
743 					(void) write_(0, obuf, BUFSIZ);
744 					obp = obuf; ocnt = BUFSIZ;
745 				}
746 			}
747 			*obp++ = '\n';
748 			if (--ocnt == 0) {
749 				(void) write_(0, obuf, BUFSIZ);
750 				obp = obuf; ocnt = BUFSIZ;
751 			}
752 		}
		/* Release pargv if it is set (presumably by dobackp() -- confirm) */
753 		if (pargv)
754 			blkfree(pargv), pargv = 0;
755 	}
756 }
757