xref: /illumos-gate/usr/src/cmd/awk/lib.c (revision 7c478bd9)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
23 /*	  All Rights Reserved  	*/
24 
25 
26 /*
27  * Copyright (c) 1996-2001 by Sun Microsystems, Inc.
28  * All rights reserved.
29  */
30 
31 #pragma ident	"%Z%%M%	%I%	%E% SMI"	/* SVr4.0 2.13	*/
32 
33 #include <stdio.h>
34 #include <ctype.h>
35 #include <errno.h>
36 #include <libintl.h>
37 #include "awk.h"
38 #include "y.tab.h"
39 
/*
 * Fast-path value accessors.  When a cell's tval has NUM (resp. STR) set
 * and none of ARR, FLD or REC, its fval (resp. sval) is read directly;
 * in every other case the out-of-line r_getfval()/r_getsval() is called.
 * Note that the argument p is evaluated more than once.
 */
#define	getfval(p)	\
	(((p)->tval & (ARR|FLD|REC|NUM)) == NUM ? (p)->fval : r_getfval(p))
#define	getsval(p)	\
	(((p)->tval & (ARR|FLD|REC|STR)) == STR ? (p)->sval : r_getsval(p))

/* out-of-line accessors used by the macros above; defined elsewhere */
extern	Awkfloat r_getfval();
extern	uchar	*r_getsval();

FILE	*infile	= NULL;		/* stream getrec() reads from; NULL = none open */
uchar	*file	= (uchar*) "";	/* ARGV entry getrec() last fetched */
uchar	recdata[RECSIZE];	/* initial storage behind `record' */
uchar	*record	= recdata;	/* current record; recbld() repoints it */
uchar	fields[RECSIZE];	/* fldbld()/refldbld() copy field text here */

int	donefld;	/* 1 = implies rec broken into fields */
int	donerec;	/* 1 = record is valid (no flds have changed) */

Cell fldtab[MAXFLD];	/* room for fields */

int	maxfld	= 0;	/* last used field */
int	argno	= 1;	/* current input argument number */
extern	Awkfloat *ARGC;
extern	uchar	*getargv();
63 
64 initgetrec()
65 {
66 	int i;
67 	uchar *p;
68 
69 	for (i = 1; i < *ARGC; i++) {
70 		if (!isclvar(p = getargv(i)))	/* find 1st real filename */
71 			return;
72 		setclvar(p);	/* a commandline assignment before filename */
73 		argno++;
74 	}
75 	infile = stdin;		/* no filenames, so use stdin */
76 	/* *FILENAME = file = (uchar*) "-"; */
77 }
78 
79 getrec(buf)
80 	uchar *buf;
81 {
82 	int c;
83 	static int firsttime = 1;
84 
85 	if (firsttime) {
86 		firsttime = 0;
87 		initgetrec();
88 	}
89 	dprintf(("RS=<%s>, FS=<%s>, ARGC=%f, FILENAME=%s\n",
90 		*RS, *FS, *ARGC, *FILENAME));
91 	donefld = 0;
92 	donerec = 1;
93 	buf[0] = 0;
94 	while (argno < *ARGC || infile == stdin) {
95 		dprintf(("argno=%d, file=|%s|\n", argno, file));
96 		if (infile == NULL) {	/* have to open a new file */
97 			file = getargv(argno);
98 			if (*file == '\0') {	/* it's been zapped */
99 				argno++;
100 				continue;
101 			}
102 			if (isclvar(file)) {	/* a var=value arg */
103 				setclvar(file);
104 				argno++;
105 				continue;
106 			}
107 			*FILENAME = file;
108 			dprintf(("opening file %s\n", file));
109 			if (*file == '-' && *(file+1) == '\0')
110 				infile = stdin;
111 			else if ((infile = fopen((char *)file, "r")) == NULL)
112 				ERROR "can't open file %s", file FATAL;
113 			setfval(fnrloc, 0.0);
114 		}
115 		c = readrec(buf, RECSIZE, infile);
116 		if (c != 0 || buf[0] != '\0') {	/* normal record */
117 			if (buf == record) {
118 				if (!(recloc->tval & DONTFREE))
119 					xfree(recloc->sval);
120 				recloc->sval = record;
121 				recloc->tval = REC | STR | DONTFREE;
122 				if (isnumber(recloc->sval)) {
123 					recloc->fval = atof(recloc->sval);
124 					recloc->tval |= NUM;
125 				}
126 			}
127 			setfval(nrloc, nrloc->fval+1);
128 			setfval(fnrloc, fnrloc->fval+1);
129 			return (1);
130 		}
131 		/* EOF arrived on this file; set up next */
132 		if (infile != stdin)
133 			fclose(infile);
134 		infile = NULL;
135 		argno++;
136 	}
137 	return (0);	/* true end of file */
138 }
139 
140 readrec(buf, bufsize, inf)	/* read one record into buf */
141 	uchar *buf;
142 	int bufsize;
143 	FILE *inf;
144 {
145 	register int sep, c;
146 	register uchar *rr;
147 	int	count;
148 
149 	if ((sep = **RS) == 0) {
150 		sep = '\n';
151 		/* skip leading \n's */
152 		while ((c = getc(inf)) == '\n' && c != EOF)
153 			;
154 		if (c != EOF)
155 			ungetc(c, inf);
156 	}
157 	for (rr = buf, count = 0; ; ) {
158 		while ((c = getc(inf)) != sep && c != EOF) {
159 			count++;
160 			if (count > bufsize)
161 				ERROR "input record `%.20s...' too long",
162 				    buf FATAL;
163 			*rr++ = c;
164 		}
165 		if (**RS == sep || c == EOF)
166 			break;
167 		if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */
168 			break;
169 		count += 2;
170 		if (count > bufsize)
171 			ERROR "input record `%.20s...' too long", buf FATAL;
172 		*rr++ = '\n';
173 		*rr++ = c;
174 	}
175 	*rr = 0;
176 	dprintf(("readrec saw <%s>, returns %d\n",
177 		buf, c == EOF && rr == buf ? 0 : 1));
178 	return (c == EOF && rr == buf ? 0 : 1);
179 }
180 
181 /* get ARGV[n] */
182 uchar *
183 getargv(n)
184 	int n;
185 {
186 	Cell *x;
187 	uchar *s, temp[10];
188 	extern Array *ARGVtab;
189 
190 	sprintf((char *)temp, "%d", n);
191 	x = setsymtab(temp, "", 0.0, STR, ARGVtab);
192 	s = getsval(x);
193 	dprintf(("getargv(%d) returns |%s|\n", n, s));
194 	return (s);
195 }
196 
197 setclvar(s)	/* set var=value from s */
198 uchar *s;
199 {
200 	uchar *p;
201 	Cell *q;
202 
203 	for (p = s; *p != '='; p++)
204 		;
205 	*p++ = 0;
206 	p = qstring(p, '\0');
207 	q = setsymtab(s, p, 0.0, STR, symtab);
208 	setsval(q, p);
209 	if (isnumber(q->sval)) {
210 		q->fval = atof(q->sval);
211 		q->tval |= NUM;
212 	}
213 	dprintf(("command line set %s to |%s|\n", s, p));
214 }
215 
216 
217 fldbld()
218 {
219 	register uchar *r, *fr, sep;
220 	Cell *p;
221 	int i;
222 
223 	if (donefld)
224 		return;
225 	if (!(recloc->tval & STR))
226 		getsval(recloc);
227 	r = recloc->sval;	/* was record! */
228 	fr = fields;
229 	i = 0;	/* number of fields accumulated here */
230 	if (strlen(*FS) > 1) {	/* it's a regular expression */
231 		i = refldbld(r, *FS);
232 	} else if ((sep = **FS) == ' ') {
233 		for (i = 0; ; ) {
234 			while (*r == ' ' || *r == '\t' || *r == '\n')
235 				r++;
236 			if (*r == 0)
237 				break;
238 			i++;
239 			if (i >= MAXFLD)
240 				break;
241 			if (!(fldtab[i].tval & DONTFREE))
242 				xfree(fldtab[i].sval);
243 			fldtab[i].sval = fr;
244 			fldtab[i].tval = FLD | STR | DONTFREE;
245 			do
246 				*fr++ = *r++;
247 			while (*r != ' ' && *r != '\t' && *r != '\n' &&
248 				*r != '\0');
249 			*fr++ = 0;
250 		}
251 		*fr = 0;
252 	} else if (*r != 0) {	/* if 0, it's a null field */
253 		for (;;) {
254 			i++;
255 			if (i >= MAXFLD)
256 				break;
257 			if (!(fldtab[i].tval & DONTFREE))
258 				xfree(fldtab[i].sval);
259 			fldtab[i].sval = fr;
260 			fldtab[i].tval = FLD | STR | DONTFREE;
261 			/* \n always a separator */
262 			while (*r != sep && *r != '\n' && *r != '\0')
263 				*fr++ = *r++;
264 			*fr++ = 0;
265 			if (*r++ == 0)
266 				break;
267 		}
268 		*fr = 0;
269 	}
270 	if (i >= MAXFLD)
271 		ERROR "record `%.20s...' has too many fields", record FATAL;
272 	/* clean out junk from previous record */
273 	cleanfld(i, maxfld);
274 	maxfld = i;
275 	donefld = 1;
276 	for (p = fldtab+1; p <= fldtab+maxfld; p++) {
277 		if (isnumber(p->sval)) {
278 			p->fval = atof(p->sval);
279 			p->tval |= NUM;
280 		}
281 	}
282 	setfval(nfloc, (Awkfloat) maxfld);
283 	if (dbg)
284 		for (p = fldtab; p <= fldtab+maxfld; p++)
285 			printf("field %d: |%s|\n", p-fldtab, p->sval);
286 }
287 
288 cleanfld(n1, n2)	/* clean out fields n1..n2 inclusive */
289 {
290 	static uchar *nullstat = (uchar *) "";
291 	register Cell *p, *q;
292 
293 	for (p = &fldtab[n2], q = &fldtab[n1]; p > q; p--) {
294 		if (!(p->tval & DONTFREE))
295 			xfree(p->sval);
296 		p->tval = FLD | STR | DONTFREE;
297 		p->sval = nullstat;
298 	}
299 }
300 
301 newfld(n)	/* add field n (after end) */
302 {
303 	if (n >= MAXFLD)
304 		ERROR "creating too many fields", record FATAL;
305 	cleanfld(maxfld, n);
306 	maxfld = n;
307 	setfval(nfloc, (Awkfloat) n);
308 }
309 
310 refldbld(rec, fs)	/* build fields from reg expr in FS */
311 	uchar *rec, *fs;
312 {
313 	fa *makedfa();
314 	uchar *fr;
315 	int i, tempstat;
316 	fa *pfa;
317 
318 	fr = fields;
319 	*fr = '\0';
320 	if (*rec == '\0')
321 		return (0);
322 	pfa = makedfa(fs, 1);
323 	dprintf(("into refldbld, rec = <%s>, pat = <%s>\n", rec, fs));
324 	tempstat = pfa->initstat;
325 	for (i = 1; i < MAXFLD; i++) {
326 		if (!(fldtab[i].tval & DONTFREE))
327 			xfree(fldtab[i].sval);
328 		fldtab[i].tval = FLD | STR | DONTFREE;
329 		fldtab[i].sval = fr;
330 		dprintf(("refldbld: i=%d\n", i));
331 		if (nematch(pfa, rec)) {
332 			pfa->initstat = 2;
333 			dprintf(("match %s (%d chars)\n", patbeg, patlen));
334 			strncpy(fr, rec, patbeg-rec);
335 			fr += patbeg - rec + 1;
336 			*(fr-1) = '\0';
337 			rec = patbeg + patlen;
338 		} else {
339 			dprintf(("no match %s\n", rec));
340 			strcpy(fr, rec);
341 			pfa->initstat = tempstat;
342 			break;
343 		}
344 	}
345 	return (i);
346 }
347 
348 recbld()
349 {
350 	int i;
351 	register uchar *r, *p;
352 	static uchar rec[RECSIZE];
353 
354 	if (donerec == 1)
355 		return;
356 	r = rec;
357 	for (i = 1; i <= *NF; i++) {
358 		p = getsval(&fldtab[i]);
359 		while ((r < rec + RECSIZE) && (*r = *p++))
360 			r++;
361 		if (i < *NF)
362 			for (p = *OFS; (r < rec + RECSIZE) && (*r = *p++); )
363 				r++;
364 	}
365 	if (r >= rec + RECSIZE)
366 		ERROR "built giant record `%.20s...'", record FATAL;
367 	*r = '\0';
368 	dprintf(("in recbld FS=%o, recloc=%o\n", **FS, recloc));
369 	recloc->tval = REC | STR | DONTFREE;
370 	recloc->sval = record = rec;
371 	dprintf(("in recbld FS=%o, recloc=%o\n", **FS, recloc));
372 	dprintf(("recbld = |%s|\n", record));
373 	donerec = 1;
374 }
375 
376 Cell *
377 fieldadr(n)
378 {
379 	if (n < 0 || n >= MAXFLD)
380 		ERROR "trying to access field %d", n FATAL;
381 	return (&fldtab[n]);
382 }
383 
int	errorflag	= 0;	/* yyerror() sets this to 2 on a syntax error */
/* NOTE(review): presumably the buffer the ERROR macro formats into -- confirm */
char	errbuf[200];
386 
387 yyerror(s)
388 	uchar *s;
389 {
390 	extern uchar *cmdname, *curfname;
391 	static int been_here = 0;
392 
393 	if (been_here++ > 2)
394 		return;
395 	fprintf(stderr, "%s: %s", cmdname, s);
396 	fprintf(stderr, gettext(" at source line %lld"), lineno);
397 	if (curfname != NULL)
398 		fprintf(stderr, gettext(" in function %s"), curfname);
399 	fprintf(stderr, "\n");
400 	errorflag = 2;
401 	eprint();
402 }
403 
/*
 * Report a floating point exception as a fatal error.
 * NOTE(review): presumably installed as the SIGFPE handler -- the
 * registration is not in this file; confirm.
 */
fpecatch()
{
	ERROR "floating point exception" FATAL;
}
408 
/* running open-minus-close counts for {}, [] and (); updated by bclass() */
extern int bracecnt, brackcnt, parencnt;
410 
411 bracecheck()
412 {
413 	int c;
414 	static int beenhere = 0;
415 
416 	if (beenhere++)
417 		return;
418 	while ((c = input()) != EOF && c != '\0')
419 		bclass(c);
420 	bcheck2(bracecnt, '{', '}');
421 	bcheck2(brackcnt, '[', ']');
422 	bcheck2(parencnt, '(', ')');
423 }
424 
/*
 * Report an imbalance of n for one bracket pair on stderr.  A positive n
 * means that many closers (c2) are missing, a negative n means that many
 * extra closers were seen, and zero prints nothing.  c1 is not used.
 */
bcheck2(n, c1, c2)
{
	if (n > 0) {
		if (n == 1)
			fprintf(stderr, gettext("\tmissing %c\n"), c2);
		else
			fprintf(stderr, gettext("\t%d missing %c's\n"), n, c2);
	} else if (n < 0) {
		if (n == -1)
			fprintf(stderr, gettext("\textra %c\n"), c2);
		else
			fprintf(stderr, gettext("\t%d extra %c's\n"), -n, c2);
	}
}
436 
437 error(f, s)
438 	int f;
439 	char *s;
440 {
441 	extern Node *curnode;
442 	extern uchar *cmdname;
443 
444 	fflush(stdout);
445 	fprintf(stderr, "%s: ", cmdname);
446 	fprintf(stderr, "%s", s);
447 	fprintf(stderr, "\n");
448 	if (compile_time != 2 && NR && *NR > 0) {
449 		fprintf(stderr, gettext(" input record number %g"), *FNR);
450 		if (strcmp(*FILENAME, "-") != 0)
451 			fprintf(stderr, gettext(", file %s"), *FILENAME);
452 		fprintf(stderr, "\n");
453 	}
454 	if (compile_time != 2 && curnode)
455 		fprintf(stderr, gettext(" source line number %lld\n"),
456 		    curnode->lineno);
457 	else if (compile_time != 2 && lineno)
458 		fprintf(stderr, gettext(" source line number %lld\n"), lineno);
459 	eprint();
460 	if (f) {
461 		if (dbg)
462 			abort();
463 		exit(2);
464 	}
465 }
466 
/*
 * Print the source text around an error on stderr, with the last token
 * set off between " >>> " and " <<< ".  Does nothing when compile_time is
 * 0 or 2, and prints at most once (been_here).
 *
 * NOTE(review): ebuf and ep are defined elsewhere; presumably ebuf holds
 * recently scanned source text and ep points just past the last character
 * stored -- confirm against the lexer.
 */
eprint()	/* try to print context around error */
{
	uchar *p, *q;
	int c;
	static int been_here = 0;
	extern uchar ebuf[300], *ep;

	if (compile_time == 2 || compile_time == 0 || been_here++ > 0)
		return;
	/* back up from the end: skip one trailing newline ... */
	p = ep - 1;
	if (p > ebuf && *p == '\n')
		p--;
	/* ... then move p back to the previous newline, NUL or start of ebuf */
	for (; p > ebuf && *p != '\n' && *p != '\0'; p--)
		;
	/* step forward over newlines so p is on the first char of the line */
	while (*p == '\n')
		p++;
	fprintf(stderr, gettext(" context is\n\t"));
	/* q: scan back from the end to the whitespace before the last token */
	for (q = ep-1; q >= p && *q != ' ' && *q != '\t' && *q != '\n';
		q--)
		;
	/* text before the last token (embedded NULs are skipped) */
	for (; p < q; p++)
		if (*p)
			putc(*p, stderr);
	fprintf(stderr, " >>> ");
	/* the last token itself, up to ep */
	for (; p < ep; p++)
		if (*p)
			putc(*p, stderr);
	fprintf(stderr, " <<< ");
	/* echo the rest of the source line, keeping bracket counts current */
	if (*ep)
		while ((c = input()) != '\n' && c != '\0' && c != EOF) {
			putc(c, stderr);
			bclass(c);
		}
	putc('\n', stderr);
	ep = ebuf;
}
503 
504 bclass(c)
505 {
506 	switch (c) {
507 	case '{': bracecnt++; break;
508 	case '}': bracecnt--; break;
509 	case '[': brackcnt++; break;
510 	case ']': brackcnt--; break;
511 	case '(': parencnt++; break;
512 	case ')': parencnt--; break;
513 	}
514 }
515 
516 double
517 errcheck(x, s)
518 	double x;
519 	uchar *s;
520 {
521 	extern int errno;
522 
523 	if (errno == EDOM) {
524 		errno = 0;
525 		ERROR "%s argument out of domain", s WARNING;
526 		x = 1;
527 	} else if (errno == ERANGE) {
528 		errno = 0;
529 		ERROR "%s result out of range", s WARNING;
530 		x = 1;
531 	}
532 	return (x);
533 }
534 
535 PUTS(s) uchar *s; {
536 	dprintf(("%s\n", s));
537 }
538 
/*
 * Is s of the form var=something?
 *
 * Returns 1 when s starts with one or more letters, digits or
 * underscores followed by a single '=' (not "=="), and 0 otherwise.
 * s is not modified.
 */
int
isclvar(s)	/* is s of form var=something? */
	char *s;
{
	char *os = s;

	for (; *s; s++)
		/* ctype functions need an unsigned char value */
		if (!(isalnum((unsigned char)*s) || *s == '_'))
			break;
	return (*s == '=' && s > os && *(s+1) != '=');
}
549 
550 #define	MAXEXPON	38	/* maximum exponent for fp number */
551 
552 isnumber(s)
553 register uchar *s;
554 {
555 	register int d1, d2;
556 	int point;
557 	uchar *es;
558 	extern char	radixpoint;
559 
560 	d1 = d2 = point = 0;
561 	while (*s == ' ' || *s == '\t' || *s == '\n')
562 		s++;
563 	if (*s == '\0')
564 		return (0);	/* empty stuff isn't number */
565 	if (*s == '+' || *s == '-')
566 		s++;
567 	if (!isdigit(*s) && *s != radixpoint)
568 		return (0);
569 	if (isdigit(*s)) {
570 		do {
571 			d1++;
572 			s++;
573 		} while (isdigit(*s));
574 	}
575 	if (d1 >= MAXEXPON)
576 		return (0);	/* too many digits to convert */
577 	if (*s == radixpoint) {
578 		point++;
579 		s++;
580 	}
581 	if (isdigit(*s)) {
582 		d2++;
583 		do {
584 			s++;
585 		} while (isdigit(*s));
586 	}
587 	if (!(d1 || point && d2))
588 		return (0);
589 	if (*s == 'e' || *s == 'E') {
590 		s++;
591 		if (*s == '+' || *s == '-')
592 			s++;
593 		if (!isdigit(*s))
594 			return (0);
595 		es = s;
596 		do {
597 			s++;
598 		} while (isdigit(*s));
599 		if (s - es > 2)
600 			return (0);
601 		else if (s - es == 2 &&
602 			(int)(10 * (*es-'0') + *(es+1)-'0') >= MAXEXPON)
603 			return (0);
604 	}
605 	while (*s == ' ' || *s == '\t' || *s == '\n')
606 		s++;
607 	if (*s == '\0')
608 		return (1);
609 	else
610 		return (0);
611 }
612