1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
28 /* All Rights Reserved */
29
30 #include <stdio.h>
31 #include "awk.def"
32 #include "awk.h"
33 #include <ctype.h>
34 #include <wctype.h>
35 #include "awktype.h"
36 #include <stdlib.h>
37 #include <stdarg.h>
38
39 FILE *infile = NULL;
40 wchar_t *file;
41 #define RECSIZE (5 * 512)
42 wchar_t record[RECSIZE];
43 wchar_t fields[RECSIZE];
44 wchar_t L_NULL[] = L"";
45
46
47 #define MAXFLD 100
48 int donefld; /* 1 = implies rec broken into fields */
49 int donerec; /* 1 = record is valid (no flds have changed) */
50 int mustfld; /* 1 = NF seen, so always break */
51 static wchar_t L_record[] = L"$record";
52
53
54 #define FINIT { OCELL, CFLD, 0, L_NULL, 0.0, FLD|STR }
55 CELL fldtab[MAXFLD] = { /* room for fields */
56 { OCELL, CFLD, L_record, record, 0.0, STR|FLD},
57 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
58 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
59 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
60 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
61 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
62 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
63 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
64 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
65 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT,
66 FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT, FINIT
67 };
68 int maxfld = 0; /* last used field */
69 /* pointer to CELL for maximum field assigned to */
70 CELL *maxmfld = &fldtab[0];
71
72 static int isclvar(wchar_t *);
73 static void setclvar(wchar_t *);
74 void fldbld(void);
75
76 int
getrec(void)77 getrec(void)
78 {
79 wchar_t *rr, *er;
80 int c, sep;
81 FILE *inf;
82 extern int svargc;
83 extern wchar_t **svargv;
84
85
86 dprintf("**RS=%o, **FS=%o\n", **RS, **FS, NULL);
87 donefld = 0;
88 donerec = 1;
89 record[0] = 0;
90 er = record + RECSIZE;
91 while (svargc > 0) {
92 dprintf("svargc=%d, *svargv=%ws\n", svargc, *svargv, NULL);
93 if (infile == NULL) { /* have to open a new file */
94 /*
95 * If the argument contains a '=', determine if the
96 * argument needs to be treated as a variable assignment
97 * or as the pathname of a file.
98 */
99 if (isclvar(*svargv)) {
100 /* it's a var=value argument */
101 setclvar(*svargv);
102 if (svargc > 1) {
103 svargv++;
104 svargc--;
105 continue;
106 }
107 *svargv = L"-";
108 }
109 *FILENAME = file = *svargv;
110 dprintf("opening file %ws\n", file, NULL, NULL);
111 if (*file == (wchar_t)L'-')
112 infile = stdin;
113 else if ((infile = fopen(toeuccode(file), "r")) == NULL)
114 error(FATAL, "can't open %ws", file);
115 }
116 if ((sep = **RS) == 0)
117 sep = '\n';
118 inf = infile;
119 for (rr = record; /* dummy */; /* dummy */) {
120 for (; (c = getwc(inf)) != sep && c != EOF && rr < er;
121 *rr++ = c)
122 ;
123 if (rr >= er)
124 error(FATAL, "record `%.20ws...' too long",
125 record);
126 if (**RS == sep || c == EOF)
127 break;
128 if ((c = getwc(inf)) == '\n' || c == EOF)
129 /* 2 in a row */
130 break;
131 *rr++ = '\n';
132 *rr++ = c;
133 }
134 if (rr >= er)
135 error(FATAL, "record `%.20ws...' too long", record);
136 *rr = 0;
137 if (mustfld)
138 fldbld();
139 if (c != EOF || rr > record) { /* normal record */
140 recloc->tval &= ~NUM;
141 recloc->tval |= STR;
142 ++nrloc->fval;
143 nrloc->tval &= ~STR;
144 nrloc->tval |= NUM;
145 return (1);
146 }
147 /* EOF arrived on this file; set up next */
148 if (infile != stdin)
149 fclose(infile);
150 infile = NULL;
151 svargc--;
152 svargv++;
153 }
154 return (0); /* true end of file */
155 }
156
/*
 * isclvar()
 *
 * Returns 1 if the input string, arg, is a variable assignment,
 * otherwise returns 0.  A NULL arg is not an assignment.
 *
 * An argument to awk can be either a pathname of a file, or a variable
 * assignment. An operand that begins with an underscore or alphabetic
 * character from the portable character set, followed by a sequence of
 * underscores, digits, and alphabetics from the portable character set,
 * followed by the '=' character, shall specify a variable assignment
 * rather than a pathname.
 *
 * iswalpha() and iswalnum() follow the current locale and may accept
 * characters outside the portable character set, so each character is
 * also required to lie in the 7-bit range.
 */
static int
isclvar(wchar_t *arg)
{
	wchar_t *p = arg;

	if (p == NULL)
		return (0);

	/* Begins with an underscore or a portable alphabetic character */
	if (!(*p == L'_' ||
	    ((unsigned long)*p < 0x80 && iswalpha((wint_t)*p))))
		return (0);

	/* followed by a sequence of underscores, digits, and alphabetics */
	for (p++; *p != 0; p++) {
		if (*p == L'_')
			continue;
		if ((unsigned long)*p < 0x80 && iswalnum((wint_t)*p))
			continue;
		break;
	}

	/* the name must be terminated by '=' */
	return (*p == L'=');
}
195
196 static void
setclvar(wchar_t * s)197 setclvar(wchar_t *s) /* set var=value from s */
198 {
199 wchar_t *p;
200 CELL *q;
201
202
203 for (p = s; *p != '='; p++)
204 ;
205 *p++ = 0;
206 q = setsymtab(s, tostring(p), 0.0, STR, symtab);
207 setsval(q, p);
208 dprintf("command line set %ws to |%ws|\n", s, p, NULL);
209 }
210
211
212 void
fldbld(void)213 fldbld(void)
214 {
215 wchar_t *r, *fr, sep, c;
216 static wchar_t L_NF[] = L"NF";
217 CELL *p, *q;
218 int i, j;
219
220
221 r = record;
222 fr = fields;
223 i = 0; /* number of fields accumulated here */
224 if ((sep = **FS) == ' ')
225 for (i = 0; /* dummy */; /* dummy */) {
226 c = *r;
227 while (iswblank(c) || c == '\t' || c == '\n')
228 c = *(++r);
229 if (*r == 0)
230 break;
231 i++;
232 if (i >= MAXFLD)
233 error(FATAL,
234 "record `%.20ws...' has too many fields", record);
235 if (!(fldtab[i].tval&FLD))
236 xfree(fldtab[i].sval);
237 fldtab[i].sval = fr;
238 fldtab[i].tval = FLD | STR;
239 do {
240 *fr++ = *r++;
241 c = *r;
242 } while (! iswblank(c) && c != '\t' &&
243 c != '\n' && c != '\0');
244
245
246 *fr++ = 0;
247
248 } else if (*r != 0) /* if 0, it's a null field */
249 for (;;) {
250 i++;
251 if (i >= MAXFLD)
252 error(FATAL,
253 "record `%.20ws...' has too many fields", record);
254 if (!(fldtab[i].tval&FLD))
255 xfree(fldtab[i].sval);
256 fldtab[i].sval = fr;
257 fldtab[i].tval = FLD | STR;
258 while ((c = *r) != sep && c != '\n' && c != '\0')
259 /* \n always a separator */
260 *fr++ = *r++;
261 *fr++ = 0;
262 if (*r++ == 0)
263 break;
264 }
265 *fr = 0;
266 /* clean out junk from previous record */
267 for (p = maxmfld, q = &fldtab[i]; p > q; p--) {
268 if (!(p->tval&FLD))
269 xfree(p->sval);
270 p->tval = STR | FLD;
271 p->sval = L_NULL;
272 }
273 maxfld = i;
274 maxmfld = &fldtab[i];
275 donefld = 1;
276 for (i = 1; i <= maxfld; i++)
277 if (isanumber(fldtab[i].sval)) {
278 fldtab[i].fval = watof(fldtab[i].sval);
279 fldtab[i].tval |= NUM;
280 }
281 setfval(lookup(L_NF, symtab, 0), (awkfloat) maxfld);
282 if (dbg)
283 for (i = 0; i <= maxfld; i++)
284 printf("field %d: |%ws|\n", i, fldtab[i].sval);
285 }
286
287
288 void
recbld(void)289 recbld(void)
290 {
291 int i;
292 wchar_t *r, *p;
293
294
295 if (donefld == 0 || donerec == 1)
296 return;
297 r = record;
298 for (i = 1; i <= *NF; i++) {
299 p = getsval(&fldtab[i]);
300 while (*r++ = *p++)
301 ;
302 *(r-1) = **OFS;
303 }
304 *(r-1) = '\0';
305 dprintf("in recbld FS=%o, recloc=%o\n", **FS, recloc, NULL);
306 recloc->tval = STR | FLD;
307 dprintf("in recbld FS=%o, recloc=%o\n", **FS, recloc, NULL);
308 if (r > record+RECSIZE)
309 error(FATAL, "built giant record `%.20ws...'", record);
310 dprintf("recbld = |%ws|\n", record, NULL, NULL);
311 }
312
313
314 CELL *
fieldadr(int n)315 fieldadr(int n)
316 {
317 if (n < 0 || n >= MAXFLD)
318 error(FATAL, "trying to access field %d", n);
319 return (&fldtab[n]);
320 }
321
322
323 int errorflag = 0;
324
325
326 int
yyerror(char * s)327 yyerror(char *s)
328 {
329 fprintf(stderr,
330 gettext("awk: %s near line %lld\n"), gettext(s), lineno);
331 errorflag = 2;
332 return (0);
333 }
334
335
336 void
error(int f,char * fmt,...)337 error(int f, char *fmt, ...)
338 {
339 va_list ap;
340
341 va_start(ap, fmt);
342 fprintf(stderr, "awk: ");
343 vfprintf(stderr, gettext(fmt), ap);
344 va_end(ap);
345 fprintf(stderr, "\n");
346 if (NR && *NR > 0)
347 fprintf(stderr, gettext(" record number %g\n"), *NR);
348 if (f)
349 exit(2);
350 }
351
352
/*
 * PUTS()
 *
 * Hand the string s, followed by a newline, to dprintf().
 * NOTE(review): dprintf is defined outside this file; presumably it only
 * produces output when debugging is enabled - confirm in awk.h.
 */
void
PUTS(char *s)
{
	dprintf("%s\n", s, NULL, NULL);
}
358
359
#define	MAXEXPON	38	/* maximum exponent for fp number */


/*
 * iswdigit() may also accept digits from code sets other than code set 0,
 * so a digit only counts when it is an ASCII character as well.
 */
static int
ascii_digit(wchar_t ch)
{
	return (iswdigit(ch) && iswascii(ch));
}

/*
 * isanumber()
 *
 * Return 1 if s looks like a number, otherwise 0.  The accepted form is:
 * optional spaces, tabs and newlines; an optional sign; fewer than
 * MAXEXPON integer digits and/or a radix point followed by digits; an
 * optional exponent of 'e' or 'E', an optional sign and one or two digits
 * whose value is below MAXEXPON; optional trailing spaces, tabs and
 * newlines.  An empty or all-blank string is not a number.
 */
int
isanumber(wchar_t *s)
{
	extern wchar_t radixpoint;
	int ndigits = 0;	/* digits in front of the radix point */
	int sawpoint = 0;
	int sawfrac = 0;	/* at least one digit after the radix point */
	wchar_t *expdigits;

	while (*s == ' ' || *s == '\t' || *s == '\n')
		s++;
	if (*s == '\0')
		return (0);	/* empty stuff isn't number */
	if (*s == '+' || *s == '-')
		s++;

	/* the mantissa has to start with a digit or the radix point */
	if (!ascii_digit(*s) && *s != radixpoint)
		return (0);
	while (ascii_digit(*s)) {
		ndigits++;
		s++;
	}
	if (ndigits >= MAXEXPON)
		return (0);	/* too many digits to convert */

	if (*s == radixpoint) {
		sawpoint = 1;
		s++;
	}
	if (ascii_digit(*s)) {
		sawfrac = 1;
		while (ascii_digit(*s))
			s++;
	}
	/* a radix point on its own is not a number */
	if (ndigits == 0 && !(sawpoint && sawfrac))
		return (0);

	if (*s == 'e' || *s == 'E') {
		s++;
		if (*s == '+' || *s == '-')
			s++;
		if (!ascii_digit(*s))
			return (0);
		expdigits = s;
		while (ascii_digit(*s))
			s++;
		if (s - expdigits > 2)
			return (0);
		if (s - expdigits == 2 &&
		    10 * (expdigits[0] - '0') + expdigits[1] - '0' >= MAXEXPON)
			return (0);
	}

	while (*s == ' ' || *s == '\t' || *s == '\n')
		s++;
	return (*s == '\0' ? 1 : 0);
}
431 char *
toeuccode(str)432 toeuccode(str)
433 wchar_t *str;
434 {
435 static char euccode[RECSIZE];
436
437 (void) wcstombs(euccode, str, RECSIZE);
438 return (euccode);
439 }
440