1%{
2/*
3 * Copyright (C) Lucent Technologies 1997
4 * All Rights Reserved
5 *
6 * Permission to use, copy, modify, and distribute this software and
7 * its documentation for any purpose and without fee is hereby
8 * granted, provided that the above copyright notice appear in all
9 * copies and that both that the copyright notice and this
10 * permission notice and warranty disclaimer appear in supporting
11 * documentation, and that the name Lucent Technologies or any of
12 * its entities not be used in advertising or publicity pertaining
13 * to distribution of the software without specific, written prior
14 * permission.
15 *
16 * LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18 * IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19 * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21 * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22 * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23 * THIS SOFTWARE.
24 */
25
26/*
27 * CDDL HEADER START
28 *
29 * The contents of this file are subject to the terms of the
30 * Common Development and Distribution License, Version 1.0 only
31 * (the "License").  You may not use this file except in compliance
32 * with the License.
33 *
34 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
35 * or http://www.opensolaris.org/os/licensing.
36 * See the License for the specific language governing permissions
37 * and limitations under the License.
38 *
39 * When distributing Covered Code, include this CDDL HEADER in each
40 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
41 * If applicable, add the following below this CDDL HEADER, with the
42 * fields enclosed by brackets "[]" replaced with your own identifying
43 * information: Portions Copyright [yyyy] [name of copyright owner]
44 *
45 * CDDL HEADER END
46 */
47%}
48/*
49 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
50 * Use is subject to license terms.
51 */
52
53/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
54/*	  All Rights Reserved  	*/
55
56%{
57#include "awk.h"
58
/* Reports a syntax error if item's name already appears in list (defined below). */
void checkdup(Node *list, Cell *item);
/* Always returns 1, which by lex convention means there is no further input. */
int yywrap(void) { return(1); }

Node	*beginloc = NULL;	/* BEGIN actions collected by pa_stat */
Node	*endloc = NULL;		/* END actions collected by pa_stat */
int	infunc	= 0;		/* nonzero while parsing a function definition's body */
int	inloop	= 0;		/* nesting depth of enclosing while, for, do */
char	*curfname = NULL;	/* current function name */
Node	*arglist = NULL;	/* list of args for current function */
/* Helpers defined after the rules section. */
static void	setfname(Cell *);
static int	constnode(Node *);
static char	*strnode(Node *);
static Node	*notnull(Node *);
72%}
73
/*
 * Semantic value carried by grammar symbols; the <tag> on each
 * %token / %type declaration below selects the member.
 */
%union {
	Node	*p;	/* parse-tree node (most nonterminals) */
	Cell	*cp;	/* symbol-table cell (VAR, NUMBER, STRING, ...) */
	int	i;	/* integer value (operators, keywords, punctuation) */
	char	*s;	/* string value (REGEXPR, reg_expr) */
}
80
/*
 * Token declarations.  Several names here never appear in a rule below
 * (e.g. PROGRAM, PASTAT, ADD, PREINCR); the actions use them as node
 * type codes passed to stat*()/op*().
 */
%token	<i>	FIRSTTOKEN	/* must be first */
%token	<p>	PROGRAM PASTAT PASTAT2 XBEGIN XEND
%token	<i>	NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']'
%token	<i>	ARRAY
%token	<i>	MATCH NOTMATCH MATCHOP
%token	<i>	FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE
%token	<i>	AND BOR APPEND EQ GE GT LE LT NE IN
%token	<i>	ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
%token	<i>	SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
%token	<i>	ADD MINUS MULT DIVIDE MOD
%token	<i>	ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
%token	<i>	PRINT PRINTF SPRINTF
%token	<p>	ELSE INTEST CONDEXPR
%token	<i>	POSTINCR PREINCR POSTDECR PREDECR
%token	<cp>	VAR IVAR VARNF CALL NUMBER STRING
%token	<s>	REGEXPR
97
/* Union member used for the value of each nonterminal. */
%type	<p>	pas pattern ppattern plist pplist patlist prarg term re
%type	<p>	pa_pat pa_stat pa_stats
%type	<s>	reg_expr
%type	<p>	simple_stmt opt_simple_stmt stmt stmtlist
%type	<p>	var varname funcname varlist
%type	<p>	for if else while
%type	<i>	do st
%type	<i>	pst opt_pst lbrace rbrace rparen comma nl opt_nl and bor
%type	<i>	subop print
107
/*
 * Operator precedence and associativity.  In yacc, each successive line
 * binds more tightly than the one before it, so ASGNOP is the loosest
 * and INDIRECT the tightest.  The keyword/operand tokens listed between
 * the relational operators and CAT give precedence to rules and
 * lookaheads involved in implicit concatenation (see the %prec CAT
 * rules in pattern and ppattern).
 */
%right	ASGNOP
%right	'?'
%right	':'
%left	BOR
%left	AND
%left	GETLINE
%nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|'
%left	ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FUNC
%left	GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER
%left	PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR
%left	REGEXPR VAR VARNF IVAR WHILE '('
%left	CAT
%left	'+' '-'
%left	'*' '/' '%'
%left	NOT UMINUS UPLUS
%right	POWER
%right	DECR INCR
%left	INDIRECT
%token	LASTTOKEN	/* must be last */
127
128%%
129
/*
 * program: first rule of the grammar (the start symbol, as no %start is
 * given).  When no error has been flagged, the result is stored in
 * winner as a PROGRAM node built from the BEGIN actions, the
 * pattern-action list and the END actions.  On a parse error the
 * lookahead is discarded, bracecheck() is called and "bailing out" is
 * reported.
 */
program:
	  pas	{ if (errorflag==0)
			winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc); }
	| error	{ yyclearin; bracecheck(); SYNTAX("bailing out"); }
	;
135
/*
 * and, bor, comma, do, else: each accepts the named token followed by
 * any number of NL tokens, so a line break is allowed after it.
 */
and:
	  AND | and NL
	;

bor:
	  BOR | bor NL
	;

comma:
	  ',' | comma NL
	;

do:
	  DO | do NL
	;

else:
	  ELSE | else NL
	;
155
/*
 * for: the three forms of the for statement.
 *   1. for (init; cond; incr) stmt  - cond is passed through notnull()
 *   2. for (init; ; incr) stmt      - no condition (NIL)
 *   3. for (var in array) stmt      - built as an IN node
 * The mid-rule action {inloop++;} occupies a symbol position, which is
 * why the body is $12, $10 and $8 respectively.  inloop is decremented
 * again once the body has been parsed.
 */
for:
	  FOR '(' opt_simple_stmt ';' opt_nl pattern ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
		{ --inloop; $$ = stat4(FOR, $3, notnull($6), $9, $12); }
	| FOR '(' opt_simple_stmt ';'  ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
		{ --inloop; $$ = stat4(FOR, $3, NIL, $7, $10); }
	| FOR '(' varname IN varname rparen {inloop++;} stmt
		{ --inloop; $$ = stat3(IN, $3, makearr($5), $8); }
	;
164
/*
 * funcname: the name in a function definition, lexed as either VAR or
 * CALL.  setfname() validates and records it.  No value is assigned to
 * $$ here; pa_stat casts $2 back to Cell *, relying on yacc's default
 * $$ = $1 copy of the token's cell.
 */
funcname:
	  VAR	{ setfname($1); }
	| CALL	{ setfname($1); }
	;
169
/* if: "if (cond)" header; the value is the condition passed through notnull(). */
if:
	  IF '(' pattern rparen		{ $$ = notnull($3); }
	;
173
/* lbrace: '{' followed by any number of newlines. */
lbrace:
	  '{' | lbrace NL
	;

/* nl: one or more newlines. */
nl:
	  NL | nl NL
	;

/* opt_nl: zero or more newlines. */
opt_nl:
	  /* empty */	{ $$ = 0; }
	| nl
	;

/* opt_pst: optional run of newlines and semicolons (see pst). */
opt_pst:
	  /* empty */	{ $$ = 0; }
	| pst
	;


/* opt_simple_stmt: a simple statement or nothing (value 0), as used in for headers. */
opt_simple_stmt:
	  /* empty */			{ $$ = 0; }
	| simple_stmt
	;
197
/*
 * pas: the whole list of pattern-action statements, with optional
 * separators before and after.  A program with only separators yields 0.
 */
pas:
	  opt_pst			{ $$ = 0; }
	| opt_pst pa_stats opt_pst	{ $$ = $2; }
	;

/* pa_pat: a pattern used to select records, passed through notnull(). */
pa_pat:
	  pattern	{ $$ = notnull($1); }
	;
206
/*
 * pa_stat: one top-level item.
 *   pattern                    - action defaults to a PRINT of rectonode()
 *   pattern { stmts }
 *   pat1, pat2 [ { stmts } ]   - two-pattern form, built by pa2stat()
 *   { stmts }                  - no pattern (NIL)
 *   BEGIN / END { stmts }      - appended to beginloc / endloc; value 0
 *   function definition        - registered through defn(); value 0
 * In the function rule the mid-rule {infunc++;} is $6, so the body's
 * stmtlist is $8.  curfname is cleared once the definition is complete.
 */
pa_stat:
	  pa_pat			{ $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); }
	| pa_pat lbrace stmtlist '}'	{ $$ = stat2(PASTAT, $1, $3); }
	| pa_pat ',' opt_nl pa_pat		{ $$ = pa2stat($1, $4, stat2(PRINT, rectonode(), NIL)); }
	| pa_pat ',' opt_nl pa_pat lbrace stmtlist '}'	{ $$ = pa2stat($1, $4, $6); }
	| lbrace stmtlist '}'		{ $$ = stat2(PASTAT, NIL, $2); }
	| XBEGIN lbrace stmtlist '}'
		{ beginloc = linkum(beginloc, $3); $$ = 0; }
	| XEND lbrace stmtlist '}'
		{ endloc = linkum(endloc, $3); $$ = 0; }
	| FUNC funcname '(' varlist rparen {infunc++;} lbrace stmtlist '}'
		{ infunc--; curfname=0; defn((Cell *)$2, $4, $8); $$ = 0; }
	;
220
/* pa_stats: one or more pa_stat items, optionally separated, chained with linkum(). */
pa_stats:
	  pa_stat
	| pa_stats opt_pst pa_stat	{ $$ = linkum($1, $3); }
	;

/* patlist: one or more comma-separated patterns, chained with linkum(). */
patlist:
	  pattern
	| patlist comma pattern		{ $$ = linkum($1, $3); }
	;
230
/*
 * ppattern: a restricted form of pattern, reached through pplist for the
 * unparenthesized arguments of print/printf.  Compared with pattern it
 * has no relational operators (EQ GE GT LE LT NE) and no '|' GETLINE
 * forms; simple_stmt consumes GT, APPEND and '|' after the argument
 * list as output redirections.
 *
 * For "x MATCHOP y" with a constant right operand the regular expression
 * is compiled now via makedfa() and the first operand of the node is
 * NIL; otherwise the first operand is (Node *)1 and the expression node
 * itself is stored (presumably so that the regex is built at run time --
 * confirm against the interpreter).
 */
ppattern:
	  var ASGNOP ppattern		{ $$ = op2($2, $1, $3); }
	| ppattern '?' ppattern ':' ppattern %prec '?'
		{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
	| ppattern bor ppattern %prec BOR
		{ $$ = op2(BOR, notnull($1), notnull($3)); }
	| ppattern and ppattern %prec AND
		{ $$ = op2(AND, notnull($1), notnull($3)); }
	| ppattern MATCHOP reg_expr	{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
	| ppattern MATCHOP ppattern
		{ if (constnode($3))
			$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
		  else
			$$ = op3($2, (Node *)1, $1, $3); }
	| ppattern IN varname		{ $$ = op2(INTEST, $1, makearr($3)); }
	| '(' plist ')' IN varname	{ $$ = op2(INTEST, $2, makearr($5)); }
	| ppattern term %prec CAT	{ $$ = op2(CAT, $1, $2); }
	| re
	| term
	;
251
/*
 * pattern: the general expression nonterminal -- assignment, ?:, logical
 * and relational operators, regex match, array membership, "cmd |
 * getline", implicit concatenation (two adjacent operands, %prec CAT),
 * a bare regular expression, or a term.
 *
 * Operands of BOR, AND and the ?: condition go through notnull().
 * MATCHOP with a constant right operand compiles the regex immediately
 * with makedfa(); otherwise (Node *)1 is stored as the first operand
 * and the expression is kept for later.
 *
 * When `safe` is set, the "| getline" forms report a syntax error and
 * do not assign $$, so it keeps yacc's default value ($1).
 */
pattern:
	  var ASGNOP pattern		{ $$ = op2($2, $1, $3); }
	| pattern '?' pattern ':' pattern %prec '?'
		{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
	| pattern bor pattern %prec BOR
		{ $$ = op2(BOR, notnull($1), notnull($3)); }
	| pattern and pattern %prec AND
		{ $$ = op2(AND, notnull($1), notnull($3)); }
	| pattern EQ pattern		{ $$ = op2($2, $1, $3); }
	| pattern GE pattern		{ $$ = op2($2, $1, $3); }
	| pattern GT pattern		{ $$ = op2($2, $1, $3); }
	| pattern LE pattern		{ $$ = op2($2, $1, $3); }
	| pattern LT pattern		{ $$ = op2($2, $1, $3); }
	| pattern NE pattern		{ $$ = op2($2, $1, $3); }
	| pattern MATCHOP reg_expr	{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
	| pattern MATCHOP pattern
		{ if (constnode($3))
			$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
		  else
			$$ = op3($2, (Node *)1, $1, $3); }
	| pattern IN varname		{ $$ = op2(INTEST, $1, makearr($3)); }
	| '(' plist ')' IN varname	{ $$ = op2(INTEST, $2, makearr($5)); }
	| pattern '|' GETLINE var	{
			if (safe) SYNTAX("cmd | getline is unsafe");
			else $$ = op3(GETLINE, $4, itonp($2), $1); }
	| pattern '|' GETLINE		{
			if (safe) SYNTAX("cmd | getline is unsafe");
			else $$ = op3(GETLINE, (Node*)0, itonp($2), $1); }
	| pattern term %prec CAT	{ $$ = op2(CAT, $1, $2); }
	| re
	| term
	;
284
/* plist: two or more comma-separated patterns (used inside parentheses). */
plist:
	  pattern comma pattern		{ $$ = linkum($1, $3); }
	| plist comma pattern		{ $$ = linkum($1, $3); }
	;

/* pplist: one or more comma-separated ppatterns. */
pplist:
	  ppattern
	| pplist comma ppattern		{ $$ = linkum($1, $3); }
	;

/*
 * prarg: the argument list of print/printf -- nothing (rectonode() is
 * used instead), an unparenthesized pplist, or a parenthesized plist.
 */
prarg:
	  /* empty */			{ $$ = rectonode(); }
	| pplist
	| '(' plist ')'			{ $$ = $2; }
	;
300
/* print: either output keyword; the token's value is passed on by the default action. */
print:
	  PRINT | PRINTF
	;

/* pst: one or more statement separators, each a newline or a semicolon. */
pst:
	  NL | ';' | pst NL | pst ';'
	;

/* rbrace: '}' followed by any number of newlines. */
rbrace:
	  '}' | rbrace NL
	;
312
/*
 * re: a regular expression standing alone as an expression; it becomes a
 * MATCH node applied to rectonode().  "NOT re" negates it.
 */
re:
	   reg_expr
		{ $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); }
	| NOT re	{ $$ = op1(NOT, notnull($2)); }
	;

/*
 * reg_expr: a /.../ literal.  startreg() is called right after the
 * opening '/' is recognized, before the REGEXPR token is read
 * (presumably switching the lexer into regex-reading mode -- confirm in
 * the lexer).  The mid-rule action is $2, so the regex text is $3.
 */
reg_expr:
	  '/' {startreg();} REGEXPR '/'		{ $$ = $3; }
	;

/* rparen: ')' followed by any number of newlines. */
rparen:
	  ')' | rparen NL
	;
326
/*
 * simple_stmt: print/printf (optionally redirected with '|', APPEND or
 * GT), delete, or an expression used as a statement.
 *
 * The redirected print forms are rejected with a syntax error when
 * `safe` is set; in that case $$ is not assigned by the action.
 * "DELETE varname" without a subscript passes 0 as the subscript list.
 * The error alternative discards the lookahead token and reports
 * "illegal statement".
 */
simple_stmt:
	  print prarg '|' term		{
			if (safe) SYNTAX("print | is unsafe");
			else $$ = stat3($1, $2, itonp($3), $4); }
	| print prarg APPEND term	{
			if (safe) SYNTAX("print >> is unsafe");
			else $$ = stat3($1, $2, itonp($3), $4); }
	| print prarg GT term		{
			if (safe) SYNTAX("print > is unsafe");
			else $$ = stat3($1, $2, itonp($3), $4); }
	| print prarg			{ $$ = stat3($1, $2, NIL, NIL); }
	| DELETE varname '[' patlist ']' { $$ = stat2(DELETE, makearr($2), $4); }
	| DELETE varname		{ $$ = stat2(DELETE, makearr($2), 0); }
	| pattern			{ $$ = exptostat($1); }
	| error				{ yyclearin; SYNTAX("illegal statement"); }
	;
343
/* st: statement terminator -- one or more newlines, or ';' with optional newlines. */
st:
	  nl
	| ';' opt_nl
	;
348
/*
 * stmt: a single statement.
 *
 * break/continue report a syntax error when inloop is zero, and
 * next/nextfile when infunc is nonzero; the node is still built after
 * the error is reported.  For do-while and while, inloop is raised
 * around the body by mid-rule actions; those actions occupy symbol
 * positions, so in the do rule the body is $3 and the condition $7, and
 * in the while rule the body is $3.  "simple_stmt st" has no action and
 * so takes yacc's default $$ = $1.  An empty statement (';') yields 0.
 */
stmt:
	  BREAK st		{ if (!inloop) SYNTAX("break illegal outside of loops");
				  $$ = stat1(BREAK, NIL); }
	| CONTINUE st		{  if (!inloop) SYNTAX("continue illegal outside of loops");
				  $$ = stat1(CONTINUE, NIL); }
	| do {inloop++;} stmt {--inloop;} WHILE '(' pattern ')' st
		{ $$ = stat2(DO, $3, notnull($7)); }
	| EXIT pattern st	{ $$ = stat1(EXIT, $2); }
	| EXIT st		{ $$ = stat1(EXIT, NIL); }
	| for
	| if stmt else stmt	{ $$ = stat3(IF, $1, $2, $4); }
	| if stmt		{ $$ = stat3(IF, $1, $2, NIL); }
	| lbrace stmtlist rbrace { $$ = $2; }
	| NEXT st	{ if (infunc)
				SYNTAX("next is illegal inside a function");
			  $$ = stat1(NEXT, NIL); }
	| NEXTFILE st	{ if (infunc)
				SYNTAX("nextfile is illegal inside a function");
			  $$ = stat1(NEXTFILE, NIL); }
	| RETURN pattern st	{ $$ = stat1(RETURN, $2); }
	| RETURN st		{ $$ = stat1(RETURN, NIL); }
	| simple_stmt st
	| while {inloop++;} stmt	{ --inloop; $$ = stat2(WHILE, $1, $3); }
	| ';' opt_nl		{ $$ = 0; }
	;
374
/* stmtlist: one or more statements chained with linkum(). */
stmtlist:
	  stmt
	| stmtlist stmt		{ $$ = linkum($1, $2); }
	;

/* subop: either substitution keyword; its token value selects the node type in term. */
subop:
	  SUB | GSUB
	;
383
/*
 * term: arithmetic expressions, unary operators, function calls,
 * increment/decrement, getline forms, built-in string functions,
 * constants and variables.
 *
 * Notes on individual alternatives:
 *  - "term '/' ASGNOP term" builds a DIVEQ node from a '/' token
 *    followed by ASGNOP (presumably how the lexer delivers "/=" --
 *    confirm in the lexer).
 *  - Unary '-', '+' and NOT all take the precedence of UMINUS.
 *  - BLTIN with no argument list, or with an empty one, is given
 *    rectonode() as its argument.
 *  - INDEX with a regular expression reports a syntax error and then
 *    still builds the node.
 *  - MATCHFCN and subop (SUB/GSUB): a /regex/ literal or constant
 *    operand is compiled at once with makedfa(..., 1) and the node's
 *    first operand is NIL; otherwise the first operand is (Node *)1 and
 *    the expression node is stored.
 *  - subop without a target argument uses rectonode() as the target.
 *  - SPLIT records the separator kind in its last operand: STRING for
 *    an expression or no separator, REGEXPR for a /regex/ literal.
 */
term:
	  term '/' ASGNOP term		{ $$ = op2(DIVEQ, $1, $4); }
	| term '+' term			{ $$ = op2(ADD, $1, $3); }
	| term '-' term			{ $$ = op2(MINUS, $1, $3); }
	| term '*' term			{ $$ = op2(MULT, $1, $3); }
	| term '/' term			{ $$ = op2(DIVIDE, $1, $3); }
	| term '%' term			{ $$ = op2(MOD, $1, $3); }
	| term POWER term		{ $$ = op2(POWER, $1, $3); }
	| '-' term %prec UMINUS		{ $$ = op1(UMINUS, $2); }
	| '+' term %prec UMINUS		{ $$ = op1(UPLUS, $2); }
	| NOT term %prec UMINUS		{ $$ = op1(NOT, notnull($2)); }
	| BLTIN '(' ')'			{ $$ = op2(BLTIN, itonp($1), rectonode()); }
	| BLTIN '(' patlist ')'		{ $$ = op2(BLTIN, itonp($1), $3); }
	| BLTIN				{ $$ = op2(BLTIN, itonp($1), rectonode()); }
	| CALL '(' ')'			{ $$ = op2(CALL, celltonode($1,CVAR), NIL); }
	| CALL '(' patlist ')'		{ $$ = op2(CALL, celltonode($1,CVAR), $3); }
	| CLOSE term			{ $$ = op1(CLOSE, $2); }
	| DECR var			{ $$ = op1(PREDECR, $2); }
	| INCR var			{ $$ = op1(PREINCR, $2); }
	| var DECR			{ $$ = op1(POSTDECR, $1); }
	| var INCR			{ $$ = op1(POSTINCR, $1); }
	| GETLINE var LT term		{ $$ = op3(GETLINE, $2, itonp($3), $4); }
	| GETLINE LT term		{ $$ = op3(GETLINE, NIL, itonp($2), $3); }
	| GETLINE var			{ $$ = op3(GETLINE, $2, NIL, NIL); }
	| GETLINE			{ $$ = op3(GETLINE, NIL, NIL, NIL); }
	| INDEX '(' pattern comma pattern ')'
		{ $$ = op2(INDEX, $3, $5); }
	| INDEX '(' pattern comma reg_expr ')'
		{ SYNTAX("index() doesn't permit regular expressions");
		  $$ = op2(INDEX, $3, (Node*)$5); }
	| '(' pattern ')'		{ $$ = $2; }
	| MATCHFCN '(' pattern comma reg_expr ')'
		{ $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); }
	| MATCHFCN '(' pattern comma pattern ')'
		{ if (constnode($5))
			$$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1));
		  else
			$$ = op3(MATCHFCN, (Node *)1, $3, $5); }
	| NUMBER			{ $$ = celltonode($1, CCON); }
	| SPLIT '(' pattern comma varname comma pattern ')'     /* string */
		{ $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); }
	| SPLIT '(' pattern comma varname comma reg_expr ')'    /* const /regexp/ */
		{ $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); }
	| SPLIT '(' pattern comma varname ')'
		{ $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); }  /* default */
	| SPRINTF '(' patlist ')'	{ $$ = op1($1, $3); }
	| STRING			{ $$ = celltonode($1, CCON); }
	| subop '(' reg_expr comma pattern ')'
		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); }
	| subop '(' pattern comma pattern ')'
		{ if (constnode($3))
			$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode());
		  else
			$$ = op4($1, (Node *)1, $3, $5, rectonode()); }
	| subop '(' reg_expr comma pattern comma var ')'
		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); }
	| subop '(' pattern comma pattern comma var ')'
		{ if (constnode($3))
			$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7);
		  else
			$$ = op4($1, (Node *)1, $3, $5, $7); }
	| SUBSTR '(' pattern comma pattern comma pattern ')'
		{ $$ = op3(SUBSTR, $3, $5, $7); }
	| SUBSTR '(' pattern comma pattern ')'
		{ $$ = op3(SUBSTR, $3, $5, NIL); }
	| var
	;
451
/*
 * var: anything that can be assigned to or incremented -- a plain name,
 * a subscripted array element (the name is converted with makearr()),
 * an IVAR token, or an INDIRECT operator applied to a term.  Both of
 * the last two forms produce an INDIRECT node.
 */
var:
	  varname
	| varname '[' patlist ']'	{ $$ = op2(ARRAY, makearr($1), $3); }
	| IVAR				{ $$ = op1(INDIRECT, celltonode($1, CVAR)); }
	| INDIRECT term			{ $$ = op1(INDIRECT, $2); }
	;
458
/*
 * varlist: the (possibly empty) parameter list of a function definition.
 * The global arglist is kept equal to the list built so far.  Before a
 * further name is appended, checkdup() reports it if it is already in
 * the list; the name is appended regardless.
 */
varlist:
	  /* nothing */		{ arglist = $$ = 0; }
	| VAR			{ arglist = $$ = celltonode($1,CVAR); }
	| varlist comma VAR	{
			checkdup($1, $3);
			arglist = $$ = linkum($1,celltonode($3,CVAR)); }
	;
466
/*
 * varname: a simple name -- VAR (wrapped by celltonode() as CVAR), ARG
 * (its integer token value wrapped by itonp()), or VARNF (its cell
 * pointer cast directly to Node *).
 */
varname:
	  VAR			{ $$ = celltonode($1, CVAR); }
	| ARG			{ $$ = op1(ARG, itonp($1)); }
	| VARNF			{ $$ = op1(VARNF, (Node *) $1); }
	;
472
473
/* while: "while (cond)" header; the value is the condition passed through notnull(). */
while:
	  WHILE '(' pattern rparen	{ $$ = notnull($3); }
	;
477
478%%
479
480static void
481setfname(Cell *p)
482{
483	if (isarr(p))
484		SYNTAX("%s is an array, not a function", p->nval);
485	else if (isfcn(p))
486		SYNTAX("you can't define function %s more than once", p->nval);
487	curfname = p->nval;
488	p->tval |= FCN;
489}
490
491static int
492constnode(Node *p)
493{
494	return isvalue(p) && ((Cell *) (p->narg[0]))->csub == CCON;
495}
496
497static char *
498strnode(Node *p)
499{
500	return ((Cell *)(p->narg[0]))->sval;
501}
502
503static Node *
504notnull(Node *n)
505{
506	switch (n->nobj) {
507	case LE: case LT: case EQ: case NE: case GT: case GE:
508	case BOR: case AND: case NOT:
509		return n;
510	default:
511		return op2(NE, n, nullnode);
512	}
513}
514
515void
516checkdup(Node *vl, Cell *cp)	/* check if name already in list */
517{
518	char *s = cp->nval;
519	for (; vl; vl = vl->nnext) {
520		if (strcmp(s, ((Cell *)(vl->narg[0]))->nval) == 0) {
521			SYNTAX("duplicate argument %s", s);
522			break;
523		}
524	}
525}
526