xref: /illumos-gate/usr/src/cmd/oawk/awk.lx.l (revision 2a8bcb4e)
1 %{
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License, Version 1.0 only
7  * (the "License").  You may not use this file except in compliance
8  * with the License.
9  *
10  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
11  * or http://www.opensolaris.org/os/licensing.
12  * See the License for the specific language governing permissions
13  * and limitations under the License.
14  *
15  * When distributing Covered Code, include this CDDL HEADER in each
16  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
17  * If applicable, add the following below this CDDL HEADER, with the
18  * fields enclosed by brackets "[]" replaced with your own identifying
19  * information: Portions Copyright [yyyy] [name of copyright owner]
20  *
21  * CDDL HEADER END
22  */
23 /*
24  * Copyright 1996 Sun Microsystems, Inc.  All rights reserved.
25  * Use is subject to license terms.
26  */
27 %}
28 %{
29 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
30 %}
31 %{
32 /*	  All Rights Reserved  	*/
33 %}
34 
35 %Start A str chc sc reg comment
36 
37 %{
38 #include	"awk.h"
39 #include	"awk.def"
40 #undef	input	/* defeat lex */
41 extern int	yylval;
42 extern int	mustfld;
43 
/* Line number within the awk program text; the scanner rules bump it. */
long long	lineno	= 1;

/*
 * RETURN(x) hands token x back from yylex().  DEBUG builds first trace
 * the token through ptoken() when dbg is set.
 */
#ifdef	DEBUG
#define	RETURN(x)	{if (dbg) ptoken(x); return (x); }
#else
#define	RETURN(x)	return (x)
#endif

/* Capacity, in wide characters, of the literal scratch buffer cbuf. */
#define	CBUFLEN	150

/*
 * CADD appends the first character of the current match to cbuf.  Once
 * the buffer is nearly full it reports the error and drops the scanner
 * back into start condition A.
 */
#define	CADD	{ \
		cbuf[clen++] = yytext[0]; \
		if (clen >= CBUFLEN-1) { \
			yyerror("string too long", cbuf); \
			BEGIN A; \
		} \
	}

/*
 * cbuf collects the text of the string literal or character class being
 * scanned and clen is the number of characters stored so far.  cflag is
 * 1 when the current character class was opened with a caret, else 0.
 */
wchar_t cbuf[CBUFLEN];
int	clen, cflag;
55 %}
56 
57 %a	50000
58 %o	50000
59 
60 A	[a-zA-Z_]
61 B	[a-zA-Z0-9_]
62 D	[0-9]
63 WS	[ \t]
64 
65 %%
66 	switch (yybgin-yysvec-1) {	/* witchcraft */
67 	case 0:
68 		BEGIN A;
69 		break;
70 	case sc:
71 		BEGIN A;
72 		RETURN('}');
73 	}
74 
<A>^\n		{ lineno++; /* empty line: counted, no token */ }
<A>^{WS}*#.*\n	{ lineno++; /* comment-only line: counted, no token */ }
<A>{WS}		{ ; /* blanks and tabs between tokens are dropped */ }
<A>"\\"\n	{ lineno++; /* escaped newline: counted, no token */ }
<reg>"\\"\n	{ lineno++; }
<A>"||"		{ RETURN(BOR); }
<A>BEGIN	{ RETURN(XBEGIN); }
<A>END		{ RETURN(XEND); }
<A>PROGEND	{ RETURN(EOF); }
<A>"&&"		{ RETURN(AND); }
<A>"!"		{ RETURN(NOT); }
<A>"!="		{ /* comparison operators: RELOP, operator code in yylval */
		yylval = NE;
		RETURN(RELOP);
		}
<A>"~"		{ yylval = MATCH;	RETURN(MATCHOP); }
<A>"!~"		{ yylval = NOTMATCH;	RETURN(MATCHOP); }
<A>"<"		{ yylval = LT;		RETURN(RELOP); }
<A>"<="		{ yylval = LE;		RETURN(RELOP); }
<A>"=="		{ yylval = EQ;		RETURN(RELOP); }
<A>">="		{ yylval = GE;		RETURN(RELOP); }
<A>">"		{ yylval = GT;		RETURN(RELOP); }
<A>">>"		{ /* the append redirection is also delivered as a RELOP */
		yylval = APPEND;
		RETURN(RELOP);
		}
<A>"++"		{ yylval = INCR;	RETURN(INCR); }
<A>"--"		{ yylval = DECR;	RETURN(DECR); }
<A>"+="		{ /* assignment operators: ASGNOP, operator code in yylval */
		yylval = ADDEQ;
		RETURN(ASGNOP);
		}
<A>"-="		{ yylval = SUBEQ;	RETURN(ASGNOP); }
<A>"*="		{ yylval = MULTEQ;	RETURN(ASGNOP); }
<A>"/="		{ yylval = DIVEQ;	RETURN(ASGNOP); }
<A>"%="		{ yylval = MODEQ;	RETURN(ASGNOP); }
<A>"="		{ yylval = ASSIGN;	RETURN(ASGNOP); }
103 
<A>"$"{D}+	{
		/*
		 * A dollar sign followed by digits.  A non-zero number is a
		 * FIELD whose value comes from fieldadr(); number zero is
		 * looked up in symtab under the name $record and returned
		 * as a STRING.
		 */
		static wchar_t record_name[] = L"$record";

		if (watoi(yytext+1) != 0) {
			yylval = fieldadr(watoi(yytext+1));
			RETURN(FIELD);
		}
		yylval = (int)lookup(record_name, symtab, 0);
		RETURN(STRING);
		}
<A>"$"{WS}*	{
		/* a dollar sign not followed by digits */
		RETURN(INDIRECT);
		}
<A>NF		{
		/* NF is entered as a numeric symbol and sets mustfld */
		mustfld = 1;
		yylval = (int)setsymtab(yytext, NULL, 0.0, NUM, symtab);
		RETURN(VAR);
		}
<A>({D}+("."?){D}*|"."{D}+)((e|E)("+"|-)?{D}+)?	{
		/*
		 * Numeric constant: entered in symtab under its own spelling
		 * with the value computed by watof().
		 */
		yylval = (int)setsymtab(yytext, NULL, watof(yytext),
		    CON|NUM, symtab);
		RETURN(NUMBER);
		}
<A>"}"{WS}*\n	{
		/*
		 * A closing brace is delivered as two tokens: the semicolon
		 * now, and the brace itself on the next call, which finds the
		 * scanner in start condition sc.
		 */
		lineno++;
		BEGIN sc;
		RETURN(';');
		}
<A>"}"		{
		BEGIN sc;
		RETURN(';');
		}
<A>";"\n		{
		/* a newline directly after a semicolon yields no NL token */
		lineno++;
		RETURN(';');
		}
<A>\n		{
		lineno++;
		RETURN(NL);
		}
<A>while	{ RETURN(WHILE); }
<A>for		{ RETURN(FOR); }
<A>if		{ RETURN(IF); }
<A>else		{ RETURN(ELSE); }
<A>next		{ RETURN(NEXT); }
<A>exit		{ RETURN(EXIT); }
<A>break	{ RETURN(BREAK); }
<A>continue	{ RETURN(CONTINUE); }
<A>print	{ yylval = PRINT;	RETURN(PRINT); }
<A>printf	{ yylval = PRINTF;	RETURN(PRINTF); }
<A>sprintf	{ yylval = SPRINTF;	RETURN(SPRINTF); }
<A>split	{ yylval = SPLIT;	RETURN(SPLIT); }
<A>substr	{ RETURN(SUBSTR); }
<A>index	{ RETURN(INDEX); }
<A>in		{ RETURN(IN); }
<A>getline	{ RETURN(GETLINE); }
<A>length	{ /* built-in functions: FNCN, function code in yylval */
		yylval = FLENGTH;
		RETURN(FNCN);
		}
<A>log		{ yylval = FLOG;	RETURN(FNCN); }
<A>int		{ yylval = FINT;	RETURN(FNCN); }
<A>exp		{ yylval = FEXP;	RETURN(FNCN); }
<A>sqrt		{ yylval = FSQRT;	RETURN(FNCN); }
<A>{A}{B}*	{
		/*
		 * Any other identifier is a variable.  It is entered in
		 * symtab with a copy of the empty string as its string value
		 * and 0.0 as its number.  This rule has to stay below the
		 * keyword rules, which win ties by coming first.
		 */
		static wchar_t L_empty[] = { 0 };

		yylval = (int)setsymtab(yytext, tostring(L_empty), 0.0,
		    STR|NUM, symtab);
		RETURN(VAR);
		}
<A>\"		{
		/* opening quote: start collecting a string literal in cbuf */
		clen = 0;
		BEGIN str;
		}

<A>#		{ BEGIN comment; }
<comment>\n	{
		/* the newline that ends a trailing comment still counts */
		BEGIN A;
		lineno++;
		RETURN(NL);
		}
<comment>.	{ ; /* comment text is discarded */ }

<A>.		{
		/* any other character is its own token */
		yylval = yytext[0];
		RETURN(yytext[0]);
		}
159 
<reg>"["	{
		/* character class: the members are collected in cbuf */
		BEGIN chc;
		clen = 0;
		cflag = 0;
		}
<reg>"[^"	{
		/* same, with cflag recording the leading caret */
		BEGIN chc;
		clen = 0;
		cflag = 1;
		}

<reg>"?"	{ RETURN(QUEST); }
<reg>"+"	{ RETURN(PLUS); }
<reg>"*"	{ RETURN(STAR); }
<reg>"|"	{ RETURN(OR); }
<reg>"."	{ RETURN(DOT); }
<reg>"("	{ RETURN('('); }
<reg>")"	{ RETURN(')'); }
<reg>"^"	{ RETURN('^'); }
<reg>"$"	{ RETURN('$'); }
<reg>\\{D}{D}{D}	{
		/* backslash plus three digits: value read as octal */
		wsscanf(yytext+1, "%o", &yylval);
		RETURN(CHAR);
		}
<reg>\\.	{
		/* other escapes: n t b r f are decoded, the rest is literal */
		switch (yytext[1]) {
		case 'n':
			yylval = '\n';
			break;
		case 't':
			yylval = '\t';
			break;
		case 'b':
			yylval = '\b';
			break;
		case 'r':
			yylval = '\r';
			break;
		case 'f':
			yylval = '\f';
			break;
		default:
			yylval = yytext[1];
			break;
		}
		RETURN(CHAR);
		}
<reg>"/"	{
		/*
		 * End of the expression: go back to condition A and push the
		 * slash back so that it is scanned again there.
		 */
		BEGIN A;
		unput('/');
		}
<reg>\n		{
		yyerror("newline in regular expression");
		lineno++;
		BEGIN A;
		}
<reg>.		{
		yylval = yytext[0];
		RETURN(CHAR);
		}
184 
<str>\"		{
		wchar_t *val;

		/*
		 * Closing quote.  The collected text becomes the value of a
		 * string constant; the symbol itself is entered under that
		 * text followed by one blank.  CADD leaves this condition
		 * once clen reaches CBUFLEN-1, so both stores below stay
		 * inside cbuf.
		 */
		BEGIN A;
		cbuf[clen] = 0;
		val = tostring(cbuf);
		cbuf[clen] = ' ';
		cbuf[++clen] = 0;
		yylval = (int)setsymtab(cbuf, val, 0.0, CON|STR, symtab);
		RETURN(STRING);
		}
<str>\n		{ yyerror("newline in string"); lineno++; BEGIN A; }
<str>"\\\""	{
		/*
		 * Escape sequences used to be stored with a bare
		 * cbuf[clen++] and no limit check, so a literal or class
		 * made of enough escapes ran off the end of cbuf.  They now
		 * go through CADD like ordinary characters.  CADD appends
		 * yytext[0], so the decoded character is put there first;
		 * the match has already been made, and overwriting the
		 * text without lengthening it is permitted.
		 */
		yytext[0] = '"'; CADD;
		}
<str>"\\"n	{ yytext[0] = '\n'; CADD; }
<chc>"\\"n	{ yytext[0] = '\n'; CADD; }
<str>"\\"t	{ yytext[0] = '\t'; CADD; }
<chc>"\\"t	{ yytext[0] = '\t'; CADD; }
<str>"\\"b	{ yytext[0] = '\b'; CADD; }
<chc>"\\"b	{ yytext[0] = '\b'; CADD; }
<str>"\\"r	{ yytext[0] = '\r'; CADD; }
<chc>"\\"r	{ yytext[0] = '\r'; CADD; }
<str>"\\"f 	{ yytext[0] = '\f'; CADD; }
<chc>"\\"f 	{ yytext[0] = '\f'; CADD; }
<str>"\\\\"	{ /* yytext[0] already holds the backslash */ CADD; }
<chc>"\\\\"	{ /* yytext[0] already holds the backslash */ CADD; }
<str>.		{ CADD; }

<chc>"\\""]"	{ yytext[0] = ']'; CADD; }
<chc>"]"	{
		/*
		 * End of the class: return a copy of its members as CCL, or
		 * as NCCL when the class was opened with a caret.
		 */
		BEGIN reg;
		cbuf[clen] = 0;
		yylval = (int)tostring(cbuf);
		if (cflag == 0) {
			RETURN(CCL);
		} else {
			RETURN(NCCL);
		}
		}
<chc>\n		{ yyerror("newline in character class"); lineno++; BEGIN A; }
<chc>.		{ CADD; }
211 
212 %%
213 
/*
 * Scanner input routine, used in place of the input macro that the
 * prologue of this file undefines.
 *
 * The next character is taken from, in order of preference:
 *   - yysbuf, when yysptr shows it is not empty (presumably the stack
 *     of characters pushed back by unput -- lex internals);
 *   - the in-memory program text at lexprog, when yyin is NULL;
 *   - the stream yyin, read with getwc().
 *
 * yylineno is incremented for every newline returned.  EOF is reported
 * to the caller as 0.
 *
 * NOTE(review): the lexprog path steps past the terminating 0 after
 * returning it.  Confirm that input() is never called again once the
 * end of an in-memory program has been reached.
 */
int
input()
{
	extern wchar_t *lexprog;
	int ch;

	if (yysptr > yysbuf) {
		ch = U(*--yysptr);
	} else if (yyin != NULL) {
		ch = getwc(yyin);
	} else {
		ch = *lexprog++;
	}

	if (ch == EOF)
		return (0);
	if (ch == '\n')
		yylineno++;
	return (ch);
}
232 
/*
 * Put the scanner into start condition reg, so that the input that
 * follows is tokenized by the regular expression rules.  Those rules
 * switch back to condition A when they reach the closing slash.
 */
void
startreg()
{
	BEGIN reg;
}
238