xref: /illumos-gate/usr/src/cmd/msgfmt/gnu_lex.c (revision 2a8bcb4e)
1*7c478bd9Sstevel@tonic-gate /*
2*7c478bd9Sstevel@tonic-gate  * CDDL HEADER START
3*7c478bd9Sstevel@tonic-gate  *
4*7c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*7c478bd9Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*7c478bd9Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*7c478bd9Sstevel@tonic-gate  * with the License.
8*7c478bd9Sstevel@tonic-gate  *
9*7c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*7c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*7c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*7c478bd9Sstevel@tonic-gate  * and limitations under the License.
13*7c478bd9Sstevel@tonic-gate  *
14*7c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*7c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*7c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*7c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*7c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*7c478bd9Sstevel@tonic-gate  *
20*7c478bd9Sstevel@tonic-gate  * CDDL HEADER END
21*7c478bd9Sstevel@tonic-gate  */
22*7c478bd9Sstevel@tonic-gate /*
23*7c478bd9Sstevel@tonic-gate  * Copyright 2001, 2002 Sun Microsystems, Inc.  All rights reserved.
24*7c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
25*7c478bd9Sstevel@tonic-gate  */
26*7c478bd9Sstevel@tonic-gate 
27*7c478bd9Sstevel@tonic-gate #include "gnu_msgfmt.h"
28*7c478bd9Sstevel@tonic-gate #include "gnu_lex.h"
29*7c478bd9Sstevel@tonic-gate #include "y.tab.h"
30*7c478bd9Sstevel@tonic-gate 
/* Line number in the PO file being scanned; starts at 1. */
int	cur_line = 1;

/*
 * Single-character pushback slot used by po_ungetc() and po_getc().
 * backlen is the number of bytes held in backbuf; 0 means the slot
 * is empty.
 */
static char	backbuf[MB_LEN_MAX];
static int	backlen;
35*7c478bd9Sstevel@tonic-gate 
36*7c478bd9Sstevel@tonic-gate /*
37*7c478bd9Sstevel@tonic-gate  * get_mb() returns one multibyte character.
38*7c478bd9Sstevel@tonic-gate  *
39*7c478bd9Sstevel@tonic-gate  * This function uses the iconv() function to find out one
40*7c478bd9Sstevel@tonic-gate  * multibyte character from a sequence of bytes in the file stream.
41*7c478bd9Sstevel@tonic-gate  * The conversion from the codeset specified in the PO file to UTF-8
42*7c478bd9Sstevel@tonic-gate  * is performed.  The funcition reads another byte and calls iconv(),
43*7c478bd9Sstevel@tonic-gate  * until iconv() successfully returns as a valid UTF-8 character has
44*7c478bd9Sstevel@tonic-gate  * been converted or returns EILSEQ.  If iconv() successfully returned,
45*7c478bd9Sstevel@tonic-gate  * the function returns the read bytes as one character.  Otherwise,
46*7c478bd9Sstevel@tonic-gate  * returns error.  The string converted to UTF-8 in outbuf won't be
47*7c478bd9Sstevel@tonic-gate  * used at all.
48*7c478bd9Sstevel@tonic-gate  */
49*7c478bd9Sstevel@tonic-gate static size_t
get_mb(unsigned char * tmpbuf,unsigned char fc)50*7c478bd9Sstevel@tonic-gate get_mb(unsigned char *tmpbuf, unsigned char fc)
51*7c478bd9Sstevel@tonic-gate {
52*7c478bd9Sstevel@tonic-gate 	int	c;
53*7c478bd9Sstevel@tonic-gate 	char	outbuf[8];			/* max size of a UTF-8 char */
54*7c478bd9Sstevel@tonic-gate 	const char	*inptr;
55*7c478bd9Sstevel@tonic-gate 	char	*outptr;
56*7c478bd9Sstevel@tonic-gate 	size_t	insize = 0, inlen, outlen, ret;
57*7c478bd9Sstevel@tonic-gate 
58*7c478bd9Sstevel@tonic-gate 	tmpbuf[insize++] = fc;		/* size of tmpbuf is MB_LEN_MAX+1 */
59*7c478bd9Sstevel@tonic-gate 
60*7c478bd9Sstevel@tonic-gate 	if (cd == (iconv_t)-1) {
61*7c478bd9Sstevel@tonic-gate 		/* no conversion */
62*7c478bd9Sstevel@tonic-gate 		tmpbuf[insize] = '\0';
63*7c478bd9Sstevel@tonic-gate 		return (insize);
64*7c478bd9Sstevel@tonic-gate 	}
65*7c478bd9Sstevel@tonic-gate 
66*7c478bd9Sstevel@tonic-gate 	for (; ; ) {
67*7c478bd9Sstevel@tonic-gate 		inptr = (const char *)tmpbuf;
68*7c478bd9Sstevel@tonic-gate 		outptr = &outbuf[0];
69*7c478bd9Sstevel@tonic-gate 		inlen = insize;
70*7c478bd9Sstevel@tonic-gate 		outlen = sizeof (outbuf);
71*7c478bd9Sstevel@tonic-gate 
72*7c478bd9Sstevel@tonic-gate 		errno = 0;
73*7c478bd9Sstevel@tonic-gate 		ret = iconv(cd, &inptr, &inlen, &outptr, &outlen);
74*7c478bd9Sstevel@tonic-gate 		if (ret == (size_t)-1) {
75*7c478bd9Sstevel@tonic-gate 			/* iconv failed */
76*7c478bd9Sstevel@tonic-gate 			switch (errno) {
77*7c478bd9Sstevel@tonic-gate 			case EILSEQ:
78*7c478bd9Sstevel@tonic-gate 				/* invalid character found */
79*7c478bd9Sstevel@tonic-gate 				error(gettext(ERR_INVALID_CHAR),
80*7c478bd9Sstevel@tonic-gate 					cur_line, cur_po);
81*7c478bd9Sstevel@tonic-gate 				/* NOTREACHED */
82*7c478bd9Sstevel@tonic-gate 			case EINVAL:
83*7c478bd9Sstevel@tonic-gate 				/* not enough input */
84*7c478bd9Sstevel@tonic-gate 				if (insize == MB_LEN_MAX) {
85*7c478bd9Sstevel@tonic-gate 					/* invalid character found */
86*7c478bd9Sstevel@tonic-gate 					error(gettext(ERR_INVALID_CHAR),
87*7c478bd9Sstevel@tonic-gate 						cur_line, cur_po);
88*7c478bd9Sstevel@tonic-gate 					/* NOTREACHED */
89*7c478bd9Sstevel@tonic-gate 				}
90*7c478bd9Sstevel@tonic-gate 				c = getc(fp);
91*7c478bd9Sstevel@tonic-gate 				if (c == EOF) {
92*7c478bd9Sstevel@tonic-gate 					error(gettext(ERR_UNEXP_EOF),
93*7c478bd9Sstevel@tonic-gate 						cur_line, cur_po);
94*7c478bd9Sstevel@tonic-gate 					/* NOTREACHED */
95*7c478bd9Sstevel@tonic-gate 				}
96*7c478bd9Sstevel@tonic-gate 				tmpbuf[insize++] = (unsigned char)c;
97*7c478bd9Sstevel@tonic-gate 
98*7c478bd9Sstevel@tonic-gate 				/* initialize the conversion */
99*7c478bd9Sstevel@tonic-gate 				outptr = &outbuf[0];
100*7c478bd9Sstevel@tonic-gate 				outlen = sizeof (outbuf);
101*7c478bd9Sstevel@tonic-gate 				(void) iconv(cd, NULL, NULL, &outptr, &outlen);
102*7c478bd9Sstevel@tonic-gate 
103*7c478bd9Sstevel@tonic-gate 				continue;
104*7c478bd9Sstevel@tonic-gate 				/* NOTREACHED */
105*7c478bd9Sstevel@tonic-gate 			default:
106*7c478bd9Sstevel@tonic-gate 				/* should never happen */
107*7c478bd9Sstevel@tonic-gate 				error(ERR_INTERNAL,
108*7c478bd9Sstevel@tonic-gate 					cur_line, cur_po);
109*7c478bd9Sstevel@tonic-gate 				/* NOTREACHED */
110*7c478bd9Sstevel@tonic-gate 			}
111*7c478bd9Sstevel@tonic-gate 			/* NOTREACHED */
112*7c478bd9Sstevel@tonic-gate 		}
113*7c478bd9Sstevel@tonic-gate 		tmpbuf[insize] = '\0';
114*7c478bd9Sstevel@tonic-gate 		return (insize);
115*7c478bd9Sstevel@tonic-gate 		/* NOTRECHED */
116*7c478bd9Sstevel@tonic-gate 	}
117*7c478bd9Sstevel@tonic-gate }
118*7c478bd9Sstevel@tonic-gate 
119*7c478bd9Sstevel@tonic-gate static void
po_uninput(int c)120*7c478bd9Sstevel@tonic-gate po_uninput(int c)
121*7c478bd9Sstevel@tonic-gate {
122*7c478bd9Sstevel@tonic-gate 	(void) ungetc(c, fp);
123*7c478bd9Sstevel@tonic-gate 	if (c == '\n')
124*7c478bd9Sstevel@tonic-gate 		cur_line--;
125*7c478bd9Sstevel@tonic-gate }
126*7c478bd9Sstevel@tonic-gate 
127*7c478bd9Sstevel@tonic-gate static void
po_ungetc(struct ch * pch)128*7c478bd9Sstevel@tonic-gate po_ungetc(struct ch *pch)
129*7c478bd9Sstevel@tonic-gate {
130*7c478bd9Sstevel@tonic-gate 	if (backlen) {
131*7c478bd9Sstevel@tonic-gate 		error(gettext(ERR_INTERNAL), cur_line, cur_po);
132*7c478bd9Sstevel@tonic-gate 		/* NOTREACHED */
133*7c478bd9Sstevel@tonic-gate 	}
134*7c478bd9Sstevel@tonic-gate 	if (!pch->eof) {
135*7c478bd9Sstevel@tonic-gate 		backlen = pch->len;
136*7c478bd9Sstevel@tonic-gate 		(void) memcpy(backbuf, pch->buf, backlen);
137*7c478bd9Sstevel@tonic-gate 	}
138*7c478bd9Sstevel@tonic-gate }
139*7c478bd9Sstevel@tonic-gate 
140*7c478bd9Sstevel@tonic-gate static struct ch *
po_getc(void)141*7c478bd9Sstevel@tonic-gate po_getc(void)
142*7c478bd9Sstevel@tonic-gate {
143*7c478bd9Sstevel@tonic-gate 	static struct ch	och;
144*7c478bd9Sstevel@tonic-gate 	int	c;
145*7c478bd9Sstevel@tonic-gate 
146*7c478bd9Sstevel@tonic-gate 	if (backlen) {
147*7c478bd9Sstevel@tonic-gate 		och.len = backlen;
148*7c478bd9Sstevel@tonic-gate 		(void) memcpy(och.buf, backbuf, backlen);
149*7c478bd9Sstevel@tonic-gate 		backlen = 0;
150*7c478bd9Sstevel@tonic-gate 		return (&och);
151*7c478bd9Sstevel@tonic-gate 	}
152*7c478bd9Sstevel@tonic-gate 
153*7c478bd9Sstevel@tonic-gate 	for (; ; ) {
154*7c478bd9Sstevel@tonic-gate 		c = getc(fp);
155*7c478bd9Sstevel@tonic-gate 		if (c == EOF) {
156*7c478bd9Sstevel@tonic-gate 			if (ferror(fp)) {
157*7c478bd9Sstevel@tonic-gate 				/* error happend */
158*7c478bd9Sstevel@tonic-gate 				error(gettext(ERR_READ_FAILED), cur_po);
159*7c478bd9Sstevel@tonic-gate 				/* NOTREACHED */
160*7c478bd9Sstevel@tonic-gate 			}
161*7c478bd9Sstevel@tonic-gate 			och.len = 0;
162*7c478bd9Sstevel@tonic-gate 			och.eof = 1;
163*7c478bd9Sstevel@tonic-gate 			return (&och);
164*7c478bd9Sstevel@tonic-gate 		}
165*7c478bd9Sstevel@tonic-gate 		if (c == '\\') {
166*7c478bd9Sstevel@tonic-gate 			c = getc(fp);
167*7c478bd9Sstevel@tonic-gate 			if (c == '\n') {
168*7c478bd9Sstevel@tonic-gate 				/* this newline should be escaped */
169*7c478bd9Sstevel@tonic-gate 				cur_line++;
170*7c478bd9Sstevel@tonic-gate 				continue;
171*7c478bd9Sstevel@tonic-gate 			} else {
172*7c478bd9Sstevel@tonic-gate 				po_uninput(c);
173*7c478bd9Sstevel@tonic-gate 				och.len = 1;
174*7c478bd9Sstevel@tonic-gate 				och.eof = 0;
175*7c478bd9Sstevel@tonic-gate 				och.buf[0] = '\\';
176*7c478bd9Sstevel@tonic-gate 				return (&och);
177*7c478bd9Sstevel@tonic-gate 			}
178*7c478bd9Sstevel@tonic-gate 			/* NOTREACHED */
179*7c478bd9Sstevel@tonic-gate 		}
180*7c478bd9Sstevel@tonic-gate 		if (c == '\n') {
181*7c478bd9Sstevel@tonic-gate 			cur_line++;
182*7c478bd9Sstevel@tonic-gate 			och.len = 1;
183*7c478bd9Sstevel@tonic-gate 			och.eof = 0;
184*7c478bd9Sstevel@tonic-gate 			och.buf[0] = '\n';
185*7c478bd9Sstevel@tonic-gate 			return (&och);
186*7c478bd9Sstevel@tonic-gate 		}
187*7c478bd9Sstevel@tonic-gate 		if (isascii((unsigned char)c)) {
188*7c478bd9Sstevel@tonic-gate 			/* single byte ascii */
189*7c478bd9Sstevel@tonic-gate 			och.len = 1;
190*7c478bd9Sstevel@tonic-gate 			och.eof = 0;
191*7c478bd9Sstevel@tonic-gate 			och.buf[0] = (unsigned char)c;
192*7c478bd9Sstevel@tonic-gate 			return (&och);
193*7c478bd9Sstevel@tonic-gate 		}
194*7c478bd9Sstevel@tonic-gate 
195*7c478bd9Sstevel@tonic-gate 		och.len = get_mb(&och.buf[0], (unsigned char)c);
196*7c478bd9Sstevel@tonic-gate 		och.eof = 0;
197*7c478bd9Sstevel@tonic-gate 		return (&och);
198*7c478bd9Sstevel@tonic-gate 	}
199*7c478bd9Sstevel@tonic-gate 	/* NOTREACHED */
200*7c478bd9Sstevel@tonic-gate }
201*7c478bd9Sstevel@tonic-gate 
/*
 * extend_buf() enlarges the buffer *buf, whose current size is *size,
 * by add bytes using Xrealloc(), and stores the new pointer and the
 * new size back through buf and size.
 */
static void
extend_buf(char **buf, size_t *size, size_t add)
{
	size_t	grown = *size + add;

	*size = grown;
	*buf = (char *)Xrealloc(*buf, grown);
}
211*7c478bd9Sstevel@tonic-gate 
212*7c478bd9Sstevel@tonic-gate static struct ch	*
expand_es(void)213*7c478bd9Sstevel@tonic-gate expand_es(void)
214*7c478bd9Sstevel@tonic-gate {
215*7c478bd9Sstevel@tonic-gate 	int	c, n, loop;
216*7c478bd9Sstevel@tonic-gate 	static struct ch	och;
217*7c478bd9Sstevel@tonic-gate 	struct ch	*pch;
218*7c478bd9Sstevel@tonic-gate 
219*7c478bd9Sstevel@tonic-gate 	pch = po_getc();
220*7c478bd9Sstevel@tonic-gate 	if (pch->eof) {
221*7c478bd9Sstevel@tonic-gate 		error(gettext(ERR_UNEXP_EOF),
222*7c478bd9Sstevel@tonic-gate 			cur_line, cur_po);
223*7c478bd9Sstevel@tonic-gate 		/* NOTREACHED */
224*7c478bd9Sstevel@tonic-gate 	}
225*7c478bd9Sstevel@tonic-gate 	if (pch->len > 1) {
226*7c478bd9Sstevel@tonic-gate 		/* not a valid escape sequence */
227*7c478bd9Sstevel@tonic-gate 		return (pch);
228*7c478bd9Sstevel@tonic-gate 	}
229*7c478bd9Sstevel@tonic-gate 
230*7c478bd9Sstevel@tonic-gate 	och.len = 1;
231*7c478bd9Sstevel@tonic-gate 	och.eof = 0;
232*7c478bd9Sstevel@tonic-gate 	switch (pch->buf[0]) {
233*7c478bd9Sstevel@tonic-gate 	case '"':
234*7c478bd9Sstevel@tonic-gate 	case '\\':
235*7c478bd9Sstevel@tonic-gate 		och.buf[0] = pch->buf[0];
236*7c478bd9Sstevel@tonic-gate 		break;
237*7c478bd9Sstevel@tonic-gate 	case 'b':
238*7c478bd9Sstevel@tonic-gate 		och.buf[0] = '\b';
239*7c478bd9Sstevel@tonic-gate 		break;
240*7c478bd9Sstevel@tonic-gate 	case 'f':
241*7c478bd9Sstevel@tonic-gate 		och.buf[0] = '\f';
242*7c478bd9Sstevel@tonic-gate 		break;
243*7c478bd9Sstevel@tonic-gate 	case 'n':
244*7c478bd9Sstevel@tonic-gate 		och.buf[0] = '\n';
245*7c478bd9Sstevel@tonic-gate 		break;
246*7c478bd9Sstevel@tonic-gate 	case 'r':
247*7c478bd9Sstevel@tonic-gate 		och.buf[0] = '\r';
248*7c478bd9Sstevel@tonic-gate 		break;
249*7c478bd9Sstevel@tonic-gate 	case 't':
250*7c478bd9Sstevel@tonic-gate 		och.buf[0] = '\t';
251*7c478bd9Sstevel@tonic-gate 		break;
252*7c478bd9Sstevel@tonic-gate 	case 'v':
253*7c478bd9Sstevel@tonic-gate 		och.buf[0] = '\v';
254*7c478bd9Sstevel@tonic-gate 		break;
255*7c478bd9Sstevel@tonic-gate 	case 'a':
256*7c478bd9Sstevel@tonic-gate 		och.buf[0] = '\a';
257*7c478bd9Sstevel@tonic-gate 		break;
258*7c478bd9Sstevel@tonic-gate 	case '0':
259*7c478bd9Sstevel@tonic-gate 	case '1':
260*7c478bd9Sstevel@tonic-gate 	case '2':
261*7c478bd9Sstevel@tonic-gate 	case '3':
262*7c478bd9Sstevel@tonic-gate 	case '4':
263*7c478bd9Sstevel@tonic-gate 	case '5':
264*7c478bd9Sstevel@tonic-gate 	case '6':
265*7c478bd9Sstevel@tonic-gate 	case '7':
266*7c478bd9Sstevel@tonic-gate 		/* octal */
267*7c478bd9Sstevel@tonic-gate 		c = pch->buf[0];
268*7c478bd9Sstevel@tonic-gate 		for (n = 0, loop = 0; ; ) {
269*7c478bd9Sstevel@tonic-gate 			n = n * 8 + c - '0';
270*7c478bd9Sstevel@tonic-gate 			loop++;
271*7c478bd9Sstevel@tonic-gate 			if (loop >= 3)
272*7c478bd9Sstevel@tonic-gate 				break;
273*7c478bd9Sstevel@tonic-gate 			pch = po_getc();
274*7c478bd9Sstevel@tonic-gate 			if (pch->eof) {
275*7c478bd9Sstevel@tonic-gate 				error(gettext(ERR_UNEXP_EOF),
276*7c478bd9Sstevel@tonic-gate 					cur_line, cur_po);
277*7c478bd9Sstevel@tonic-gate 				/* NOTREACHED */
278*7c478bd9Sstevel@tonic-gate 			}
279*7c478bd9Sstevel@tonic-gate 			if ((pch->len > 1) || (pch->buf[0] < '0') ||
280*7c478bd9Sstevel@tonic-gate 				(pch->buf[0] > '7'))
281*7c478bd9Sstevel@tonic-gate 				break;
282*7c478bd9Sstevel@tonic-gate 			c = pch->buf[0];
283*7c478bd9Sstevel@tonic-gate 		}
284*7c478bd9Sstevel@tonic-gate 		po_ungetc(pch);
285*7c478bd9Sstevel@tonic-gate 		och.buf[0] = (unsigned char)n;
286*7c478bd9Sstevel@tonic-gate 		break;
287*7c478bd9Sstevel@tonic-gate 	case 'x':
288*7c478bd9Sstevel@tonic-gate 		/* hex */
289*7c478bd9Sstevel@tonic-gate 		pch = po_getc();
290*7c478bd9Sstevel@tonic-gate 		if (pch->eof) {
291*7c478bd9Sstevel@tonic-gate 			error(gettext(ERR_UNEXP_EOF),
292*7c478bd9Sstevel@tonic-gate 				cur_line, cur_po);
293*7c478bd9Sstevel@tonic-gate 			/* NOTREACHED */
294*7c478bd9Sstevel@tonic-gate 		}
295*7c478bd9Sstevel@tonic-gate 		if (pch->len > 1) {
296*7c478bd9Sstevel@tonic-gate 			po_ungetc(pch);
297*7c478bd9Sstevel@tonic-gate 			och.buf[0] = 'x';
298*7c478bd9Sstevel@tonic-gate 			break;
299*7c478bd9Sstevel@tonic-gate 		}
300*7c478bd9Sstevel@tonic-gate 		c = pch->buf[0];
301*7c478bd9Sstevel@tonic-gate 		if (!isxdigit((unsigned char)c)) {
302*7c478bd9Sstevel@tonic-gate 			po_ungetc(pch);
303*7c478bd9Sstevel@tonic-gate 			och.buf[0] = 'x';
304*7c478bd9Sstevel@tonic-gate 			break;
305*7c478bd9Sstevel@tonic-gate 		}
306*7c478bd9Sstevel@tonic-gate 		if (isdigit((unsigned char)c)) {
307*7c478bd9Sstevel@tonic-gate 			n = c - '0';
308*7c478bd9Sstevel@tonic-gate 		} else if (isupper((unsigned char)c)) {
309*7c478bd9Sstevel@tonic-gate 			n = c - 'A' + 10;
310*7c478bd9Sstevel@tonic-gate 		} else {
311*7c478bd9Sstevel@tonic-gate 			n = c - 'a' + 10;
312*7c478bd9Sstevel@tonic-gate 		}
313*7c478bd9Sstevel@tonic-gate 
314*7c478bd9Sstevel@tonic-gate 		pch = po_getc();
315*7c478bd9Sstevel@tonic-gate 		if (pch->eof) {
316*7c478bd9Sstevel@tonic-gate 			error(gettext(ERR_UNEXP_EOF),
317*7c478bd9Sstevel@tonic-gate 				cur_line, cur_po);
318*7c478bd9Sstevel@tonic-gate 			/* NOTREACHED */
319*7c478bd9Sstevel@tonic-gate 		}
320*7c478bd9Sstevel@tonic-gate 		if (pch->len > 1) {
321*7c478bd9Sstevel@tonic-gate 			po_ungetc(pch);
322*7c478bd9Sstevel@tonic-gate 			och.buf[0] = (unsigned char)n;
323*7c478bd9Sstevel@tonic-gate 			break;
324*7c478bd9Sstevel@tonic-gate 		}
325*7c478bd9Sstevel@tonic-gate 		c = pch->buf[0];
326*7c478bd9Sstevel@tonic-gate 		if (!isxdigit((unsigned char)c)) {
327*7c478bd9Sstevel@tonic-gate 			po_ungetc(pch);
328*7c478bd9Sstevel@tonic-gate 			och.buf[0] = (unsigned char)n;
329*7c478bd9Sstevel@tonic-gate 			break;
330*7c478bd9Sstevel@tonic-gate 		}
331*7c478bd9Sstevel@tonic-gate 		n *= 16;
332*7c478bd9Sstevel@tonic-gate 		if (isdigit((unsigned char)c)) {
333*7c478bd9Sstevel@tonic-gate 			n += c - '0';
334*7c478bd9Sstevel@tonic-gate 		} else if (isupper((unsigned char)c)) {
335*7c478bd9Sstevel@tonic-gate 			n += c - 'A' + 10;
336*7c478bd9Sstevel@tonic-gate 		} else {
337*7c478bd9Sstevel@tonic-gate 			n += c - 'a' + 10;
338*7c478bd9Sstevel@tonic-gate 		}
339*7c478bd9Sstevel@tonic-gate 		och.buf[0] = (unsigned char)n;
340*7c478bd9Sstevel@tonic-gate 		break;
341*7c478bd9Sstevel@tonic-gate 
342*7c478bd9Sstevel@tonic-gate 	default:
343*7c478bd9Sstevel@tonic-gate 		och.buf[0] = pch->buf[0];
344*7c478bd9Sstevel@tonic-gate 		break;
345*7c478bd9Sstevel@tonic-gate 	}
346*7c478bd9Sstevel@tonic-gate 	return (&och);
347*7c478bd9Sstevel@tonic-gate }
348*7c478bd9Sstevel@tonic-gate 
349*7c478bd9Sstevel@tonic-gate int
yylex(void)350*7c478bd9Sstevel@tonic-gate yylex(void)
351*7c478bd9Sstevel@tonic-gate {
352*7c478bd9Sstevel@tonic-gate 	unsigned int	uc;
353*7c478bd9Sstevel@tonic-gate 	struct ch	*pch;
354*7c478bd9Sstevel@tonic-gate 	char	*buf;
355*7c478bd9Sstevel@tonic-gate 	size_t	buf_size, buf_pos;
356*7c478bd9Sstevel@tonic-gate 
357*7c478bd9Sstevel@tonic-gate 	for (; ; ) {
358*7c478bd9Sstevel@tonic-gate 		pch = po_getc();
359*7c478bd9Sstevel@tonic-gate 
360*7c478bd9Sstevel@tonic-gate 		if (pch->eof) {
361*7c478bd9Sstevel@tonic-gate 			/* EOF */
362*7c478bd9Sstevel@tonic-gate 			return (0);
363*7c478bd9Sstevel@tonic-gate 		}
364*7c478bd9Sstevel@tonic-gate 
365*7c478bd9Sstevel@tonic-gate 		if (pch->len > 1) {
366*7c478bd9Sstevel@tonic-gate 			/* multi byte */
367*7c478bd9Sstevel@tonic-gate 			yylval.c.len = pch->len;
368*7c478bd9Sstevel@tonic-gate 			(void) memcpy(yylval.c.buf, pch->buf, pch->len);
369*7c478bd9Sstevel@tonic-gate 			return (CHR);
370*7c478bd9Sstevel@tonic-gate 		}
371*7c478bd9Sstevel@tonic-gate 		/* single byte */
372*7c478bd9Sstevel@tonic-gate 		switch (pch->buf[0]) {
373*7c478bd9Sstevel@tonic-gate 		case ' ':
374*7c478bd9Sstevel@tonic-gate 		case '\t':
375*7c478bd9Sstevel@tonic-gate 		case '\n':
376*7c478bd9Sstevel@tonic-gate 			break;
377*7c478bd9Sstevel@tonic-gate 
378*7c478bd9Sstevel@tonic-gate 		case '#':
379*7c478bd9Sstevel@tonic-gate 			/* comment start */
380*7c478bd9Sstevel@tonic-gate 			buf_size = CBUFSIZE;
381*7c478bd9Sstevel@tonic-gate 			buf = (char *)Xmalloc(buf_size);
382*7c478bd9Sstevel@tonic-gate 			buf_pos = 0;
383*7c478bd9Sstevel@tonic-gate 			pch = po_getc();
384*7c478bd9Sstevel@tonic-gate 			while (!pch->eof &&
385*7c478bd9Sstevel@tonic-gate 				((pch->len != 1) || (pch->buf[0] != '\n'))) {
386*7c478bd9Sstevel@tonic-gate 				if (buf_pos + pch->len + 1 > buf_size)
387*7c478bd9Sstevel@tonic-gate 					extend_buf(&buf, &buf_size, CBUFSIZE);
388*7c478bd9Sstevel@tonic-gate 				(void) memcpy(buf + buf_pos,
389*7c478bd9Sstevel@tonic-gate 					pch->buf, pch->len);
390*7c478bd9Sstevel@tonic-gate 				buf_pos += pch->len;
391*7c478bd9Sstevel@tonic-gate 				pch = po_getc();
392*7c478bd9Sstevel@tonic-gate 			}
393*7c478bd9Sstevel@tonic-gate 			buf[buf_pos] = '\0';
394*7c478bd9Sstevel@tonic-gate 			yylval.str = buf;
395*7c478bd9Sstevel@tonic-gate 			return (COMMENT);
396*7c478bd9Sstevel@tonic-gate 			/* NOTREACHED */
397*7c478bd9Sstevel@tonic-gate 
398*7c478bd9Sstevel@tonic-gate 		case '[':
399*7c478bd9Sstevel@tonic-gate 		case ']':
400*7c478bd9Sstevel@tonic-gate 			return (pch->buf[0]);
401*7c478bd9Sstevel@tonic-gate 			/* NOTREACHED */
402*7c478bd9Sstevel@tonic-gate 
403*7c478bd9Sstevel@tonic-gate 		case '"':
404*7c478bd9Sstevel@tonic-gate 			buf_size = MBUFSIZE;
405*7c478bd9Sstevel@tonic-gate 			buf = (char *)Xmalloc(buf_size);
406*7c478bd9Sstevel@tonic-gate 			buf_pos = 0;
407*7c478bd9Sstevel@tonic-gate 			for (; ; ) {
408*7c478bd9Sstevel@tonic-gate 				pch = po_getc();
409*7c478bd9Sstevel@tonic-gate 
410*7c478bd9Sstevel@tonic-gate 				if (pch->eof) {
411*7c478bd9Sstevel@tonic-gate 					/* EOF */
412*7c478bd9Sstevel@tonic-gate 					error(gettext(ERR_UNEXP_EOF),
413*7c478bd9Sstevel@tonic-gate 						cur_line, cur_po);
414*7c478bd9Sstevel@tonic-gate 					/* NOTREACHED */
415*7c478bd9Sstevel@tonic-gate 				}
416*7c478bd9Sstevel@tonic-gate 
417*7c478bd9Sstevel@tonic-gate 				if (pch->len == 1) {
418*7c478bd9Sstevel@tonic-gate 					uc = pch->buf[0];
419*7c478bd9Sstevel@tonic-gate 
420*7c478bd9Sstevel@tonic-gate 					if (uc == '\n') {
421*7c478bd9Sstevel@tonic-gate 						error(gettext(ERR_UNEXP_EOL),
422*7c478bd9Sstevel@tonic-gate 							cur_line, cur_po);
423*7c478bd9Sstevel@tonic-gate 						/* NOTREACHED */
424*7c478bd9Sstevel@tonic-gate 					}
425*7c478bd9Sstevel@tonic-gate 					if (uc == '"')
426*7c478bd9Sstevel@tonic-gate 						break;
427*7c478bd9Sstevel@tonic-gate 					if (uc == '\\')
428*7c478bd9Sstevel@tonic-gate 						pch = expand_es();
429*7c478bd9Sstevel@tonic-gate 				}
430*7c478bd9Sstevel@tonic-gate 				if (buf_pos + pch->len + 1 > buf_size)
431*7c478bd9Sstevel@tonic-gate 					extend_buf(&buf, &buf_size,
432*7c478bd9Sstevel@tonic-gate 						MBUFSIZE);
433*7c478bd9Sstevel@tonic-gate 				(void) memcpy(buf + buf_pos,
434*7c478bd9Sstevel@tonic-gate 					pch->buf, pch->len);
435*7c478bd9Sstevel@tonic-gate 				buf_pos += pch->len;
436*7c478bd9Sstevel@tonic-gate 			}
437*7c478bd9Sstevel@tonic-gate 
438*7c478bd9Sstevel@tonic-gate 			buf[buf_pos] = '\0';
439*7c478bd9Sstevel@tonic-gate 			yylval.str = buf;
440*7c478bd9Sstevel@tonic-gate 			return (STR);
441*7c478bd9Sstevel@tonic-gate 			/* NOTREACHED */
442*7c478bd9Sstevel@tonic-gate 
443*7c478bd9Sstevel@tonic-gate 		default:
444*7c478bd9Sstevel@tonic-gate 			uc = pch->buf[0];
445*7c478bd9Sstevel@tonic-gate 
446*7c478bd9Sstevel@tonic-gate 			if (isalpha(uc) || (uc == '_')) {
447*7c478bd9Sstevel@tonic-gate 				buf_size = KBUFSIZE;
448*7c478bd9Sstevel@tonic-gate 				buf = (char *)Xmalloc(buf_size);
449*7c478bd9Sstevel@tonic-gate 				buf_pos = 0;
450*7c478bd9Sstevel@tonic-gate 				buf[buf_pos++] = (char)uc;
451*7c478bd9Sstevel@tonic-gate 				pch = po_getc();
452*7c478bd9Sstevel@tonic-gate 				while (!pch->eof &&
453*7c478bd9Sstevel@tonic-gate 					(pch->len == 1) &&
454*7c478bd9Sstevel@tonic-gate 					(isalpha(uc = pch->buf[0]) ||
455*7c478bd9Sstevel@tonic-gate 					isdigit(uc) || (uc == '_'))) {
456*7c478bd9Sstevel@tonic-gate 					if (buf_pos + 1 + 1 > buf_size)
457*7c478bd9Sstevel@tonic-gate 						extend_buf(&buf, &buf_size,
458*7c478bd9Sstevel@tonic-gate 							KBUFSIZE);
459*7c478bd9Sstevel@tonic-gate 					buf[buf_pos++] = (char)uc;
460*7c478bd9Sstevel@tonic-gate 					pch = po_getc();
461*7c478bd9Sstevel@tonic-gate 				}
462*7c478bd9Sstevel@tonic-gate 				/* push back the last char */
463*7c478bd9Sstevel@tonic-gate 				po_ungetc(pch);
464*7c478bd9Sstevel@tonic-gate 				buf[buf_pos] = '\0';
465*7c478bd9Sstevel@tonic-gate 				yylval.str = buf;
466*7c478bd9Sstevel@tonic-gate 				if (buf_pos > MAX_KW_LEN) {
467*7c478bd9Sstevel@tonic-gate 					/* kbuf is longer than any keywords */
468*7c478bd9Sstevel@tonic-gate 					return (SYMBOL);
469*7c478bd9Sstevel@tonic-gate 				}
470*7c478bd9Sstevel@tonic-gate 				yylval.num = cur_line;
471*7c478bd9Sstevel@tonic-gate 				if (strcmp(buf, KW_DOMAIN) == 0) {
472*7c478bd9Sstevel@tonic-gate 					free(buf);
473*7c478bd9Sstevel@tonic-gate 					return (DOMAIN);
474*7c478bd9Sstevel@tonic-gate 				} else if (strcmp(buf, KW_MSGID) == 0) {
475*7c478bd9Sstevel@tonic-gate 					free(buf);
476*7c478bd9Sstevel@tonic-gate 					return (MSGID);
477*7c478bd9Sstevel@tonic-gate 				} else if (strcmp(buf, KW_MSGID_PLURAL) == 0) {
478*7c478bd9Sstevel@tonic-gate 					free(buf);
479*7c478bd9Sstevel@tonic-gate 					return (MSGID_PLURAL);
480*7c478bd9Sstevel@tonic-gate 				} else if (strcmp(buf, KW_MSGSTR) == 0) {
481*7c478bd9Sstevel@tonic-gate 					free(buf);
482*7c478bd9Sstevel@tonic-gate 					return (MSGSTR);
483*7c478bd9Sstevel@tonic-gate 				} else {
484*7c478bd9Sstevel@tonic-gate 					free(buf);
485*7c478bd9Sstevel@tonic-gate 					return (SYMBOL);
486*7c478bd9Sstevel@tonic-gate 				}
487*7c478bd9Sstevel@tonic-gate 				/* NOTREACHED */
488*7c478bd9Sstevel@tonic-gate 			}
489*7c478bd9Sstevel@tonic-gate 			if (isdigit(uc)) {
490*7c478bd9Sstevel@tonic-gate 				buf_size = NBUFSIZE;
491*7c478bd9Sstevel@tonic-gate 				buf = (char *)Xmalloc(buf_size);
492*7c478bd9Sstevel@tonic-gate 				buf_pos = 0;
493*7c478bd9Sstevel@tonic-gate 				buf[buf_pos++] = (char)uc;
494*7c478bd9Sstevel@tonic-gate 				pch = po_getc();
495*7c478bd9Sstevel@tonic-gate 				while (!pch->eof &&
496*7c478bd9Sstevel@tonic-gate 					(pch->len == 1) &&
497*7c478bd9Sstevel@tonic-gate 					isdigit(uc = pch->buf[0])) {
498*7c478bd9Sstevel@tonic-gate 					if (buf_pos + 1 + 1 > buf_size)
499*7c478bd9Sstevel@tonic-gate 						extend_buf(&buf, &buf_size,
500*7c478bd9Sstevel@tonic-gate 							NBUFSIZE);
501*7c478bd9Sstevel@tonic-gate 					buf[buf_pos++] = (char)uc;
502*7c478bd9Sstevel@tonic-gate 					pch = po_getc();
503*7c478bd9Sstevel@tonic-gate 				}
504*7c478bd9Sstevel@tonic-gate 				/* push back the last char */
505*7c478bd9Sstevel@tonic-gate 				po_ungetc(pch);
506*7c478bd9Sstevel@tonic-gate 				buf[buf_pos] = '\0';
507*7c478bd9Sstevel@tonic-gate 				yylval.num = atoi(buf);
508*7c478bd9Sstevel@tonic-gate 				free(buf);
509*7c478bd9Sstevel@tonic-gate 				return (NUM);
510*7c478bd9Sstevel@tonic-gate 			}
511*7c478bd9Sstevel@tonic-gate 			/* just a char */
512*7c478bd9Sstevel@tonic-gate 			yylval.c.len = 1;
513*7c478bd9Sstevel@tonic-gate 			yylval.c.buf[0] = uc;
514*7c478bd9Sstevel@tonic-gate 			return (CHR);
515*7c478bd9Sstevel@tonic-gate 			/* NOTREACHED */
516*7c478bd9Sstevel@tonic-gate 		}
517*7c478bd9Sstevel@tonic-gate 	}
518*7c478bd9Sstevel@tonic-gate }
519