xref: /illumos-gate/usr/src/cmd/vi/port/ex_re.c (revision 55fea89d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved  	*/
28 
29 
30 /* Copyright (c) 1981 Regents of the University of California */
31 
32 #include "ex.h"
33 #include "ex_re.h"
34 
35 /* from libgen */
36 char *_compile(const char *, char *, char *, int);
37 
38 /*
39  * The compiled-regular-expression storage areas (re, scanre, and subre)
40  * have been changed into dynamically allocated memory areas, in both the
41  * Solaris and XPG4 versions.
42  *
43  * In the Solaris version, which uses the original libgen(3g) compile()
44  * and step() calls, these areas are allocated once, and then data are
45  * copied between them subsequently, as they were in the original
46  * implementation.  This is possible because the compiled information is
47  * a self-contained block of bits.
48  *
49  * In the XPG4 version, the expr:compile.o object is linked in as a
50  * simulation of these functions using the new regcomp() and regexec()
51  * functions.  The problem here is that the resulting
52  * compiled-regular-expression data contain pointers to other data, which
53  * need to be freed, but only when we are quite sure that we are done
54  * with them - and certainly not before.  There was an earlier attempt to
55  * handle these differences, but that effort was flawed.
56  */
57 
58 extern int	getchar();
59 #ifdef XPG4
60 void regex_comp_free(void *);
61 extern size_t regexc_size;	/* compile.c: size of regex_comp structure */
62 #endif /* XPG4 */
63 
64 /*
65  * Global, substitute and regular expressions.
66  * Very similar to ed, with some re extensions and
67  * confirmed substitute.
68  */
/*
 * global: run a command list on every addressed line whose match result
 * against a regular expression equals k.
 *
 * The character following the command is taken as the re delimiter; the
 * expression is compiled with vi_compile() and saved as the scanning re.
 * The rest of the input line (with backslash-newline continuation) is
 * collected into globuf, the lines are marked in a first pass, and the
 * command list is then executed once per marked line.
 *
 * k is compared with the return value of execute(); presumably it is 1
 * for "g" and 0 for "v" -- confirm against the caller.
 */
69 void
global(k)70 global(k)
71 	bool k;
72 {
73 	unsigned char *gp;
74 	int c;
75 	line *a1;
76 	unsigned char globuf[GBSIZE], *Cwas;
77 	int nlines = lineDOL();
78 	int oinglobal = inglobal;
79 	unsigned char *oglobp = globp;
80 	char	multi[MB_LEN_MAX + 1];
81 	wchar_t	wc;
82 	int	len;
83 
84 
	/* Command, globp and inglobal are restored before returning. */
85 	Cwas = Command;
86 	/*
87 	 * States of inglobal:
88 	 *  0: ordinary - not in a global command.
89 	 *  1: text coming from some buffer, not tty.
90 	 *  2: like 1, but the source of the buffer is a global command.
91 	 * Hence you're only in a global command if inglobal==2. This
92 	 * strange sounding convention is historically derived from
93 	 * everybody simulating a global command.
94 	 */
95 	if (inglobal==2)
96 		error(value(vi_TERSE) ? gettext("Global within global") :
97 gettext("Global within global not allowed"));
98 	markDOT();
99 	setall();
100 	nonzero();
101 	if (skipend())
102 		error(value(vi_TERSE) ? gettext("Global needs re") :
103 gettext("Missing regular expression for global"));
	/*
	 * The next input character delimits the re; compile it and keep
	 * the result as the scanning re.
	 */
104 	c = getchar();
105 	(void)vi_compile(c, 1);
106 	savere(&scanre);
	/*
	 * Collect the command list into globuf until a newline that is
	 * not preceded by a backslash, or EOF.
	 */
107 	gp = globuf;
108 	while ((c = peekchar()) != '\n') {
109 		if (!isascii(c)) {
110 			if (c == EOF) {
111 				c = '\n';
112 				ungetchar(c);
113 				goto out;
114 			}
115 
			/*
			 * Also entered from the backslash case below when the
			 * escaped character is not ASCII.
			 */
116 mb_copy:
117 			if ((len = _mbftowc(multi, &wc, getchar, &peekc)) > 0) {
118 				if ((gp + len) >= &globuf[GBSIZE - 2])
119 					error(gettext("Global command too long"));
120 				strncpy(gp, multi, len);
121 				gp += len;
122 				continue;
123 			}
124 		}
125 
126 		(void) getchar();
127 		switch (c) {
128 
129 		case EOF:
130 			c = '\n';
131 			ungetchar(c);
132 			goto out;
133 
134 		case '\\':
135 			c = peekchar();
136 			if (!isascii(c)) {
137 				*gp++ = '\\';
138 				goto mb_copy;
139 			}
140 
141 			(void) getchar();
142 			switch (c) {
143 
144 			case '\\':
				/*
				 * Push the second backslash back so it is
				 * scanned again on the next pass; one
				 * backslash is stored now.
				 */
145 				ungetchar(c);
146 				break;
147 
148 			case '\n':
				/*
				 * Backslash-newline: only the newline is
				 * stored, so the command list continues on
				 * the next input line.
				 */
149 				break;
150 
151 			default:
				/* Any other escaped character keeps its backslash. */
152 				*gp++ = '\\';
153 				break;
154 			}
155 			break;
156 		}
157 		*gp++ = c;
		/* Leave room for the final newline and NUL stored at "out". */
158 		if (gp >= &globuf[GBSIZE - 2])
159 			error(gettext("Global command too long"));
160 	}
161 
162 out:
163 	donewline();
164 	*gp++ = c;
165 	*gp++ = 0;
166 	saveall();
167 	inglobal = 2;
	/*
	 * Marking pass: clear the low bit of every line entry, then set it
	 * on each line within [addr1, addr2] whose execute() result equals k.
	 */
168 	for (a1 = one; a1 <= dol; a1++) {
169 		*a1 &= ~01;
170 		if (a1 >= addr1 && a1 <= addr2 && execute(0, a1) == k)
171 			*a1 |= 01;
172 	}
173 #ifdef notdef
174 /*
175  * This code is commented out for now.  The problem is that we don't
176  * fix up the undo area the way we should.  Basically, I think what has
177  * to be done is to copy the undo area down (since we shrunk everything)
178  * and move the various pointers into it down too.  I will do this later
179  * when I have time. (Mark, 10-20-80)
180  */
181 	/*
182 	 * Special case: g/.../d (avoid n^2 algorithm)
183 	 */
184 	if (globuf[0]=='d' && globuf[1]=='\n' && globuf[2]=='\0') {
185 		gdelete();
186 		return;
187 	}
188 #endif
189 	if (inopen)
190 		inopen = -1;
191 	/*
192 	 * Now for each marked line, set dot there and do the commands.
193 	 * Note the n^2 behavior here for lots of lines matching.
194 	 * This is really needed: in some cases you could delete lines,
195 	 * causing a marked line to be moved before a1 and missed if
196 	 * we didn't restart at zero each time.
197 	 */
198 	for (a1 = one; a1 <= dol; a1++) {
199 		if (*a1 & 01) {
200 			*a1 &= ~01;
201 			dot = a1;
			/* Feed the saved command list to the command interpreter. */
202 			globp = globuf;
203 			commands(1, 1);
			/* The loop increment turns this into a restart at "one". */
204 			a1 = zero;
205 		}
206 	}
	/* Put back the state saved on entry. */
207 	globp = oglobp;
208 	inglobal = oinglobal;
209 	endline = 1;
210 	Command = Cwas;
211 	netchHAD(nlines);
212 	setlastchar(EOF);
213 	if (inopen) {
214 		ungetchar(EOF);
215 		inopen = 1;
216 	}
217 }
218 
219 /*
220  * gdelete: delete inside a global command. Handles the
221  * special case g/r.e./d. All lines to be deleted have
222  * already been marked. Squeeze the remaining lines together.
223  * Note that other cases such as g/r.e./p, g/r.e./s/r.e.2/rhs/,
224  * and g/r.e./.,/r.e.2/d are not treated specially.  There is no
225  * good reason for this except the question: where to you draw the line?
226  */
227 void
gdelete(void)228 gdelete(void)
229 {
230 	line *a1, *a2, *a3;
231 
232 	a3 = dol;
233 	/* find first marked line. can skip all before it */
234 	for (a1=zero; (*a1&01)==0; a1++)
235 		if (a1>=a3)
236 			return;
237 	/* copy down unmarked lines, compacting as we go. */
238 	for (a2=a1+1; a2<=a3;) {
239 		if (*a2&01) {
240 			a2++;		/* line is marked, skip it */
241 			dot = a1;	/* dot left after line deletion */
242 		} else
243 			*a1++ = *a2++;	/* unmarked, copy it */
244 	}
245 	dol = a1-1;
246 	if (dot>dol)
247 		dot = dol;
248 	change();
249 }
250 
251 bool	cflag;
252 int	scount, slines, stotal;
253 
254 int
substitute(int c)255 substitute(int c)
256 {
257 	line *addr;
258 	int n;
259 	int gsubf, hopcount;
260 
261 	gsubf = compsub(c);
262 	if(FIXUNDO)
263 		save12(), undkind = UNDCHANGE;
264 	stotal = 0;
265 	slines = 0;
266 	for (addr = addr1; addr <= addr2; addr++) {
267 		scount = hopcount = 0;
268 		if (dosubcon(0, addr) == 0)
269 			continue;
270 		if (gsubf) {
271 			/*
272 			 * The loop can happen from s/\</&/g
273 			 * but we don't want to break other, reasonable cases.
274 			 */
275 			hopcount = 0;
276 			while (*loc2) {
277 				if (++hopcount > sizeof linebuf)
278 					error(gettext("substitution loop"));
279 				if (dosubcon(1, addr) == 0)
280 					break;
281 			}
282 		}
283 		if (scount) {
284 			stotal += scount;
285 			slines++;
286 			putmark(addr);
287 			n = append(getsub, addr);
288 			addr += n;
289 			addr2 += n;
290 		}
291 	}
292 	if (stotal == 0 && !inglobal && !cflag)
293 		error(value(vi_TERSE) ? gettext("Fail") :
294 gettext("Substitute pattern match failed"));
295 	snote(stotal, slines);
296 	return (stotal);
297 }
298 
299 int
compsub(int ch)300 compsub(int ch)
301 {
302 	int seof, c, uselastre;
303 	static int gsubf;
304 	static unsigned char remem[RHSSIZE];
305 	static int remflg = -1;
306 
307 	if (!value(vi_EDCOMPATIBLE))
308 		gsubf = cflag = 0;
309 	uselastre = 0;
310 	switch (ch) {
311 
312 	case 's':
313 		(void)skipwh();
314 		seof = getchar();
315 		if (endcmd(seof) || any(seof, "gcr")) {
316 			ungetchar(seof);
317 			goto redo;
318 		}
319 		if (isalpha(seof) || isdigit(seof))
320 			error(value(vi_TERSE) ? gettext("Substitute needs re") :
321 gettext("Missing regular expression for substitute"));
322 		seof = vi_compile(seof, 1);
323 		uselastre = 1;
324 		comprhs(seof);
325 		gsubf = cflag = 0;
326 		break;
327 
328 	case '~':
329 		uselastre = 1;
330 		/* FALLTHROUGH */
331 	case '&':
332 	redo:
333 		if (re == NULL || re->Expbuf[1] == 0)
334 			error(value(vi_TERSE) ? gettext("No previous re") :
335 gettext("No previous regular expression"));
336 		if (subre == NULL || subre->Expbuf[1] == 0)
337 			error(value(vi_TERSE) ? gettext("No previous substitute re") :
338 gettext("No previous substitute to repeat"));
339 		break;
340 	}
341 	for (;;) {
342 		c = getchar();
343 		switch (c) {
344 
345 		case 'g':
346 			gsubf = !gsubf;
347 			continue;
348 
349 		case 'c':
350 			cflag = !cflag;
351 			continue;
352 
353 		case 'r':
354 			uselastre = 1;
355 			continue;
356 
357 		default:
358 			ungetchar(c);
359 			setcount();
360 			donewline();
361 			if (uselastre)
362 				savere(&subre);
363 			else
364 				resre(subre);
365 
366 			/*
367 			 * The % by itself on the right hand side means
368 			 * that the previous value of the right hand side
369 			 * should be used. A -1 is used to indicate no
370 			 * previously remembered search string.
371 			 */
372 
373 			if (rhsbuf[0] == '%' && rhsbuf[1] == 0)
374 				if (remflg == -1)
375 					error(gettext("No previously remembered string"));
376 			        else
377 					strcpy(rhsbuf, remem);
378 			else {
379 				strcpy(remem, rhsbuf);
380 				remflg = 1;
381 			}
382 			return (gsubf);
383 		}
384 	}
385 }
386 
/*
 * comprhs: read the replacement text of a substitute command into
 * rhsbuf, stopping at the delimiter seof.
 *
 * What CP() saved from rhsbuf into orhsbuf before the loop is the text
 * that a '~' expands to.  In the stored form a character that dosub()
 * must treat specially is preceded by a backslash.  Overflow of rhsbuf
 * is reported through error().
 */
387 void
comprhs(int seof)388 comprhs(int seof)
389 {
390 	unsigned char *rp, *orp;
391 	int c;
392 	unsigned char orhsbuf[RHSSIZE];
393 	char	multi[MB_LEN_MAX + 1];
394 	int	len;
395 	wchar_t	wc;
396 
397 	rp = rhsbuf;
398 	CP(orhsbuf, rp);
399 	for (;;) {
400 		c = peekchar();
401 		if (c == seof) {
402 			(void) getchar();
403 			break;
404 		}
405 
		/* Non-ASCII input: copy one whole multibyte character. */
406 		if (!isascii(c) && c != EOF) {
407 			if ((len = _mbftowc(multi, &wc, getchar, &peekc)) > 0) {
408 				if ((rp + len) >= &rhsbuf[RHSSIZE - 1])
409 					goto toobig;
410 				strncpy(rp, multi, len);
411 				rp += len;
412 				continue;
413 			}
414 		}
415 
416 		(void) getchar();
417 		switch (c) {
418 
419 		case '\\':
420 			c = peekchar();
421 			if (c == EOF) {
422 				(void) getchar();
423 				error(gettext("Replacement string ends with \\"));
424 			}
425 
			/* Escaped multibyte character: keep the backslash. */
426 			if (!isascii(c)) {
427 				*rp++ = '\\';
428 				if ((len = _mbftowc(multi, &wc, getchar, &peekc)) > 0) {
429 					if ((rp + len) >= &rhsbuf[RHSSIZE - 1])
430 						goto over_flow;
431 					strncpy(rp, multi, len);
432 					rp += len;
433 					continue;
434 				}
435 			}
436 
437 			(void) getchar();
438 			if (value(vi_MAGIC)) {
439 				/*
440 				 * When "magic", \& turns into a plain &,
441 				 * and all other chars work fine quoted.
442 				 */
443 				if (c != '&') {
444 					if(rp >= &rhsbuf[RHSSIZE - 1]) {
445 						*rp=0;
446 						error(value(vi_TERSE) ?
447 gettext("Replacement pattern too long") :
448 gettext("Replacement pattern too long - limit 256 characters"));
449 					}
450 					*rp++ = '\\';
451 				}
452 				break;
453 			}
			/*
			 * Reached by falling through when not "magic" (the
			 * character was escaped), or by the goto from the
			 * unescaped cases below when "magic".  '~' expands to
			 * the saved previous replacement; anything else is
			 * stored with a backslash in front of it.
			 */
454 magic:
455 			if (c == '~') {
456 				for (orp = orhsbuf; *orp; *rp++ = *orp++)
457 					if (rp >= &rhsbuf[RHSSIZE - 1])
458 						goto toobig;
459 				continue;
460 			}
461 			if(rp >= &rhsbuf[RHSSIZE - 1]) {
462 over_flow:
463 				*rp=0;
464 				error(value(vi_TERSE) ?
465 gettext("Replacement pattern too long") :
466 gettext("Replacement pattern too long - limit 256 characters"));
467 			}
468 			*rp++ = '\\';
469 			break;
470 
471 		case '\n':
472 		case EOF:
			/*
			 * Unless text is still pending in globp, a newline or
			 * EOF ends the replacement and is pushed back for the
			 * caller.
			 */
473 			if (!(globp && globp[0])) {
474 				ungetchar(c);
475 				goto endrhs;
476 			}
477 			/* FALLTHROUGH */
478 
479 		case '~':
480 		case '&':
481 			if (value(vi_MAGIC))
482 				goto magic;
483 			break;
484 		}
		/* Store the character itself, keeping room for the NUL. */
485 		if (rp >= &rhsbuf[RHSSIZE - 1]) {
486 toobig:
487 			*rp = 0;
488 			error(value(vi_TERSE) ?
489 gettext("Replacement pattern too long") :
490 gettext("Replacement pattern too long - limit 256 characters"));
491 		}
492 		*rp++ = c;
493 	}
494 endrhs:
495 	*rp++ = 0;
496 }
497 
498 int
getsub(void)499 getsub(void)
500 {
501 	unsigned char *p;
502 
503 	if ((p = linebp) == 0)
504 		return (EOF);
505 	strcLIN(p);
506 	linebp = 0;
507 	return (0);
508 }
509 
510 int
dosubcon(bool f,line * a)511 dosubcon(bool f, line *a)
512 {
513 
514 	if (execute(f, a) == 0)
515 		return (0);
516 	if (confirmed(a)) {
517 		dosub();
518 		scount++;
519 	}
520 	return (1);
521 }
522 
523 int
confirmed(line * a)524 confirmed(line *a)
525 {
526 	int c, cnt, ch;
527 
528 	if (cflag == 0)
529 		return (1);
530 	pofix();
531 	pline(lineno(a));
532 	if (inopen)
533 		putchar('\n' | QUOTE);
534 	c = lcolumn(loc1);
535 	ugo(c, ' ');
536 	ugo(lcolumn(loc2) - c, '^');
537 	flush();
538 	cnt = 0;
539 bkup:
540 	ch = c = getkey();
541 again:
542 	if (c == '\b') {
543 		if ((inopen)
544 		 && (cnt > 0)) {
545 			putchar('\b' | QUOTE);
546 			putchar(' ');
547 			putchar('\b' | QUOTE), flush();
548 			cnt --;
549 		}
550 		goto bkup;
551 	}
552 	if (c == '\r')
553 		c = '\n';
554 	if (inopen && MB_CUR_MAX == 1 || c < 0200) {
555 		putchar(c);
556 		flush();
557 		cnt++;
558 	}
559 	if (c != '\n' && c != EOF) {
560 		c = getkey();
561 		goto again;
562 	}
563 	noteinp();
564 	return (ch == 'y');
565 }
566 
/*
 * ugo: write the character "with" cnt times; nothing is written when
 * cnt is zero or negative.
 */
void
ugo(int cnt, int with)
{

	while (cnt > 0) {
		putchar(with);
		cnt--;
	}
}
576 
/*
 * Case-conversion state shared by dosub(), place() and fixcase():
 * casecnt is how many more characters fixcase() will convert (1 after
 * \l or \u, LBSIZE after \L or \U, 0 after \e or \E); destuc selects
 * the direction, nonzero for upper case and zero for lower case.
 */
577 int	casecnt;
578 bool	destuc;
579 
/*
 * dosub: replace the text between loc1 and loc2 in linebuf by the
 * expansion of rhsbuf.
 *
 * The new line is assembled in genbuf: the part of linebuf before
 * loc1, then the replacement, then everything from loc2 on.  The result
 * is copied back with strcLIN() and loc2 is moved to the end of the
 * inserted text.  Running out of room in genbuf is reported through
 * error().
 */
580 void
dosub(void)581 dosub(void)
582 {
583 	unsigned char *lp, *sp, *rp;
584 	int c;
585 	int	len;
586 
587 	lp = linebuf;
588 	sp = genbuf;
589 	rp = rhsbuf;
	/* Copy the unchanged text in front of the match. */
590 	while (lp < (unsigned char *)loc1)
591 		*sp++ = *lp++;
592 	casecnt = 0;
593 	/*
594 	 * Caution: depending on the hardware, c will be either sign
595 	 * extended or not if C&QUOTE is set.  Thus, on a VAX, c will
596 	 * be < 0, but on a 3B, c will be >= 128.
597 	 */
598 	while (c = *rp) {
		/* len is the byte length of the character at rp, at least 1. */
599 		if ((len = mblen((char *)rp, MB_CUR_MAX)) <= 0)
600 			len = 1;
601 		/* ^V <return> from vi to split lines */
602 		if (c == '\r')
603 			c = '\n';
604 
605 		if (c == '\\') {
			/* Look at the escaped character and recompute len for it. */
606 			rp++;
607 			if ((len = mblen((char *)rp, MB_CUR_MAX)) <= 0)
608 				len = 1;
609 			switch (c = *rp++) {
610 
611 			case '&':
				/* \& in the stored form: insert the matched text. */
612 				sp = place(sp, loc1, loc2);
613 				if (sp == 0)
614 					goto ovflo;
615 				continue;
616 
617 			case 'l':
618 				casecnt = 1;
619 				destuc = 0;
620 				continue;
621 
622 			case 'L':
623 				casecnt = LBSIZE;
624 				destuc = 0;
625 				continue;
626 
627 			case 'u':
628 				casecnt = 1;
629 				destuc = 1;
630 				continue;
631 
632 			case 'U':
633 				casecnt = LBSIZE;
634 				destuc = 1;
635 				continue;
636 
637 			case 'E':
638 			case 'e':
639 				casecnt = 0;
640 				continue;
641 			}
			/* \1 .. \Nbra: insert the text of that subexpression. */
642 			if(re != NULL && c >= '1' && c < re->Nbra + '1') {
643 				sp = place(sp, braslist[c - '1'] , braelist[c - '1']);
644 				if (sp == 0)
645 					goto ovflo;
646 				continue;
647 			}
			/*
			 * Any other escaped character is copied literally below;
			 * step back so that rp points at it again.
			 */
648 			rp--;
649 		}
650 		if (len > 1) {
651 			if ((sp + len) >= &genbuf[LBSIZE])
652 				goto ovflo;
653 			strncpy(sp, rp, len);
654 		} else {
655 			if (casecnt)
656 				*sp = fixcase(c);
657 			else
658 				*sp = c;
659 		}
660 		sp += len; rp += len;
		/* ovflo is also the target of the overflow gotos in this function. */
661 		if (sp >= &genbuf[LBSIZE])
662 ovflo:
663 			error(value(vi_TERSE) ? gettext("Line overflow") :
664 gettext("Line overflow in substitute"));
665 	}
	/* Append the rest of the line, including its terminating NUL. */
666 	lp = (unsigned char *)loc2;
667 	loc2 = (char *)(linebuf + (sp - genbuf));
668 	while (*sp++ = *lp++)
669 		if (sp >= &genbuf[LBSIZE])
670 			goto ovflo;
671 	strcLIN(genbuf);
672 }
673 
674 int
fixcase(int c)675 fixcase(int c)
676 {
677 
678 	if (casecnt == 0)
679 		return (c);
680 	casecnt--;
681 	if (destuc) {
682 		if (islower(c))
683 			c = toupper(c);
684 	} else
685 		if (isupper(c))
686 			c = tolower(c);
687 	return (c);
688 }
689 
690 unsigned char *
place(sp,l1,l2)691 place(sp, l1, l2)
692 	unsigned char *sp, *l1, *l2;
693 {
694 
695 	while (l1 < l2) {
696 		*sp++ = fixcase(*l1++);
697 		if (sp >= &genbuf[LBSIZE])
698 			return (0);
699 	}
700 	return (sp);
701 }
702 
703 void
snote(int total,int nlines)704 snote(int total, int nlines)
705 {
706 
707 	if (!notable(total))
708 		return;
709 	if (nlines != 1 && nlines != total)
710 		viprintf(mesg(value(vi_TERSE) ?
711 			/*
712 			 * TRANSLATION_NOTE
713 			 *	Reference order of arguments must not
714 			 *	be changed using '%digit$', since vi's
715 			 *	viprintf() does not support it.
716 			 */
717 			    gettext("%d subs on %d lines") :
718 			/*
719 			 * TRANSLATION_NOTE
720 			 *	Reference order of arguments must not
721 			 *	be changed using '%digit$', since vi's
722 			 *	viprintf() does not support it.
723 			 */
724 			    gettext("%d substitutions on %d lines")),
725 		       total, nlines);
726 	else
727 		viprintf(mesg(value(vi_TERSE) ?
728 			    gettext("%d subs") :
729 			    gettext("%d substitutions")),
730 		       total);
731 	noonl();
732 	flush();
733 }
734 
735 #ifdef XPG4
736 #include <regex.h>
737 
738 extern int regcomp_flags;	/* use to specify cflags for regcomp() */
739 #endif /* XPG4 */
740 
/*
 * vi_compile: read a regular expression delimited by eof from the
 * command input and compile it into re.
 *
 * The expression is first rewritten into genbuf: the magic/nomagic
 * setting decides which of . ~ * [ are special, '~' is expanded to the
 * previous replacement text, and with ignorecase set the pattern
 * characters are lower-cased.  genbuf is then handed to the compile
 * routine.
 *
 * eof   delimiter character; letters, digits and (in multibyte locales)
 *       bytes >= 0200 are rejected.  A backslash selects a saved
 *       expression instead: \/ or \? the scanning re, \& the
 *       substitute re.
 * oknl  nonzero if a newline may end the expression in place of eof.
 *
 * Returns eof, or for the backslash forms the character that followed
 * the backslash.  Errors are reported through error()/cerror().
 */
741 int
vi_compile(int eof,int oknl)742 vi_compile(int eof, int oknl)
743 {
744 	int c;
745 	unsigned char *gp, *p1;
746 	unsigned char *rhsp;
747 	unsigned char rebuf[LBSIZE];
748 	char	multi[MB_LEN_MAX + 1];
749 	int	len;
750 	wchar_t	wc;
751 
752 #ifdef XPG4
753 	/*
754 	 * reset cflags to plain BRE
755 	 */
756 	regcomp_flags = 0;
757 #endif /* XPG4 */
758 
759 	gp = genbuf;
760 	if (isalpha(eof) || isdigit(eof))
761 error(gettext("Regular expressions cannot be delimited by letters or digits"));
762 	if(eof >= 0200 && MB_CUR_MAX > 1)
763 error(gettext("Regular expressions cannot be delimited by multibyte characters"));
764 	c = getchar();
	/* Backslash as delimiter: reuse one of the saved expressions. */
765 	if (eof == '\\')
766 		switch (c) {
767 
768 		case '/':
769 		case '?':
770 			if (scanre == NULL || scanre->Expbuf[1] == 0)
771 error(value(vi_TERSE) ? gettext("No previous scan re") :
772 gettext("No previous scanning regular expression"));
773 			resre(scanre);
774 			return (c);
775 
776 		case '&':
777 			if (subre == NULL || subre->Expbuf[1] == 0)
778 error(value(vi_TERSE) ? gettext("No previous substitute re") :
779 gettext("No previous substitute regular expression"));
780 			resre(subre);
781 			return (c);
782 
783 		default:
784 error(value(vi_TERSE) ? gettext("Badly formed re") :
785 gettext("Regular expression \\ must be followed by / or ?"));
786 		}
	/* Empty expression: keep using whatever is already in re. */
787 	if (c == eof || c == '\n' || c == EOF) {
788 		if (re == NULL || re->Expbuf[1] == 0)
789 error(value(vi_TERSE) ? gettext("No previous re") :
790 gettext("No previous regular expression"));
791 		if (c == '\n' && oknl == 0)
792 error(value(vi_TERSE) ? gettext("Missing closing delimiter") :
793 gettext("Missing closing delimiter for regular expression"));
794 		if (c != eof)
795 			ungetchar(c);
796 		return (eof);
797 	}
798 	gp = genbuf;
	/* A leading '^' is copied as is. */
799 	if (c == '^') {
800 		*gp++ = c;
801 		c = getchar();
802 	}
803 	ungetchar(c);
	/* Rewrite the expression into genbuf, one input character at a time. */
804 	for (;;) {
805 		c = getchar();
806 		if (c == eof || c == EOF) {
807 			if (c == EOF)
808 				ungetchar(c);
809 			goto out;
810 		}
		/* complex is also the target for every overflow check below. */
811 		if (gp >= &genbuf[LBSIZE - 3])
812 complex:
813 			cerror(value(vi_TERSE) ?
814 			    (unsigned char *)gettext("Re too complex") :
815 			    (unsigned char *)
816 			    gettext("Regular expression too complicated"));
817 
		/* Multibyte character: push it back and copy it whole. */
818 		if (!(isascii(c) || MB_CUR_MAX == 1)) {
819 			ungetchar(c);
820 			if ((len = _mbftowc(multi, &wc, getchar, &peekc)) >= 1) {
821 				if ((gp + len) >= &genbuf[LBSIZE - 3])
822 					goto complex;
823 				strncpy(gp, multi, len);
824 				gp += len;
825 				continue;
826 			}
827 			(void) getchar();
828 		}
829 
830 		switch (c) {
831 
832 		case '\\':
833 			c = getchar();
834 			if (!isascii(c)) {
835 				ungetchar(c);
836 				if ((len = _mbftowc(multi, &wc, getchar, &peekc)) >= 1) {
837 					if ((gp + len) >= &genbuf[LBSIZE - 3])
838 						goto complex;
839 					*gp++ = '\\';
840 					strncpy(gp, multi, len);
841 					gp += len;
842 					continue;
843 				}
844 				(void) getchar();
845 			}
846 
			/* These escapes are passed through with their backslash. */
847 			switch (c) {
848 
849 			case '<':
850 			case '>':
851 			case '(':
852 			case ')':
853 			case '{':
854 			case '}':
855 			case '$':
856 			case '^':
857 			case '\\':
858 				*gp++ = '\\';
859 				*gp++ = c;
860 				continue;
861 
862 			case 'n':
				/* \n is stored as a plain 'n'. */
863 				*gp++ = c;
864 				continue;
865 			}
866 			if(c >= '0' && c <= '9') {
867 				*gp++ = '\\';
868 				*gp++ = c;
869 				continue;
870 			}
			/*
			 * With nomagic an escaped . ~ * [ is special and is
			 * handled by the switch below.  With magic the switch
			 * is skipped here and is entered only through the
			 * "goto magic" in the unescaped cases further down.
			 */
871 			if (value(vi_MAGIC) == 0)
872 magic:
873 			switch (c) {
874 
875 			case '.':
876 				*gp++ = '.';
877 				continue;
878 
879 			case '~':
				/*
				 * Insert the previous replacement text (rhsbuf),
				 * quoting characters that are special in an re and
				 * refusing & and \digit.
				 */
880 				rhsp = rhsbuf;
881 				while (*rhsp) {
882 					if (!isascii(*rhsp)) {
883 						if ((len = mbtowc((wchar_t *)0, (char *)rhsp, MB_CUR_MAX)) > 1) {
884 							if ((gp + len) >= &genbuf[LBSIZE-2])
885 								goto complex;
886 							strncpy(gp, rhsp, len);
887 							rhsp += len; gp += len;
888 							continue;
889 						}
890 					}
891 					len = 1;
892 					if (*rhsp == '\\') {
893 						c = *++rhsp;
894 						if (c == '&')
895 cerror(value(vi_TERSE) ? (unsigned char *)
896 gettext("Replacement pattern contains &") :
897 (unsigned char *)gettext("Replacement pattern contains & - cannot use in re"));
898 						if (c >= '1' && c <= '9')
899 cerror(value(vi_TERSE) ? (unsigned char *)
900 gettext("Replacement pattern contains \\d") :
901 (unsigned char *)
902 gettext("Replacement pattern contains \\d - cannot use in re"));
903 						if ((len = mbtowc((wchar_t *)0, (char *)rhsp, MB_CUR_MAX)) <= 1) {
904 							len = 1;
905 							if(any(c, ".\\*[$"))
906 								*gp++ = '\\';
907 						}
908 					}
909 
910 					if ((gp + len) >= &genbuf[LBSIZE-2])
911 						goto complex;
912 					if (len == 1) {
913 						c = *rhsp++;
914 						*gp++ = (value(vi_IGNORECASE) ? tolower(c) : c);
915 					} else {
916 						strncpy(gp, rhsp, len);
917 						gp += len; rhsp += len;
918 					}
919 				}
920 				continue;
921 
922 			case '*':
923 				*gp++ = '*';
924 				continue;
925 
926 			case '[':
				/* Copy a bracket expression up to the closing ']'. */
927 				*gp++ = '[';
928 				c = getchar();
929 				if (c == '^') {
930 					*gp++ = '^';
931 					c = getchar();
932 				}
933 
934 				do {
935 					if (!isascii(c) && c != EOF) {
936 						ungetchar(c);
937 						if ((len = _mbftowc(multi, &wc, getchar, &peekc)) >= 1) {
938 							if ((gp + len)>= &genbuf[LBSIZE-4])
939 								goto complex;
940 							strncpy(gp, multi, len);
941 							gp += len;
942 							c = getchar();
943 							continue;
944 						}
945 						(void) getchar();
946 					}
947 
948 					if (gp >= &genbuf[LBSIZE-4])
949 						goto complex;
					/* \] inside brackets is kept as the two characters \ and ]. */
950 					if(c == '\\' && peekchar() == ']') {
951 						(void)getchar();
952 						*gp++ = '\\';
953 						*gp++ = ']';
954 					}
955 					else if (c == '\n' || c == EOF)
956 						cerror((unsigned char *)
957 						    gettext("Missing ]"));
958 					else
959 						*gp++ = (value(vi_IGNORECASE) ? tolower(c) : c);
960 					c = getchar();
961 				} while(c != ']');
962 				*gp++ = ']';
963 				continue;
964 			}
			/* Backslash directly before EOF: store an escaped backslash. */
965 			if (c == EOF) {
966 				ungetchar(EOF);
967 				*gp++ = '\\';
968 				*gp++ = '\\';
969 				continue;
970 			}
971 			if (c == '\n')
972 cerror(value(vi_TERSE) ? (unsigned char *)gettext("No newlines in re's") :
973 (unsigned char *)gettext("Can't escape newlines into regular expressions"));
			/* Any other escaped character keeps its backslash. */
974 			*gp++ = '\\';
975 			*gp++ = (value(vi_IGNORECASE) ? tolower(c) : c);
976 			continue;
977 
978 		case '\n':
979 			if (oknl) {
980 				ungetchar(c);
981 				goto out;
982 			}
983 cerror(value(vi_TERSE) ? (unsigned char *)gettext("Badly formed re") :
984 (unsigned char *)gettext("Missing closing delimiter for regular expression"));
985 			/* FALLTHROUGH */
986 
987 		case '.':
988 		case '~':
989 		case '*':
990 		case '[':
			/*
			 * Unescaped metacharacter: special when magic; otherwise
			 * it is quoted with a backslash (except '~', which is
			 * stored unquoted).
			 */
991 			if (value(vi_MAGIC))
992 				goto magic;
993 			if(c != '~')
994 				*gp++ = '\\';
995 			/* FALLTHROUGH */
996 defchar:
997 		default:
998 			*gp++ = (value(vi_IGNORECASE) ? tolower(c) : c);
999 			continue;
1000 		}
1001 	}
1002 out:
1003 	*gp++ = '\0';
1004 
1005 #ifdef XPG4
1006 	/* see if our compiled RE's will fit in the re structure:	*/
1007 	if (regexc_size > EXPSIZ) {
1008 		/*
1009 		 * this should never happen. but it's critical that we
1010 		 * check here, otherwise .bss would get overwritten.
1011 		 */
1012 		cerror(value(vi_TERSE) ? (unsigned char *)
1013 		    gettext("RE's can't fit") :
1014 		    (unsigned char *)gettext("Regular expressions can't fit"));
1015 		return(eof);
1016 	}
1017 
1018 	/*
1019 	 * We create re each time we need it.
1020 	 */
1021 
	/*
	 * If re is unset, or is the same object as scanre or subre, a new
	 * area is allocated; otherwise the existing one is released and
	 * cleared for reuse.
	 */
1022 	if (re == NULL || re == scanre || re == subre) {
1023 		if ((re = calloc(1, sizeof(struct regexp))) == NULL) {
1024 			error(gettext("out of memory"));
1025 			exit(errcnt);
1026 		}
1027 	} else {
1028 		regex_comp_free(&re->Expbuf);
1029 		memset(re, 0, sizeof(struct regexp));
1030 	}
1031 
1032 	compile((char *) genbuf, (char *) re->Expbuf, (char *) re->Expbuf
1033 	    + regexc_size);
1034 #else /* !XPG4 */
1035 	(void) _compile((const char *)genbuf, (char *)re->Expbuf,
1036 		(char *)(re->Expbuf + sizeof (re->Expbuf)), 1);
1037 #endif /* XPG4 */
1038 
	/* Turn the compile routine's error number into a message. */
1039 	if(regerrno)
1040 		switch(regerrno) {
1041 
1042 		case 42:
1043 cerror((unsigned char *)gettext("\\( \\) Imbalance"));
1044 			/* FALLTHROUGH */
1045 		case 43:
1046 cerror(value(vi_TERSE) ? (unsigned char *)gettext("Awash in \\('s!") :
1047 (unsigned char *)
1048 gettext("Too many \\('d subexpressions in a regular expression"));
1049 		case 50:
1050 			goto complex;
1051 		case 67:
1052 cerror(value(vi_TERSE) ? (unsigned char *)gettext("Illegal byte sequence") :
1053 (unsigned char *)gettext("Regular expression has illegal byte sequence"));
1054 		}
	/* Nbra is used by dosub() to bound the \digit references. */
1055 	re->Nbra = nbra;
1056 	return(eof);
1057 }
1058 
1059 void
cerror(unsigned char * s)1060 cerror(unsigned char *s)
1061 {
1062 	if (re) {
1063 		re->Expbuf[0] = re->Expbuf[1] = 0;
1064 	}
1065 	error(s);
1066 }
1067 
1068 int
execute(int gf,line * addr)1069 execute(int gf, line *addr)
1070 {
1071 	unsigned char *p1, *p2;
1072 	char *start;
1073 	int c, i;
1074 	int ret;
1075 	int	len;
1076 
1077 	if (gf) {
1078 		if (re == NULL || re->Expbuf[0])
1079 			return (0);
1080 		if(value(vi_IGNORECASE)) {
1081 			p1 = genbuf;
1082 			p2 = (unsigned char *)loc2;
1083 			while(c = *p2) {
1084 				if ((len = mblen((char *)p2, MB_CUR_MAX)) <= 0)
1085 					len = 1;
1086 				if (len == 1) {
1087 					*p1++ = tolower(c);
1088 					p2++;
1089 					continue;
1090 				}
1091 				strncpy(p1, p2, len);
1092 				p1 += len; p2 += len;
1093 			}
1094 			*p1 = '\0';
1095 			locs = (char *)genbuf;
1096 			p1 = genbuf;
1097 			start = loc2;
1098 		} else {
1099 			p1 = (unsigned char *)loc2;
1100 			locs = loc2;
1101 		}
1102 	} else {
1103 		if (addr == zero)
1104 			return (0);
1105 		p1 = linebuf;
1106 		getaline(*addr);
1107 		if(value(vi_IGNORECASE)) {
1108 			p1 = genbuf;
1109 			p2 = linebuf;
1110 			while(c = *p2) {
1111 				if ((len = mblen((char *)p2, MB_CUR_MAX)) <= 0)
1112 					len = 1;
1113 				if (len == 1) {
1114 					*p1++ = tolower(c);
1115 					p2++;
1116 					continue;
1117 				}
1118 				strncpy(p1, p2, len);
1119 				p1 += len; p2 += len;
1120 			}
1121 			*p1 = '\0';
1122 			p1 = genbuf;
1123 			start = (char *)linebuf;
1124 		}
1125 		locs = (char *)0;
1126 	}
1127 
1128 	ret = step((char *)p1, (char *)re->Expbuf);
1129 
1130 	if(value(vi_IGNORECASE) && ret) {
1131 		loc1 = start + (loc1 - (char *)genbuf);
1132 		loc2 = start + (loc2 - (char *)genbuf);
1133 		for(i = 0; i < NBRA; i++) {
1134 			braslist[i] = start + (braslist[i] - (char *)genbuf);
1135 			braelist[i] = start + (braelist[i] - (char *)genbuf);
1136 		}
1137 	}
1138 	return ret;
1139 }
1140 
1141 /*
1142  *  Initialize the compiled regular-expression storage areas (called from
1143  *  main()).
1144  */
1145 
init_re(void)1146 void init_re (void)
1147 {
1148 #ifdef XPG4
1149 	re = scanre = subre = NULL;
1150 #else /* !XPG4 */
1151 	if ((re = calloc(1, sizeof(struct regexp))) == NULL) {
1152 		error(gettext("out of memory"));
1153 		exit(errcnt);
1154 	}
1155 
1156 	if ((scanre = calloc(1, sizeof(struct regexp))) == NULL) {
1157 		error(gettext("out of memory"));
1158 		exit(errcnt);
1159 	}
1160 
1161 	if ((subre = calloc(1, sizeof(struct regexp))) == NULL) {
1162 		error(gettext("out of memory"));
1163 		exit(errcnt);
1164 	}
1165 #endif /* XPG4 */
1166 }
1167 
1168 /*
1169  *  Save what is in the special place re to the named alternate
1170  *  location.  This means freeing up what's currently in this target
1171  *  location, if necessary.
1172  */
1173 
savere(struct regexp ** a)1174 void savere(struct regexp ** a)
1175 {
1176 #ifdef XPG4
1177 	if (a == NULL || re == NULL) {
1178 		return;
1179 	}
1180 
1181 	if (*a == NULL) {
1182 		*a = re;
1183 		return;
1184 	}
1185 
1186 	if (*a != re) {
1187 		if (scanre != subre) {
1188 			regex_comp_free(&((*a)->Expbuf));
1189 			free(*a);
1190 		}
1191 		*a = re;
1192 	}
1193 #else /* !XPG4 */
1194 	memcpy(*a, re, sizeof(struct regexp));
1195 #endif /* XPG4 */
1196 }
1197 
1198 
1199 /*
1200  *  Restore what is in the named alternate location to the special place
1201  *  re.  This means first freeing up what's currently in re, if necessary.
1202  */
1203 
resre(struct regexp * a)1204 void resre(struct regexp * a)
1205 {
1206 #ifdef XPG4
1207 	if (a == NULL) {
1208 		return;
1209 	}
1210 
1211 	if (re == NULL) {
1212 		re = a;
1213 		return;
1214 	}
1215 
1216 	if (a != re) {
1217 		if ((re != scanre) && (re != subre)) {
1218 			regex_comp_free(&re->Expbuf);
1219 			free(re);
1220 		}
1221 
1222 		re = a;
1223 	}
1224 #else /* !XPG4 */
1225 	memcpy(re, a, sizeof(struct regexp));
1226 #endif /* XPG4 */
1227 }
1228