xref: /illumos-gate/usr/src/cmd/vi/misc/ctags.c (revision 45dce8f0)
1 /*
2  * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
3  */
4 
5 /*	Copyright (c) 1988 AT&T	*/
6 /*	All Rights Reserved	*/
7 
8 /*
9  * Copyright (c) 1980 Regents of the University of California.
10  * All rights reserved.  The Berkeley software License Agreement
11  * specifies the terms and conditions for redistribution.
12  */
13 
14 /*
15  *   Modify ctags to handle C++ in C_entries(), etc:
16  *	-  Handles C++ comment token "//"
17  *	-  Handles C++ scope operator "::".
18  *		This helps to distinguish between xyz()
19  *	   definition and X::xyz() definition.
20  *	-  Recognizes C++ reserved word "class" in typedef processing
21  *		(for "-t" option)
22  *	-  Handles Sun C++ special file name extensions: .c, .C, .cc, and .cxx.
23  *	-  Handles overloaded unary/binary operator names
24  *   Doesn't handle yet:
25  *	-  inline functions in class definition (currently they get
26  *		swallowed within a class definition)
27  *	-  Tags with scope operator :: with spaces in between,
28  *		e.g. classz ::afunc
29  *
30  *   Enhance operator functions support:
31  *	-  Control flow involving operator tokens scanning are
32  *	   consistent with that of other function tokens - original
33  *	   hacking method for 2.0 is removed.  This will accurately
34  *	   identify tags for declarations of the form 'operator+()'
35  *	   (bugid 1027806) as well as allowing spaces in between
36  *	   'operator' and 'oprtk', e.g. 'operator + ()'.
37  *
38  */
39 
#ifndef lint
/* Copyright notice embedded in the object; omitted when building for lint. */
char copyright[] =
	"@(#) Copyright (c) 1980 Regents of the University of California.\n"
	"All rights reserved.\n";
#endif
44 
45 #include <stdio.h>
46 #include <ctype.h>
47 #include <locale.h>
48 #include <unistd.h>
49 #include <stdlib.h>
50 #include <string.h>
51 #include <strings.h>
52 #include <limits.h>
53 #include <sys/types.h>
54 #include <sys/stat.h>
55 
56 /*
57  * ctags: create a tags file
58  */
59 
#define	bool	char

#define	TRUE	(1)
#define	FALSE	(0)

#define	CPFLAG	3			/* # of bytes in a flag		*/

/*
 * Character-class lookup tables.  Each table has one slot per possible
 * unsigned char value and every lookup converts its argument to
 * unsigned char first, so that DEL, bytes with the high bit set and EOF
 * (which getc() can hand to these macros) never index outside the table.
 */
#define	NCHARS	(UCHAR_MAX + 1)		/* slots per class table	*/
#define	chridx(arg)	((unsigned char)(arg))

#define	iswhite(arg)	(_wht[chridx(arg)])	/* T if char is white	*/
#define	begtoken(arg)	(_btk[chridx(arg)])	/* T if char can start token */
#define	intoken(arg)	(_itk[chridx(arg)])	/* T if char can be in token */
#define	endtoken(arg)	(_etk[chridx(arg)])	/* T if char ends tokens */
#define	isgood(arg)	(_gd[chridx(arg)])	/* T if char can be after ')' */

#define	optoken(arg)	(_opr[chridx(arg)])	/* T if char can be	*/
					/* an overloaded operator token	*/

/* arguments and result are parenthesized so the macro nests safely */
#define	max(I1, I2)	((I1) > (I2) ? (I1) : (I2))

struct	nd_st {			/* sorting structure			*/
	char	*entry;			/* function or type name	*/
	char	*file;			/* file name			*/
	bool	f;			/* use pattern or line no	*/
	int	lno;			/* for -x option		*/
	char	*pat;			/* search pattern		*/
	bool	been_warned;		/* set if noticed dup		*/
	struct	nd_st	*left, *right;	/* left and right sons		*/
};

/* ftell() is declared by <stdio.h>; no local K&R declaration is needed */
typedef	struct	nd_st	NODE;

static bool
	number,				/* T if on line starting with #	*/
	gotone,				/* found a func already on line	*/
					/* boolean "func" (see init)	*/
	_wht[NCHARS], _etk[NCHARS], _itk[NCHARS], _btk[NCHARS], _gd[NCHARS];

/* boolean array for overloadable operator symbols			*/
static bool	_opr[NCHARS];
99 
100 	/*
101 	 * typedefs are recognized using a simple finite automata,
102 	 * tydef is its state variable.
103 	 */
104 typedef enum {none, begin, begin_rec, begin_tag, middle, end } TYST;
105 
106 static TYST tydef = none;
107 
108 static char	searchar = '/';		/* use /.../ searches		*/
109 
110 #define	LINEBUFSIZ	4*BUFSIZ
111 
112 static int	lineno;			/* line number of current line */
113 static char
114 	line[LINEBUFSIZ],	/* current input line			*/
115 	*curfile,		/* current input file name		*/
116 	*outfile = "tags",	/* output file				*/
117 	*white	= " \f\t\n",	/* white chars				*/
118 	*endtk	= " \t\n\"'#()[]{}=-+%*/&|^~!<>;,.:?",
119 				/* token ending chars			*/
120 	*begtk	= "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz",
121 				/* token starting chars			*/
122 	*intk	= "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz"
123 		    "0123456789",
124 				/* valid in-token chars			*/
125 	*notgd	= ",;";		/* non-valid after-function chars	*/
126 
127 static char	*oprtk	= " =-+%*/&|^~!<>[]()";	/* overloadable operators */
128 
129 static int	file_num;	/* current file number			*/
130 static int	aflag;		/* -a: append to tags */
131 
132 #ifndef XPG4			/* XPG4: handle typedefs by default	*/
133 static int	tflag;		/* -t: create tags for typedefs		*/
134 #endif /*  !XPG4 */
135 
136 static int	uflag;		/* -u: update tags			*/
137 static int	wflag;		/* -w: suppress warnings		*/
138 static int	vflag;		/* -v: create vgrind style index output */
139 static int	xflag;		/* -x: create cxref style output	*/
140 
141 static char	lbuf[LINEBUFSIZ];
142 
143 static FILE
144 	*inf,			/* ioptr for current input file		*/
145 	*outf;			/* ioptr for tags file			*/
146 
147 static long	lineftell;	/* ftell after getc( inf ) == '\n'	*/
148 
149 static NODE	*head;		/* the head of the sorted binary tree	*/
150 
151 static int	infile_fail;	/* Count of bad opens. Fix bug ID #1082298 */
152 
153 static char	*dbp = lbuf;
154 static int	pfcnt;
155 
156 static int	mac;		/* our modified argc, after parseargs() */
157 static char	**mav;		/* our modified argv, after parseargs() */
158 
159 
160 /* our local functions:							*/
161 static void	init(void);
162 static void	find_entries(char *);
163 static void	pfnote(char *, int, bool);
164 static void	C_entries(void);
165 static int	start_entry(char **, char *, int *);
166 static void	Y_entries(void);
167 static char	*toss_comment(char *);
168 static void	getaline(long int);
169 static void	free_tree(NODE *);
170 static void	add_node(NODE *, NODE *);
171 static void	put_entries(NODE *);
172 static int	PF_funcs(FILE *);
173 static int	tail(char *);
174 static void	takeprec(void);
175 static void	getit(void);
176 static char	*savestr(char *);
177 static void	L_funcs(FILE *);
178 static void	L_getit(int);
179 static int	striccmp(char *, char *);
180 static int	first_char(void);
181 static void	toss_yysec(void);
182 static void	Usage(void);
183 static void	parseargs(int, char **);
184 
185 int
main(int ac,char * av[])186 main(int ac, char *av[])
187 {
188 	int i;
189 	char cmd[100];
190 
191 	(void) setlocale(LC_ALL, "");
192 #if !defined(TEXT_DOMAIN)
193 #define	TEXT_DOMAIN "SYS_TEST"
194 #endif
195 	(void) textdomain(TEXT_DOMAIN);
196 
197 	parseargs(ac, av);
198 
199 	while ((i = getopt(mac, mav, "aBFtuvwxf:")) != EOF) {
200 		switch (i) {
201 		case 'a':	/* -a: Append output to existing tags file */
202 			aflag++;
203 			break;
204 
205 		case 'B':	/* -B: Use backward search patterns (?...?) */
206 			searchar = '?';
207 			break;
208 
209 		case 'F':	/* -F: Use forward search patterns (/.../) */
210 			searchar = '/';
211 			break;
212 
213 		case 't':	/* -t: Create tags for typedefs.	*/
214 				/* for XPG4 , we silently ignore "-t".	*/
215 #ifndef XPG4
216 			tflag++;
217 #endif /*  !XPG4 */
218 			break;
219 
220 		case 'u':	/* -u: Update the specified tags file	*/
221 			uflag++;
222 			break;
223 
224 		case 'v':	/* -v: Index listing on stdout		*/
225 			vflag++;
226 			xflag++;
227 			break;
228 
229 		case 'w':	/* -w: Suppress warnings		*/
230 			wflag++;
231 			break;
232 
233 		case 'x':	/* -x: Produce a simple index		*/
234 			xflag++;
235 			break;
236 
237 		case 'f':	/* -f tagsfile: output to tagsfile	*/
238 			outfile = strdup(optarg);
239 			break;
240 
241 		default:
242 			Usage();	/* never returns		*/
243 			break;
244 		}
245 	}
246 
247 	/* if we didn't specify any source code to parse, complain and die. */
248 	if (optind == mac) {
249 		Usage();	/* never returns		*/
250 	}
251 
252 
253 	init();			/* set up boolean "functions"		*/
254 	/*
255 	 * loop through files finding functions
256 	 */
257 	for (file_num = optind; file_num < mac; file_num++)
258 		find_entries(mav[file_num]);
259 
260 	if (xflag) {
261 		put_entries(head);
262 		exit(infile_fail > 0 ? 2 : 0); /* Fix for 1082298 */
263 	}
264 	if (uflag) {
265 		for (i = 1; i < mac; i++) {
266 			(void) sprintf(cmd,
267 			"mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
268 				outfile, mav[i], outfile);
269 			(void) system(cmd);
270 		}
271 		aflag++;
272 	}
273 	outf = fopen(outfile, aflag ? "a" : "w");
274 	if (outf == NULL) {
275 		perror(outfile);
276 		exit(1);
277 	}
278 	put_entries(head);
279 	(void) fclose(outf);
280 	if (uflag) {
281 		(void) sprintf(cmd, "sort %s -o %s", outfile, outfile);
282 		(void) system(cmd);
283 	}
284 	return (infile_fail > 0 ? 2 : 0); /* Fix for #1082298 */
285 }
286 
/*
 * This routine sets up the boolean pseudo-functions, which work
 * by setting boolean flags dependent upon the corresponding character.
 * Every char which is NOT in that string is not a white char.  Therefore,
 * all of the array "_wht" is set to FALSE, and then the elements
 * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
 * of a char is TRUE if it is in the string "white", else FALSE.
 */
295 static void
init(void)296 init(void)
297 {
298 	char	*sp;
299 	int	i;
300 
301 	for (i = 0; i < 0177; i++) {
302 		_wht[i] = _etk[i] = _itk[i] = _btk[i] = FALSE;
303 		_opr[i] = FALSE;	/* initialize boolean		*/
304 					/* array of operator symbols	*/
305 		_gd[i] = TRUE;
306 	}
307 	for (sp = white; *sp; sp++)
308 		_wht[*sp] = TRUE;
309 	for (sp = endtk; *sp; sp++)
310 		_etk[*sp] = TRUE;
311 	for (sp = intk; *sp; sp++)
312 		_itk[*sp] = TRUE;
313 	for (sp = begtk; *sp; sp++)
314 		_btk[*sp] = TRUE;
315 
316 	/* mark overloadable operator symbols				*/
317 	for (sp = oprtk; *sp; sp++)
318 		_opr[*sp] = TRUE;
319 
320 	for (sp = notgd; *sp; sp++)
321 		_gd[*sp] = FALSE;
322 }
323 
324 /*
325  * This routine opens the specified file and calls the function
326  * which finds the function and type definitions.
327  */
328 static void
find_entries(char * file)329 find_entries(char *file)
330 {
331 	char *cp;
332 	struct stat st;
333 
334 	/* skip anything that isn't a regular file */
335 	if (stat(file, &st) == 0 && !S_ISREG(st.st_mode))
336 		return;
337 
338 	if ((inf = fopen(file, "r")) == NULL) {
339 		perror(file);
340 		infile_fail++;		/* Count bad opens. ID #1082298 */
341 		return;
342 	}
343 	curfile = savestr(file);
344 	lineno = 0;
345 	cp = strrchr(file, '.');
346 	/* .l implies lisp or lex source code */
347 	if (cp && cp[1] == 'l' && cp[2] == '\0') {
348 		if (strchr(";([", first_char()) != NULL)	/* lisp */
349 		{
350 			L_funcs(inf);
351 			(void) fclose(inf);
352 			return;
353 		} else {					/* lex */
354 			/*
355 			 * throw away all the code before the second "%%"
356 			 */
357 			toss_yysec();
358 			getaline(lineftell);
359 			pfnote("yylex", lineno, TRUE);
360 			toss_yysec();
361 			C_entries();
362 			(void) fclose(inf);
363 			return;
364 		}
365 	}
366 	/* .y implies a yacc file */
367 	if (cp && cp[1] == 'y' && cp[2] == '\0') {
368 		toss_yysec();
369 		Y_entries();
370 		C_entries();
371 		(void) fclose(inf);
372 		return;
373 	}
374 
375 	/*
376 	 * Add in file name extension support for Sun C++ which
377 	 * permits .C/.c (AT&T), .cc (G++) and .cxx (Gloksp.)
378 	 */
379 
380 	/* if not a .c, .C, .cc, .cxx or .h file, try fortran */
381 	if (cp && (cp[1] != 'C' && cp[1] != 'c' && cp[1] != 'h') &&
382 	    cp[2] == '\0' && (strcmp(cp, ".cc") == 0) &&
383 	    (strcmp(cp, ".cxx") == 0)) {
384 		if (PF_funcs(inf) != 0) {
385 			(void) fclose(inf);
386 			return;
387 		}
388 		rewind(inf);	/* no fortran tags found, try C */
389 	}
390 	C_entries();
391 	(void) fclose(inf);
392 }
393 
394 static void
pfnote(char * name,int ln,bool f)395 pfnote(char *name, int ln, bool f)
396 {
397 	char *fp;
398 	NODE *np;
399 	char *nametk;	/* hold temporary tokens from name */
400 	char nbuf[BUFSIZ];
401 
402 	if ((np = malloc(sizeof (NODE))) == NULL) {
403 		(void) fprintf(stderr,
404 				gettext("ctags: too many entries to sort\n"));
405 		put_entries(head);
406 		free_tree(head);
407 		head = np = (NODE *) malloc(sizeof (NODE));
408 	}
409 	if (xflag == 0 && (strcmp(name, "main") == 0)) {
410 		fp = strrchr(curfile, '/');
411 
412 		if (fp == 0)
413 			fp = curfile;
414 		else
415 			fp++;
416 		(void) sprintf(nbuf, "M%s", fp);
417 		fp = strrchr(nbuf, '.');
418 		/* Chop off .cc and .cxx as well as .c, .h, etc		*/
419 		if (fp && ((fp[2] == 0) || (fp[2] == 'c' && fp[3] == 0) ||
420 			    (fp[3] == 'x' && fp[4] == 0)))
421 			*fp = 0;
422 		name = nbuf;
423 	}
424 
425 	/* remove in-between blanks operator function tags */
426 	if (strchr(name, ' ') != NULL)
427 	{
428 		(void) strcpy(name, strtok(name, " "));
429 		while ((nametk = strtok(0, " ")) != NULL)
430 			(void) strcat(name, nametk);
431 	}
432 	np->entry = savestr(name);
433 	np->file = curfile;
434 	np->f = f;
435 	np->lno = ln;
436 	np->left = np->right = 0;
437 	if (xflag == 0) {
438 		lbuf[50] = 0;
439 		(void) strcat(lbuf, "$");
440 		lbuf[50] = 0;
441 	}
442 	np->pat = savestr(lbuf);
443 	if (head == NULL)
444 		head = np;
445 	else
446 		add_node(np, head);
447 }
448 
449 /*
450  * This routine finds functions and typedefs in C syntax and adds them
451  * to the list.
452  */
453 static void
C_entries(void)454 C_entries(void)
455 {
456 	int c;
457 	char *token, *tp;
458 	bool incomm, inquote, inchar, midtoken, isoperator, optfound;
459 	int level;
460 	char *sp;
461 	char tok[BUFSIZ];
462 	long int tokftell;
463 
464 	number = gotone = midtoken = inquote = inchar =
465 	incomm = isoperator = optfound = FALSE;
466 
467 	level = 0;
468 	sp = tp = token = line;
469 	lineno++;
470 	lineftell = tokftell = ftell(inf);
471 	for (;;) {
472 		*sp = c = getc(inf);
473 		if (feof(inf))
474 			break;
475 		if (c == '\n') {
476 			lineftell = ftell(inf);
477 			lineno++;
478 		} else if (c == '\\') {
479 			c = *++sp = getc(inf);
480 			if ((c == '\n') || (c == EOF)) { /* c == EOF, 1091005 */
481 				lineftell = ftell(inf);
482 				lineno++;
483 				c = ' ';
484 			}
485 		} else if (incomm) {
486 			if (c == '*') {
487 				while ((*++sp = c = getc(inf)) == '*')
488 					continue;
489 
490 				/* c == EOF 1091005			*/
491 				if ((c == '\n') || (c == EOF)) {
492 					lineftell = ftell(inf);
493 					lineno++;
494 				}
495 
496 				if (c == '/')
497 					incomm = FALSE;
498 			}
499 		} else if (inquote) {
500 			/*
501 			 * Too dumb to know about \" not being magic, but
502 			 * they usually occur in pairs anyway.
503 			 */
504 			if (c == '"')
505 				inquote = FALSE;
506 			continue;
507 		} else if (inchar) {
508 			if (c == '\'')
509 				inchar = FALSE;
510 			continue;
511 		} else if (midtoken == TRUE) {	/* if white space omitted */
512 			goto dotoken;
513 		} else switch (c) {
514 		    case '"':
515 			inquote = TRUE;
516 			continue;
517 		    case '\'':
518 			inchar = TRUE;
519 			continue;
520 		    case '/':
521 			*++sp = c = getc(inf);
522 			/* Handles the C++ comment token "//"		*/
523 			if (c == '*')
524 				incomm = TRUE;
525 			else if (c == '/') {
526 				/*
527 				 * Skip over all the characters after
528 				 * "//" until a newline character. Now also
529 				 * includes fix for 1091005, check for EOF.
530 				 */
531 				do  {
532 					c = getc(inf);
533 				/* 1091005:				*/
534 				} while ((c != '\n') && (c != EOF));
535 
536 
537 				/*
538 				 * Fixed bugid 1030014
539 				 * Return the current position of the
540 				 * file after the newline.
541 				 */
542 				lineftell = ftell(inf);
543 				lineno++;
544 				*--sp = c;
545 			}
546 			else
547 				(void) ungetc(*sp, inf);
548 			continue;
549 		    case '#':
550 			if (sp == line)
551 				number = TRUE;
552 			continue;
553 		    case '{':
554 			if ((tydef == begin_rec) || (tydef == begin_tag)) {
555 				tydef = middle;
556 			}
557 			level++;
558 			continue;
559 		    case '}':
560 			/*
561 			 * Heuristic for function or structure end;
562 			 * common for #ifdef/#else blocks to add extra "{"
563 			 */
564 			if (sp == line)
565 				level = 0;	/* reset */
566 			else
567 				level--;
568 			if (!level && tydef == middle) {
569 				tydef = end;
570 			}
571 			if (!level && tydef == none) /* Fix for #1034126 */
572 				goto dotoken;
573 			continue;
574 		}
575 
576 dotoken:
577 
578 
579 		if (!level && !inquote && !incomm && gotone == FALSE) {
580 			if (midtoken) {
581 				if (endtoken(c)) {
582 
583 				/*
584 				 *
585 				 *    ':'  +---> ':' -> midtok
586 				 *
587 				 *    +---> operator{+,-, etc} -> midtok
588 				 *		(continue)
589 				 *    +---> endtok
590 				 */
591 		/*
592 		 * Enhance operator function support and
593 		 *	fix bugid 1027806
594 		 *
595 		 *  For operator token, scanning will continue until
596 		 *  '(' is found.  Spaces between 'operater' and
597 		 *  'oprtk' are allowed (e.g. 'operator + ()'), but
598 		 *  will be removed when the actual entry for the tag
599 		 *  is made.
600 		 *  Note that functions of the form 'operator ()(int)'
601 		 *  will be recognized, but 'operator ()' will not,
602 		 *  even though this is legitimate in C.
603 		 */
604 
605 					if (optoken(c)) {
606 					    if (isoperator) {
607 					    if (optfound) {
608 						    if (c != '(') {
609 						    tp++;
610 						    goto next_char;
611 						    }
612 					    } else {
613 						    if (c != ' ') {
614 						    optfound = TRUE;
615 						    }
616 						    tp++;
617 						    goto next_char;
618 					    }
619 					    } else {
620 				/* start: this code shifted left for cstyle */
621 				char *backptr = tp - 7;
622 				if (strncmp(backptr, "operator", 8) == 0) {
623 					/* This is an overloaded operator */
624 					isoperator = TRUE;
625 					if (c != ' ') {
626 						optfound = TRUE;
627 					}
628 
629 					tp++;
630 					goto next_char;
631 				} else if (c == '~') {
632 					/* This is a destructor		*/
633 					tp++;
634 					goto next_char;
635 				}
636 				/* end: above code shifted left for cstyle */
637 					}
638 					} else if (c == ':') {
639 					    if ((*++sp = getc(inf)) == ':') {
640 						tp += 2;
641 						c = *sp;
642 						goto next_char;
643 					    } else {
644 						(void) ungetc (*sp, inf);
645 						--sp;
646 					    }
647 					}
648 
649 				/* start: this code shifted left for cstyle */
650 				{
651 				int f;
652 				int pfline = lineno;
653 
654 				if (start_entry(&sp, token, &f)) {
655 					(void) strncpy(tok, token, tp-token+1);
656 					tok[tp-token+1] = 0;
657 					getaline(tokftell);
658 					pfnote(tok, pfline, f);
659 					gotone = f;	/* function */
660 				}
661 
662 				isoperator = optfound = midtoken = FALSE;
663 				token = sp;
664 				}
665 				/* end: above code shifted left for cstyle */
666 				} else if (intoken(c))
667 					tp++;
668 			} else if (begtoken(c)) {
669 				token = tp = sp;
670 				midtoken = TRUE;
671 				tokftell = lineftell;
672 			}
673 		}
674 	next_char:
675 		if (c == ';' && tydef == end)	/* clean with typedefs */
676 			tydef = none;
677 		sp++;
678 			/* The "c == }" was added to fix #1034126 */
679 		if (c == '\n' ||c == '}'|| sp > &line[sizeof (line) - BUFSIZ]) {
680 			tp = token = sp = line;
681 			number = gotone = midtoken = inquote =
682 			inchar = isoperator = optfound = FALSE;
683 		}
684 	}
685 }
686 
687 /*
688  * This routine  checks to see if the current token is
689  * at the start of a function, or corresponds to a typedef
690  * It updates the input line * so that the '(' will be
691  * in it when it returns.
692  */
693 static int
start_entry(char ** lp,char * token,int * f)694 start_entry(char **lp, char *token, int *f)
695 {
696 	char	*sp;
697 	int	c;
698 	static	bool	found;
699 	bool	firsttok;	/* T if have seen first token in ()'s	*/
700 	int	bad;
701 
702 	*f = 1;			/* a function */
703 	sp = *lp;
704 	c = *sp;
705 	bad = FALSE;
706 	if (!number) {		/* space is not allowed in macro defs	*/
707 		while (iswhite(c)) {
708 			*++sp = c = getc(inf);
709 			if ((c == '\n') || (c == EOF)) { /* c==EOF, #1091005 */
710 				lineno++;
711 				lineftell = ftell(inf);
712 				if (sp > &line[sizeof (line) - BUFSIZ])
713 					goto ret;
714 			}
715 		}
716 	/* the following tries to make it so that a #define	a b(c)	*/
717 	/* doesn't count as a define of b.				*/
718 	} else {
719 		if (strncmp(token, "define", 6) == 0)
720 			found = 0;
721 		else
722 			found++;
723 		if (found >= 2) {
724 			gotone = TRUE;
725 badone:			bad = TRUE;
726 			goto ret;
727 		}
728 	}
729 	/* check for the typedef cases		*/
730 #ifdef XPG4
731 	if (strncmp(token, "typedef", 7) == 0) {
732 #else /*  !XPG4 */
733 	if (tflag && (strncmp(token, "typedef", 7) == 0)) {
734 #endif /*  XPG4 */
735 		tydef = begin;
736 		goto badone;
737 	}
738 	/* Handles 'class' besides 'struct' etc.			*/
739 	if (tydef == begin && ((strncmp(token, "struct", 6) == 0) ||
740 			    (strncmp(token, "class", 5) == 0) ||
741 			    (strncmp(token, "union", 5) == 0)||
742 			    (strncmp(token, "enum", 4) == 0))) {
743 		tydef = begin_rec;
744 		goto badone;
745 	}
746 	if (tydef == begin) {
747 		tydef = end;
748 		goto badone;
749 	}
750 	if (tydef == begin_rec) {
751 		tydef = begin_tag;
752 		goto badone;
753 	}
754 	if (tydef == begin_tag) {
755 		tydef = end;
756 		goto gottydef;	/* Fall through to "tydef==end" */
757 	}
758 
759 gottydef:
760 	if (tydef == end) {
761 		*f = 0;
762 		goto ret;
763 	}
764 	if (c != '(')
765 		goto badone;
766 	firsttok = FALSE;
767 	while ((*++sp = c = getc(inf)) != ')') {
768 		if ((c == '\n') || (c == EOF)) { /* c == EOF Fix for #1091005 */
769 			lineftell = ftell(inf);
770 			lineno++;
771 			if (sp > &line[sizeof (line) - BUFSIZ])
772 				goto ret;
773 		}
774 		/*
775 		 * This line used to confuse ctags:
776 		 *	int	(*oldhup)();
777 		 * This fixes it. A nonwhite char before the first
778 		 * token, other than a / (in case of a comment in there)
779 		 * makes this not a declaration.
780 		 */
781 		if (begtoken(c) || c == '/')
782 			firsttok = TRUE;
783 		else if (!iswhite(c) && !firsttok)
784 			goto badone;
785 	}
786 	while (iswhite(*++sp = c = getc(inf)))
787 		if ((c == '\n') || (c == EOF)) { /* c == EOF fix for #1091005 */
788 			lineno++;
789 			lineftell = ftell(inf);
790 			if (sp > &line[sizeof (line) - BUFSIZ])
791 				break;
792 		}
793 ret:
794 	*lp = --sp;
795 	if (c == '\n')
796 		lineno--;
797 	(void) ungetc(c, inf);
798 	return (!bad && (!*f || isgood(c)));
799 					/* hack for typedefs */
800 }
801 
802 /*
803  * Y_entries:
804  *	Find the yacc tags and put them in.
805  */
806 static void
807 Y_entries(void)
808 {
809 	char	*sp, *orig_sp;
810 	int	brace;
811 	bool	in_rule = FALSE;
812 	size_t	toklen;
813 	char	tok[LINEBUFSIZ];
814 
815 	brace = 0;
816 	getaline(lineftell);
817 	pfnote("yyparse", lineno, TRUE);
818 	while (fgets(line, sizeof (line), inf) != NULL) {
819 		for (sp = line; *sp; sp++) {
820 			switch (*sp) {
821 			case '\n':
822 				lineno++;
823 				/* FALLTHROUGH */
824 			case ' ':
825 			case '\t':
826 			case '\f':
827 			case '\r':
828 				break;
829 			case '"':
830 			case '\'': {
831 				char start = *sp;
832 				sp++;
833 
834 				while ((*sp != '\0') && (*sp != start)) {
835 					if (*sp == '\\')
836 						sp++; /* Skip escaped thing */
837 					sp++;
838 				}
839 
840 				if (*sp == '\0')
841 					sp--;
842 				break;
843 			}
844 			case '/':
845 				if (*++sp == '*')
846 					sp = toss_comment(sp);
847 				else
848 					--sp;
849 				break;
850 			case '{':
851 				brace++;
852 				break;
853 			case '}':
854 				brace--;
855 				break;
856 			case '%':
857 				if (sp[1] == '%' && sp == line)
858 					return;
859 				break;
860 			case '|':
861 			case ';':
862 				in_rule = FALSE;
863 				break;
864 			default:
865 				if (brace == 0 && !in_rule && (isalpha(*sp) ||
866 				    *sp == '.' ||
867 				    *sp == '_')) {
868 					orig_sp = sp;
869 					++sp;
870 					while (isalnum(*sp) || *sp == '_' ||
871 					    *sp == '.')
872 						sp++;
873 					toklen = sp - orig_sp;
874 					while (isspace(*sp))
875 						sp++;
876 					if (*sp == ':' || (*sp == '\0' &&
877 					    first_char() == ':')) {
878 						(void) strncpy(tok,
879 						    orig_sp, toklen);
880 						tok[toklen] = '\0';
881 						(void) strcpy(lbuf, line);
882 						lbuf[strlen(lbuf) - 1] = '\0';
883 						pfnote(tok, lineno, TRUE);
884 						in_rule = TRUE;
885 						/*
886 						 * if we read NUL, leave it so
887 						 * we read the next line
888 						 */
889 						if (*sp == '\0')
890 							sp--;
891 					} else {
892 						sp--;
893 					}
894 				}
895 				break;
896 			}
897 		}
898 	}
899 }
900 
901 static char *
902 toss_comment(char *start)
903 {
904 	char	*sp;
905 
906 	/*
907 	 * first, see if the end-of-comment is on the same line
908 	 */
909 	do {
910 		while ((sp = strchr(start, '*')) != NULL)
911 			if (sp[1] == '/')
912 				return (++sp);
913 			else
914 				start = (++sp);
915 		start = line;
916 		lineno++;
917 	} while (fgets(line, sizeof (line), inf) != NULL);
918 
919 	/*
920 	 * running this through lint revealed that the original version
921 	 * of this routine didn't explicitly return something; while
922 	 * the return value was always used!. so i've added this
923 	 * next line.
924 	 */
925 	return (sp);
926 }
927 
928 static void
929 getaline(long int where)
930 {
931 	long saveftell = ftell(inf);
932 	char *cp;
933 
934 	(void) fseek(inf, where, 0);
935 	(void) fgets(lbuf, sizeof (lbuf), inf);
936 	cp = strrchr(lbuf, '\n');
937 	if (cp)
938 		*cp = 0;
939 	(void) fseek(inf, saveftell, 0);
940 }
941 
942 static void
943 free_tree(NODE *node)
944 {
945 	NODE *next;
946 	while (node) {
947 		free_tree(node->right);
948 		next = node->left;
949 		free(node);
950 		node = next;
951 	}
952 }
953 
954 static void
955 add_node(NODE *node, NODE *cur_node)
956 {
957 	int dif;
958 
959 	dif = strcmp(node->entry, cur_node->entry);
960 	if (dif == 0) {
961 		if (node->file == cur_node->file) {
962 			if (!wflag) {
963 			(void) fprintf(stderr,
964 			gettext("Duplicate entry in file %s, line %d: %s\n"),
965 			node->file, lineno, node->entry);
966 			(void) fprintf(stderr,
967 					gettext("Second entry ignored\n"));
968 			}
969 			return;
970 		}
971 		if (!cur_node->been_warned)
972 			if (!wflag) {
973 				(void) fprintf(stderr, gettext("Duplicate "
974 					    "entry in files %s and %s: %s "
975 					    "(Warning only)\n"),
976 					    node->file, cur_node->file,
977 					    node->entry);
978 			}
979 		cur_node->been_warned = TRUE;
980 		return;
981 	}
982 
983 	if (dif < 0) {
984 		if (cur_node->left != NULL)
985 			add_node(node, cur_node->left);
986 		else
987 			cur_node->left = node;
988 		return;
989 	}
990 	if (cur_node->right != NULL)
991 		add_node(node, cur_node->right);
992 	else
993 		cur_node->right = node;
994 }
995 
996 static void
997 put_entries(NODE *node)
998 {
999 	char	*sp;
1000 
1001 	if (node == NULL)
1002 		return;
1003 	put_entries(node->left);
1004 
1005 	/*
1006 	 * while the code in the following #ifdef section could be combined,
1007 	 * it's explicitly separated here to make maintainance easier.
1008 	 */
1009 #ifdef XPG4
1010 	/*
1011 	 * POSIX 2003: we no longer have a "-t" flag; the logic is
1012 	 * automatically assumed to be "turned on" here.
1013 	 */
1014 	if (xflag == 0) {
1015 			(void) fprintf(outf, "%s\t%s\t%c^",
1016 				node->entry, node->file, searchar);
1017 			for (sp = node->pat; *sp; sp++)
1018 				if (*sp == '\\')
1019 					(void) fprintf(outf, "\\\\");
1020 				else if (*sp == searchar)
1021 					(void) fprintf(outf, "\\%c", searchar);
1022 				else
1023 					(void) putc(*sp, outf);
1024 			(void) fprintf(outf, "%c\n", searchar);
1025 	} else if (vflag)
1026 		(void) fprintf(stdout, "%s %s %d\n",
1027 				node->entry, node->file, (node->lno+63)/64);
1028 	else
1029 		(void) fprintf(stdout, "%-16s %4d %-16s %s\n",
1030 			node->entry, node->lno, node->file, node->pat);
1031 #else /* XPG4 */
1032 	/*
1033 	 * original way of doing things. "-t" logic is only turned on
1034 	 * when the user has specified it via a command-line argument.
1035 	 */
1036 	if (xflag == 0)
1037 		if (node->f) {		/* a function */
1038 			(void) fprintf(outf, "%s\t%s\t%c^",
1039 				node->entry, node->file, searchar);
1040 			for (sp = node->pat; *sp; sp++)
1041 				if (*sp == '\\')
1042 					(void) fprintf(outf, "\\\\");
1043 				else if (*sp == searchar)
1044 					(void) fprintf(outf, "\\%c", searchar);
1045 				else
1046 					(void) putc(*sp, outf);
1047 			(void) fprintf(outf, "%c\n", searchar);
1048 		} else {		/* a typedef; text pattern inadequate */
1049 			(void) fprintf(outf, "%s\t%s\t%d\n",
1050 				node->entry, node->file, node->lno);
1051 		} else if (vflag)
1052 		(void) fprintf(stdout, "%s %s %d\n",
1053 				node->entry, node->file, (node->lno+63)/64);
1054 	else
1055 		(void) fprintf(stdout, "%-16s %4d %-16s %s\n",
1056 			node->entry, node->lno, node->file, node->pat);
1057 #endif /* XPG4 */
1058 	put_entries(node->right);
1059 }
1060 
1061 
1062 static int
1063 PF_funcs(FILE *fi)
1064 {
1065 
1066 	pfcnt = 0;
1067 	while (fgets(lbuf, sizeof (lbuf), fi)) {
1068 		lineno++;
1069 		dbp = lbuf;
1070 		if (*dbp == '%') dbp++;	/* Ratfor escape to fortran */
1071 		while (isspace(*dbp))
1072 			dbp++;
1073 		if (*dbp == 0)
1074 			continue;
1075 		switch (*dbp |' ') {
1076 
1077 		    case 'i':
1078 			if (tail("integer"))
1079 				takeprec();
1080 			break;
1081 		    case 'r':
1082 			if (tail("real"))
1083 				takeprec();
1084 			break;
1085 		    case 'l':
1086 			if (tail("logical"))
1087 				takeprec();
1088 			break;
1089 		    case 'c':
1090 			if (tail("complex") || tail("character"))
1091 				takeprec();
1092 			break;
1093 		    case 'd':
1094 			if (tail("double")) {
1095 				while (isspace(*dbp))
1096 					dbp++;
1097 				if (*dbp == 0)
1098 					continue;
1099 				if (tail("precision"))
1100 					break;
1101 				continue;
1102 			}
1103 			break;
1104 		}
1105 		while (isspace(*dbp))
1106 			dbp++;
1107 		if (*dbp == 0)
1108 			continue;
1109 		switch (*dbp|' ') {
1110 
1111 		    case 'f':
1112 			if (tail("function"))
1113 				getit();
1114 			continue;
1115 		    case 's':
1116 			if (tail("subroutine"))
1117 				getit();
1118 			continue;
1119 		    case 'p':
1120 			if (tail("program")) {
1121 				getit();
1122 				continue;
1123 			}
1124 			if (tail("procedure"))
1125 				getit();
1126 			continue;
1127 		}
1128 	}
1129 	return (pfcnt);
1130 }
1131 
1132 static int
1133 tail(char *cp)
1134 {
1135 	int len = 0;
1136 
1137 	while (*cp && (*cp&~' ') == ((*(dbp+len))&~' '))
1138 		cp++, len++;
1139 	if (*cp == 0) {
1140 		dbp += len;
1141 		return (1);
1142 	}
1143 	return (0);
1144 }
1145 
1146 static void
1147 takeprec(void)
1148 {
1149 	while (isspace(*dbp))
1150 		dbp++;
1151 	if (*dbp != '*')
1152 		return;
1153 	dbp++;
1154 	while (isspace(*dbp))
1155 		dbp++;
1156 	if (!isdigit(*dbp)) {
1157 		--dbp;		/* force failure */
1158 		return;
1159 	}
1160 	do
1161 		dbp++;
1162 	while (isdigit(*dbp));
1163 }
1164 
1165 static void
1166 getit(void)
1167 {
1168 	char *cp;
1169 	char c;
1170 	char nambuf[LINEBUFSIZ];
1171 
1172 	for (cp = lbuf; *cp; cp++)
1173 		;
1174 	*--cp = 0;	/* zap newline */
1175 	while (isspace(*dbp))
1176 		dbp++;
1177 	if (*dbp == 0 || !isalpha(*dbp) || !isascii(*dbp))
1178 		return;
1179 	for (cp = dbp+1; *cp && (isalpha(*cp) || isdigit(*cp)); cp++)
1180 		continue;
1181 	c = cp[0];
1182 	cp[0] = 0;
1183 	(void) strcpy(nambuf, dbp);
1184 	cp[0] = c;
1185 	pfnote(nambuf, lineno, TRUE);
1186 	pfcnt++;
1187 }
1188 
/*
 * savestr:
 *	Return a freshly allocated copy of the string "cp".  The program
 *	exits with status 1 if no memory is available, so the result is
 *	never NULL.
 */
static char *
savestr(char *cp)
{
	size_t	size = strlen(cp) + 1;	/* room for the NUL as well */
	char	*dp = malloc(size);

	if (dp == NULL) {
		perror("ctags");
		exit(1);
	}
	(void) memcpy(dp, cp, size);

	return (dp);
}
1201 
1202 /*
1203  * lisp tag functions
1204  * just look for (def or (DEF
1205  */
1206 
1207 static void
1208 L_funcs(FILE *fi)
1209 {
1210 	int	special;
1211 
1212 	pfcnt = 0;
1213 	while (fgets(lbuf, sizeof (lbuf), fi)) {
1214 		lineno++;
1215 		dbp = lbuf;
1216 		if (dbp[0] == '(' &&
1217 		    (dbp[1] == 'D' || dbp[1] == 'd') &&
1218 		    (dbp[2] == 'E' || dbp[2] == 'e') &&
1219 		    (dbp[3] == 'F' || dbp[3] == 'f')) {
1220 			dbp += 4;
1221 			if (striccmp(dbp, "method") == 0 ||
1222 			    striccmp(dbp, "wrapper") == 0 ||
1223 			    striccmp(dbp, "whopper") == 0)
1224 				special = TRUE;
1225 			else
1226 				special = FALSE;
1227 			while (!isspace(*dbp))
1228 				dbp++;
1229 			while (isspace(*dbp))
1230 				dbp++;
1231 			L_getit(special);
1232 		}
1233 	}
1234 }
1235 
1236 static void
1237 L_getit(int special)
1238 {
1239 	char	*cp;
1240 	char	c;
1241 	char	nambuf[LINEBUFSIZ];
1242 
1243 	for (cp = lbuf; *cp; cp++)
1244 		continue;
1245 	*--cp = 0;		/* zap newline */
1246 	if (*dbp == 0)
1247 		return;
1248 	if (special) {
1249 		if ((cp = strchr(dbp, ')')) == NULL)
1250 			return;
1251 		while (cp >= dbp && *cp != ':')
1252 			cp--;
1253 		if (cp < dbp)
1254 			return;
1255 		dbp = cp;
1256 		while (*cp && *cp != ')' && *cp != ' ')
1257 			cp++;
1258 	}
1259 	else
1260 		for (cp = dbp + 1; *cp && *cp != '(' && *cp != ' '; cp++)
1261 			continue;
1262 	c = cp[0];
1263 	cp[0] = 0;
1264 	(void) strcpy(nambuf, dbp);
1265 	cp[0] = c;
1266 	pfnote(nambuf, lineno, TRUE);
1267 	pfcnt++;
1268 }
1269 
/*
 * striccmp:
 *	Compare str against pat over the length of pat, ignoring case
 *	distinctions in str.  Return 0 if every character of pat matches;
 *	otherwise return the difference between the first pair of
 *	characters that differ.  The pattern is assumed to be entirely
 *	lower case.  An empty pattern always matches.
 *
 *	Characters are converted to unsigned char before being handed to
 *	isupper()/tolower(); passing a negative plain char to the <ctype.h>
 *	routines is undefined behaviour.
 */
static int
striccmp(char *str, char *pat)
{
	int	c1;

	for (; *pat != '\0'; pat++, str++) {
		unsigned char	uc = (unsigned char)*str;

		c1 = isupper(uc) ? tolower(uc) : *str;
		if (c1 != *pat)
			return (c1 - *pat);
	}
	return (0);
}
1295 
1296 /*
1297  * first_char:
1298  *	Return the first non-blank character in the file.  After
1299  *	finding it, rewind the input file so we start at the beginning
1300  *	again.
1301  */
1302 static int
1303 first_char(void)
1304 {
1305 	int	c;
1306 	long	off;
1307 
1308 	off = ftell(inf);
1309 	while ((c = getc(inf)) != EOF)
1310 		if (!isspace(c) && c != '\r') {
1311 			(void) fseek(inf, off, 0);
1312 			return (c);
1313 		}
1314 	(void) fseek(inf, off, 0);
1315 	return (EOF);
1316 }
1317 
1318 /*
1319  * toss_yysec:
1320  *	Toss away code until the next "%%" line.
1321  */
1322 static void
1323 toss_yysec(void)
1324 {
1325 	char		buf[BUFSIZ];
1326 
1327 	for (;;) {
1328 		lineftell = ftell(inf);
1329 		if (fgets(buf, BUFSIZ, inf) == NULL)
1330 			return;
1331 		lineno++;
1332 		if (strncmp(buf, "%%", 2) == 0)
1333 			return;
1334 	}
1335 }
1336 
/*
 * Usage:
 *	Print the command synopsis on stderr and exit with status 1.
 *	The XPG4 build does not advertise the -t flag.
 */
static void
Usage(void)
{
#ifdef XPG4
	(void) fprintf(stderr,
	    gettext("Usage:\tctags [-aBFuvw] [-f tagsfile] file ...\n"));
#else /*  !XPG4 */
	(void) fprintf(stderr,
	    gettext("Usage:\tctags [-aBFtuvw] [-f tagsfile] file ...\n"));
#endif /*  XPG4 */
	(void) fprintf(stderr, gettext("OR:\tctags [-x] file ...\n"));
	exit(1);
}
1349 
1350 
1351 /*
1352  * parseargs():		modify the args
1353  *	the purpose of this routine is to transform any ancient argument
1354  *	usage into a format which is acceptable to getopt(3C), so that we
1355  *	retain backwards Solaris 2.[0-4] compatibility.
1356  *
1357  *	This routine allows us to make full use of getopts, without any
1358  *	funny argument processing in main().
1359  *
1360  *	The other alternative would be to hand-craft the processed arguments
1361  *	during and after getopt(3C) - which usually leads to uglier code
1362  *	in main(). I've opted to keep the ugliness isolated down here,
1363  *	instead of in main().
1364  *
1365  *	In a nutshell, if the user has used the old Solaris syntax of:
1366  *		ctags [-aBFtuvwx] [-f tagsfile] filename ...
1367  *	We simply change this into:
1368  *		ctags [-a] [-B] [-F] [-t] [-u] [-v] [-w] [-x] [-f tags] file...
1369  *
1370  *	If the user has specified the new getopt(3C) syntax, we merely
1371  *	copy that into our modified argument space.
1372  */
1373 static void
1374 parseargs(int ac, char **av)
1375 {
1376 	int i;			/* current argument			*/
1377 	int a;			/* used to parse combined arguments	*/
1378 	int fflag;		/* 1 = we're only parsing filenames	*/
1379 	size_t sz;		/* size of the argument			*/
1380 	size_t mav_sz;		/* size of our psuedo argument space	*/
1381 
1382 	i = mac = fflag = 0;	/* proper initializations */
1383 
1384 	mav_sz = ((ac + 1) * sizeof (char *));
1385 	if ((mav = malloc(mav_sz)) == (char **)NULL) {
1386 		perror("Can't malloc argument space");
1387 		exit(1);
1388 	}
1389 
1390 	/* for each argument, see if we need to change things:		*/
1391 	for (; (av[i] != NULL) && (av[i][0] != '\0'); i++) {
1392 
1393 		if (strcmp(av[i], "--") == 0) {
1394 			fflag = 1;	/* just handle filenames now	*/
1395 		}
1396 
1397 		sz = strlen(&av[i][0]);	/* get this arg's size		*/
1398 
1399 		/*
1400 		 * if the argument starts with a "-", and has more than
1401 		 * 1 flag, then we have to search through each character,
1402 		 * and separate any flags which have been combined.
1403 		 *
1404 		 * so, if we've found a "-" string which needs separating:
1405 		 */
1406 		if (fflag == 0 &&	/* not handling filename args	*/
1407 		    av[i][0] == '-' &&	/* and this is a flag		*/
1408 		    sz > 2) {		/* and there's more than 1 flag	*/
1409 			/* then for each flag after the "-" sign:	*/
1410 			for (a = 1; av[i][a]; a++) {
1411 				/* copy the flag into mav space.	*/
1412 				if (a > 1) {
1413 					/*
1414 					 * we need to call realloc() after the
1415 					 * 1st combined flag, because "ac"
1416 					 * doesn't include combined args.
1417 					 */
1418 					mav_sz += sizeof (char *);
1419 					if ((mav = realloc(mav, mav_sz)) ==
1420 					    (char **)NULL) {
1421 						perror("Can't realloc "
1422 							"argument space");
1423 						exit(1);
1424 					}
1425 				}
1426 
1427 				if ((mav[mac] = malloc((size_t)CPFLAG)) ==
1428 				    (char *)NULL) {
1429 					perror("Can't malloc argument space");
1430 					exit(1);
1431 				}
1432 				(void) sprintf(mav[mac], "-%c", av[i][a]);
1433 				++mac;
1434 			}
1435 		} else {
1436 			/* otherwise, just copy the argument:		*/
1437 			if ((mav[mac] = malloc(sz + 1)) == (char *)NULL) {
1438 				perror("Can't malloc argument space");
1439 				exit(1);
1440 			}
1441 			(void) strcpy(mav[mac], av[i]);
1442 			++mac;
1443 		}
1444 	}
1445 
1446 	mav[mac] = (char *)NULL;
1447 }
1448