1 /***********************************************************************
2 *                                                                      *
3 *               This software is part of the ast package               *
4 *          Copyright (c) 1985-2011 AT&T Intellectual Property          *
5 *                      and is licensed under the                       *
6 *                 Eclipse Public License, Version 1.0                  *
7 *                    by AT&T Intellectual Property                     *
8 *                                                                      *
9 *                A copy of the License is available at                 *
10 *          http://www.eclipse.org/org/documents/epl-v10.html           *
11 *         (with md5 checksum b35adb5213ca9657e911e9befb180842)         *
12 *                                                                      *
13 *              Information and Software Systems Research               *
14 *                            AT&T Research                             *
15 *                           Florham Park NJ                            *
16 *                                                                      *
17 *                 Glenn Fowler <gsf@research.att.com>                  *
18 *                  David Korn <dgk@research.att.com>                   *
19 *                   Phong Vo <kpv@research.att.com>                    *
20 *                                                                      *
21 ***********************************************************************/
22 #pragma prototyped
23 /*
24  * Glenn Fowler
25  * AT&T Research
26  *
27  * library interface to file
28  *
29  * the sum of the hacks {s5,v10,planix} is _____ than the parts
30  */
31 
/*
 * version stamp: the "@(#)" and "$Id: ... $" markers are embedded in the
 * string data itself
 */

static const char	id[] = "\n@(#)$Id: magic library (AT&T Research) 2011-03-09 $\0\n";

/*
 * catalog name handed to ERROR_translate() by the T() macro
 */

static const char	lib[] = "libast:magic";
35 
36 #include <ast.h>
37 #include <ctype.h>
38 #include <ccode.h>
39 #include <dt.h>
40 #include <modex.h>
41 #include <error.h>
42 #include <regex.h>
43 #include <swap.h>
44 
/*
 * T(m) translates the message m through ERROR_translate() using the lib
 * catalog name; the empty string is passed through untranslated
 *
 * NOTE: m is evaluated twice, so it must not have side effects
 */

#define T(m)		(*(m)?ERROR_translate(NiL,NiL,lib,(m)):(m))

/*
 * match(s,p) is a strgrpmatch() of all of s against pattern p with the
 * STR_LEFT|STR_RIGHT|STR_ICASE flags
 */

#define match(s,p)	strgrpmatch((s),(p),NiL,0,STR_LEFT|STR_RIGHT|STR_ICASE)

#define MAXNEST		10		/* { ... } nesting limit	*/
#define MINITEM		4		/* magic buffer rounding	*/
51 
52 typedef struct				/* identifier dictionary entry	*/
53 {
54 	const char	name[16];	/* identifier name		*/
55 	int		value;		/* identifier value		*/
56 	Dtlink_t	link;		/* dictionary link		*/
57 } Info_t;
58 
/*
 * edit substitution: one node of a singly linked list
 */

struct Edit
{
	struct Edit*	next;		/* next in list			*/
	regex_t*	from;		/* from pattern			*/
};

typedef struct Edit Edit_t;
64 
struct Entry;

/*
 * loop info for the 'l' operation in ckmagic()
 *
 * count and offset are working state: ckmagic() loads count from the
 * datum on the first pass, saves the caller offset in offset, and
 * restores that offset when count runs down to 0
 */

typedef struct
{
	struct Entry*	lab;		/* call this function		*/
	int		start;		/* start here			*/
	int		size;		/* increment by this amount	*/
	int		count;		/* dynamic loop count		*/
	int		offset;		/* dynamic offset		*/
} Loop_t;
75 
76 typedef struct Entry			/* magic file entry		*/
77 {
78 	struct Entry*	next;		/* next in list			*/
79 	char*		expr;		/* offset expression		*/
80 	union
81 	{
82 	unsigned long	num;
83 	char*		str;
84 	struct Entry*	lab;
85 	regex_t*	sub;
86 	Loop_t*		loop;
87 	}		value;		/* comparison value		*/
88 	char*		desc;		/* file description		*/
89 	char*		mime;		/* file mime type		*/
90 	unsigned long	offset;		/* offset in bytes		*/
91 	unsigned long	mask;		/* mask before compare		*/
92 	char		cont;		/* continuation operation	*/
93 	char		type;		/* datum type			*/
94 	char		op;		/* comparison operation		*/
95 	char		nest;		/* { or } nesting operation	*/
96 	char		swap;		/* forced swap order		*/
97 } Entry_t;
98 
/*
 * character code classification
 *
 * cklang() ors together the cctype[] entries of the file data and then
 * examines the result CC_BIT bits at a time, once per code set map, so
 * a Cctype_t must hold CC_MAPS*CC_BIT bits
 */

#define CC_BIT		5

#if (CC_MAPS*CC_BIT) <= (CHAR_BIT*2)
typedef unsigned short Cctype_t;
#else
typedef unsigned long Cctype_t;
#endif

#define CC_text		0x01		/* text character		*/
#define CC_control	0x02		/* control character		*/
#define CC_latin	0x04		/* 0200 ... 0240		*/
#define CC_binary	0x08		/* above 0240			*/
#define CC_utf_8	0x10		/* not part of CC_MASK		*/

#define CC_notext	CC_text		/* CC_text is flipped before checking */

#define CC_MASK		(CC_binary|CC_latin|CC_control|CC_text)

/*
 * classify the character code c: codes below 040 other than
 * 007, 011, 012, 013 and 015 are control, the rest below 0200 are text
 */

#define CCTYPE(c) \
	(((c)>0240) ? CC_binary : \
	 ((c)>=0200) ? CC_latin : \
	 ((c)<040&&(c)!=007&&(c)!=011&&(c)!=012&&(c)!=013&&(c)!=015) ? CC_control : \
	 CC_text)
118 
/*
 * identifier classes: the value member of the dict[] entries and the
 * index into the Magic_t identifier[] counters
 */

#define ID_NONE		0		/* no class			*/
#define ID_ASM		1		/* bss byte comm data ...	*/
#define ID_C		2		/* char double extern ...	*/
#define ID_COBOL	3		/* COMPUTE DIVISION ...		*/
#define ID_COPYBOOK	4		/* COMP FILLER PIC ...		*/
#define ID_CPLUSPLUS	5		/* class private public		*/
#define ID_FORTRAN	6		/* COMMON DIMENSION ...		*/
#define ID_HTML		7		/* HTML html			*/
#define ID_INCL1	8		/* include			*/
#define ID_INCL2	9		/* libc sfio stdio u		*/
#define ID_INCL3	10		/* h				*/
#define ID_MAM1		11		/* make				*/
#define ID_MAM2		12		/* done				*/
#define ID_MAM3		13		/* attr exec			*/
#define ID_NOTEXT	14		/* clr jmp mov sys tst		*/
#define ID_PL1		15		/* DCL DEFINED FIXED ...	*/
#define ID_YACC		16		/* binary left right ...	*/

#define ID_MAX		ID_YACC		/* largest ID_* value		*/
138 
/*
 * info keyword codes: the value member of the info[] entries; ckmagic()
 * switches on these when an entry offset names a keyword
 */

#define INFO_atime	1		/* st_atime			*/
#define INFO_blocks	2		/* iblocks(st)			*/
#define INFO_ctime	3		/* st_ctime			*/
#define INFO_fstype	4		/* fmtfs(st)			*/
#define INFO_gid	5		/* st_gid			*/
#define INFO_mode	6		/* st_mode			*/
#define INFO_mtime	7		/* mtime keyword		*/
#define INFO_name	8		/* base name of the file path	*/
#define INFO_nlink	9		/* st_nlink			*/
#define INFO_size	10		/* st_size			*/
#define INFO_uid	11		/* st_uid			*/
150 
/*
 * private Magic_t members: defined before <magic.h> is included
 */

#define _MAGIC_PRIVATE_ \
	Magicdisc_t*	disc;			/* discipline			*/ \
	Vmalloc_t*	vm;			/* vmalloc region		*/ \
	Entry_t*	magic;			/* parsed magic table		*/ \
	Entry_t*	magiclast;		/* last entry in magic		*/ \
	char*		mime;			/* MIME type			*/ \
	unsigned char*	x2n;			/* CC_ALIEN=>CC_NATIVE		*/ \
	char		fbuf[SF_BUFSIZE + 1];	/* file data			*/ \
	char		xbuf[SF_BUFSIZE + 1];	/* indirect file data		*/ \
	char		nbuf[256];		/* !CC_NATIVE data		*/ \
	char		mbuf[64];		/* mime string			*/ \
	char		sbuf[64];		/* type suffix string		*/ \
	char		tbuf[2 * PATH_MAX];	/* type string			*/ \
	Cctype_t	cctype[UCHAR_MAX + 1];	/* char code types		*/ \
	unsigned int	count[UCHAR_MAX + 1];	/* char frequency count		*/ \
	unsigned int	multi[UCHAR_MAX + 1];	/* multi char count		*/ \
	int		keep[MAXNEST];		/* ckmagic nest stack		*/ \
	char*		cap[MAXNEST];		/* ckmagic mime stack		*/ \
	char*		msg[MAXNEST];		/* ckmagic text stack		*/ \
	Entry_t*	ret[MAXNEST];		/* ckmagic return stack		*/ \
	int		fbsz;			/* fbuf size			*/ \
	int		fbmx;			/* fbuf max size		*/ \
	int		xbsz;			/* xbuf size			*/ \
	int		swap;			/* swap() operation		*/ \
	unsigned long	flags;			/* disc+open flags		*/ \
	long		xoff;			/* xbuf offset			*/ \
	int		identifier[ID_MAX + 1];	/* Info_t identifier counts	*/ \
	Sfio_t*		fp;			/* fbuf fp			*/ \
	Sfio_t*		tmp;			/* tmp string			*/ \
	regdisc_t	redisc;			/* regex discipline		*/ \
	Dtdisc_t	dtdisc;			/* dict discipline		*/ \
	Dt_t*		idtab;			/* identifier dict		*/ \
	Dt_t*		infotab;		/* info keyword dict		*/
184 
185 #include <magic.h>
186 
187 static Info_t		dict[] =		/* keyword dictionary	*/
188 {
189 	{ 	"COMMON",	ID_FORTRAN	},
190 	{ 	"COMPUTE",	ID_COBOL	},
191 	{ 	"COMP",		ID_COPYBOOK	},
192 	{ 	"COMPUTATIONAL",ID_COPYBOOK	},
193 	{ 	"DCL",		ID_PL1		},
194 	{ 	"DEFINED",	ID_PL1		},
195 	{ 	"DIMENSION",	ID_FORTRAN	},
196 	{ 	"DIVISION",	ID_COBOL	},
197 	{ 	"FILLER",	ID_COPYBOOK	},
198 	{ 	"FIXED",	ID_PL1		},
199 	{ 	"FUNCTION",	ID_FORTRAN	},
200 	{ 	"HTML",		ID_HTML		},
201 	{ 	"INTEGER",	ID_FORTRAN	},
202 	{ 	"MAIN",		ID_PL1		},
203 	{ 	"OPTIONS",	ID_PL1		},
204 	{ 	"PERFORM",	ID_COBOL	},
205 	{ 	"PIC",		ID_COPYBOOK	},
206 	{ 	"REAL",		ID_FORTRAN	},
207 	{ 	"REDEFINES",	ID_COPYBOOK	},
208 	{ 	"S9",		ID_COPYBOOK	},
209 	{ 	"SECTION",	ID_COBOL	},
210 	{ 	"SELECT",	ID_COBOL	},
211 	{ 	"SUBROUTINE",	ID_FORTRAN	},
212 	{ 	"TEXT",		ID_ASM		},
213 	{ 	"VALUE",	ID_COPYBOOK	},
214 	{ 	"attr",		ID_MAM3		},
215 	{ 	"binary",	ID_YACC		},
216 	{ 	"block",	ID_FORTRAN	},
217 	{ 	"bss",		ID_ASM		},
218 	{ 	"byte",		ID_ASM		},
219 	{ 	"char",		ID_C		},
220 	{ 	"class",	ID_CPLUSPLUS	},
221 	{ 	"clr",		ID_NOTEXT	},
222 	{ 	"comm",		ID_ASM		},
223 	{ 	"common",	ID_FORTRAN	},
224 	{ 	"data",		ID_ASM		},
225 	{ 	"dimension",	ID_FORTRAN	},
226 	{ 	"done",		ID_MAM2		},
227 	{ 	"double",	ID_C		},
228 	{ 	"even",		ID_ASM		},
229 	{ 	"exec",		ID_MAM3		},
230 	{ 	"extern",	ID_C		},
231 	{ 	"float",	ID_C		},
232 	{ 	"function",	ID_FORTRAN	},
233 	{ 	"globl",	ID_ASM		},
234 	{ 	"h",		ID_INCL3	},
235 	{ 	"html",		ID_HTML		},
236 	{ 	"include",	ID_INCL1	},
237 	{ 	"int",		ID_C		},
238 	{ 	"integer",	ID_FORTRAN	},
239 	{ 	"jmp",		ID_NOTEXT	},
240 	{ 	"left",		ID_YACC		},
241 	{ 	"libc",		ID_INCL2	},
242 	{ 	"long",		ID_C		},
243 	{ 	"make",		ID_MAM1		},
244 	{ 	"mov",		ID_NOTEXT	},
245 	{ 	"private",	ID_CPLUSPLUS	},
246 	{ 	"public",	ID_CPLUSPLUS	},
247 	{ 	"real",		ID_FORTRAN	},
248 	{ 	"register",	ID_C		},
249 	{ 	"right",	ID_YACC		},
250 	{ 	"sfio",		ID_INCL2	},
251 	{ 	"static",	ID_C		},
252 	{ 	"stdio",	ID_INCL2	},
253 	{ 	"struct",	ID_C		},
254 	{ 	"subroutine",	ID_FORTRAN	},
255 	{ 	"sys",		ID_NOTEXT	},
256 	{ 	"term",		ID_YACC		},
257 	{ 	"text",		ID_ASM		},
258 	{ 	"tst",		ID_NOTEXT	},
259 	{ 	"type",		ID_YACC		},
260 	{ 	"typedef",	ID_C		},
261 	{ 	"u",		ID_INCL2	},
262 	{ 	"union",	ID_YACC		},
263 	{ 	"void",		ID_C		},
264 };
265 
266 static Info_t		info[] =
267 {
268 	{	"atime",	INFO_atime		},
269 	{	"blocks",	INFO_blocks		},
270 	{	"ctime",	INFO_ctime		},
271 	{	"fstype",	INFO_fstype		},
272 	{	"gid",		INFO_gid		},
273 	{	"mode",		INFO_mode		},
274 	{	"mtime",	INFO_mtime		},
275 	{	"name",		INFO_name		},
276 	{	"nlink",	INFO_nlink		},
277 	{	"size",		INFO_size		},
278 	{	"uid",		INFO_uid		},
279 };
280 
281 /*
282  * return pointer to data at offset off and size siz
283  */
284 
285 static char*
getdata(register Magic_t * mp,register long off,register int siz)286 getdata(register Magic_t* mp, register long off, register int siz)
287 {
288 	register long	n;
289 
290 	if (off < 0)
291 		return 0;
292 	if (off + siz <= mp->fbsz)
293 		return mp->fbuf + off;
294 	if (off < mp->xoff || off + siz > mp->xoff + mp->xbsz)
295 	{
296 		if (off + siz > mp->fbmx)
297 			return 0;
298 		n = (off / (SF_BUFSIZE / 2)) * (SF_BUFSIZE / 2);
299 		if (sfseek(mp->fp, n, SEEK_SET) != n)
300 			return 0;
301 		if ((mp->xbsz = sfread(mp->fp, mp->xbuf, sizeof(mp->xbuf) - 1)) < 0)
302 		{
303 			mp->xoff = 0;
304 			mp->xbsz = 0;
305 			return 0;
306 		}
307 		mp->xbuf[mp->xbsz] = 0;
308 		mp->xoff = n;
309 		if (off + siz > mp->xoff + mp->xbsz)
310 			return 0;
311 	}
312 	return mp->xbuf + off - mp->xoff;
313 }
314 
315 /*
316  * @... evaluator for strexpr()
317  */
318 
319 static long
indirect(const char * cs,char ** e,void * handle)320 indirect(const char* cs, char** e, void* handle)
321 {
322 	register char*		s = (char*)cs;
323 	register Magic_t*	mp = (Magic_t*)handle;
324 	register long		n = 0;
325 	register char*		p;
326 
327 	if (s)
328 	{
329 		if (*s == '@')
330 		{
331 			n = *++s == '(' ? strexpr(s, e, indirect, mp) : strtol(s, e, 0);
332 			switch (*(s = *e))
333 			{
334 			case 'b':
335 			case 'B':
336 				s++;
337 				if (p = getdata(mp, n, 1))
338 					n = *(unsigned char*)p;
339 				else
340 					s = (char*)cs;
341 				break;
342 			case 'h':
343 			case 'H':
344 				s++;
345 				if (p = getdata(mp, n, 2))
346 					n = swapget(mp->swap, p, 2);
347 				else
348 					s = (char*)cs;
349 				break;
350 			case 'q':
351 			case 'Q':
352 				s++;
353 				if (p = getdata(mp, n, 8))
354 					n = swapget(mp->swap, p, 8);
355 				else
356 					s = (char*)cs;
357 				break;
358 			default:
359 				if (isalnum(*s))
360 					s++;
361 				if (p = getdata(mp, n, 4))
362 					n = swapget(mp->swap, p, 4);
363 				else
364 					s = (char*)cs;
365 				break;
366 			}
367 		}
368 		*e = s;
369 	}
370 	else if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
371 		(*mp->disc->errorf)(mp, mp->disc, 2, "%s in indirect expression", *e);
372 	return n;
373 }
374 
375 /*
376  * emit regex error message
377  */
378 
379 static void
regmessage(Magic_t * mp,regex_t * re,int code)380 regmessage(Magic_t* mp, regex_t* re, int code)
381 {
382 	char	buf[128];
383 
384 	if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
385 	{
386 		regerror(code, re, buf, sizeof(buf));
387 		(*mp->disc->errorf)(mp, mp->disc, 3, "regex: %s", buf);
388 	}
389 }
390 
391 /*
392  * decompose vcodex(3) method composition
393  */
394 
395 static char*
vcdecomp(char * b,char * e,unsigned char * m,unsigned char * x)396 vcdecomp(char* b, char* e, unsigned char* m, unsigned char* x)
397 {
398 	unsigned char*	map;
399 	const char*	o;
400 	int		c;
401 	int		n;
402 	int		i;
403 	int		a;
404 
405 	map = CCMAP(CC_ASCII, CC_NATIVE);
406 	a = 0;
407 	i = 1;
408 	for (;;)
409 	{
410 		if (i)
411 			i = 0;
412 		else
413 			*b++ = '^';
414 		if (m < (x - 1) && !*(m + 1))
415 		{
416 			/*
417 			 * obsolete indices
418 			 */
419 
420 			if (!a)
421 			{
422 				a = 1;
423 				o = "old, ";
424 				while (b < e && (c = *o++))
425 					*b++ = c;
426 			}
427 			switch (*m)
428 			{
429 			case 0:		o = "delta"; break;
430 			case 1:		o = "huffman"; break;
431 			case 2:		o = "huffgroup"; break;
432 			case 3:		o = "arith"; break;
433 			case 4:		o = "bwt"; break;
434 			case 5:		o = "rle"; break;
435 			case 6:		o = "mtf"; break;
436 			case 7:		o = "transpose"; break;
437 			case 8:		o = "table"; break;
438 			case 9:		o = "huffpart"; break;
439 			case 50:	o = "map"; break;
440 			case 100:	o = "recfm"; break;
441 			case 101:	o = "ss7"; break;
442 			default:	o = "UNKNOWN"; break;
443 			}
444 			m += 2;
445 			while (b < e && (c = *o++))
446 				*b++ = c;
447 		}
448 		else
449 			while (b < e && m < x && (c = *m++))
450 			{
451 				if (map)
452 					c = map[c];
453 				*b++ = c;
454 			}
455 		if (b >= e)
456 			break;
457 		n = 0;
458 		while (m < x)
459 		{
460 			n = (n<<7) | (*m & 0x7f);
461 			if (!(*m++ & 0x80))
462 				break;
463 		}
464 		if (n >= (x - m))
465 			break;
466 		m += n;
467 	}
468 	return b;
469 }
470 
471 /*
472  * check for magic table match in buf
473  */
474 
475 static char*
ckmagic(register Magic_t * mp,const char * file,char * buf,char * end,struct stat * st,unsigned long off)476 ckmagic(register Magic_t* mp, const char* file, char* buf, char* end, struct stat* st, unsigned long off)
477 {
478 	register Entry_t*	ep;
479 	register char*		p;
480 	register char*		b;
481 	register int		level = 0;
482 	int			call = -1;
483 	int			all = 0;
484 	int			c;
485 	int			str;
486 	char*			q;
487 	char*			t;
488 	char*			cur;
489 	char*			base = 0;
490 	unsigned long		num;
491 	unsigned long		mask;
492 	regmatch_t		matches[10];
493 
494 	mp->swap = 0;
495 	b = mp->msg[0] = cur = buf;
496 	mp->mime = mp->cap[0] = 0;
497 	mp->keep[0] = 0;
498 	for (ep = mp->magic; ep; ep = ep->next)
499 	{
500 	fun:
501 		if (ep->nest == '{')
502 		{
503 			if (++level >= MAXNEST)
504 			{
505 				call = -1;
506 				level = 0;
507 				mp->keep[0] = 0;
508 				b = mp->msg[0];
509 				mp->mime = mp->cap[0];
510 				continue;
511 			}
512 			mp->keep[level] = mp->keep[level - 1] != 0;
513 			mp->msg[level] = b;
514 			mp->cap[level] = mp->mime;
515 		}
516 		switch (ep->cont)
517 		{
518 		case '#':
519 			if (mp->keep[level] && b > cur)
520 			{
521 				if ((mp->flags & MAGIC_ALL) && b < (end - 3))
522 				{
523 					all = 1;
524 					*b++ = '\n';
525 					cur = b;
526 					continue;
527 				}
528 				*b = 0;
529 				return buf;
530 			}
531 			mp->swap = 0;
532 			b = mp->msg[0] = cur;
533 			mp->mime = mp->cap[0] = 0;
534 			if (ep->type == ' ')
535 				continue;
536 			break;
537 		case '$':
538 			if (mp->keep[level] && call < (MAXNEST - 1))
539 			{
540 				mp->ret[++call] = ep;
541 				ep = ep->value.lab;
542 				goto fun;
543 			}
544 			continue;
545 		case ':':
546 			ep = mp->ret[call--];
547 			if (ep->op == 'l')
548 				goto fun;
549 			continue;
550 		case '|':
551 			if (mp->keep[level] > 1)
552 				goto checknest;
553 			/*FALLTHROUGH*/
554 		default:
555 			if (!mp->keep[level])
556 			{
557 				b = mp->msg[level];
558 				mp->mime = mp->cap[level];
559 				goto checknest;
560 			}
561 			break;
562 		}
563 		p = "";
564 		num = 0;
565 		if (!ep->expr)
566 			num = ep->offset + off;
567 		else
568 			switch (ep->offset)
569 			{
570 			case 0:
571 				num = strexpr(ep->expr, NiL, indirect, mp) + off;
572 				break;
573 			case INFO_atime:
574 				num = st->st_atime;
575 				ep->type = 'D';
576 				break;
577 			case INFO_blocks:
578 				num = iblocks(st);
579 				ep->type = 'N';
580 				break;
581 			case INFO_ctime:
582 				num = st->st_ctime;
583 				ep->type = 'D';
584 				break;
585 			case INFO_fstype:
586 				p = fmtfs(st);
587 				ep->type = toupper(ep->type);
588 				break;
589 			case INFO_gid:
590 				if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
591 				{
592 					p = fmtgid(st->st_gid);
593 					ep->type = toupper(ep->type);
594 				}
595 				else
596 				{
597 					num = st->st_gid;
598 					ep->type = 'N';
599 				}
600 				break;
601 			case INFO_mode:
602 				if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
603 				{
604 					p = fmtmode(st->st_mode, 0);
605 					ep->type = toupper(ep->type);
606 				}
607 				else
608 				{
609 					num = modex(st->st_mode);
610 					ep->type = 'N';
611 				}
612 				break;
613 			case INFO_mtime:
614 				num = st->st_ctime;
615 				ep->type = 'D';
616 				break;
617 			case INFO_name:
618 				if (!base)
619 				{
620 					if (base = strrchr(file, '/'))
621 						base++;
622 					else
623 						base = (char*)file;
624 				}
625 				p = base;
626 				ep->type = toupper(ep->type);
627 				break;
628 			case INFO_nlink:
629 				num = st->st_nlink;
630 				ep->type = 'N';
631 				break;
632 			case INFO_size:
633 				num = st->st_size;
634 				ep->type = 'N';
635 				break;
636 			case INFO_uid:
637 				if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
638 				{
639 					p = fmtuid(st->st_uid);
640 					ep->type = toupper(ep->type);
641 				}
642 				else
643 				{
644 					num = st->st_uid;
645 					ep->type = 'N';
646 				}
647 				break;
648 			}
649 		switch (ep->type)
650 		{
651 
652 		case 'b':
653 			if (!(p = getdata(mp, num, 1)))
654 				goto next;
655 			num = *(unsigned char*)p;
656 			break;
657 
658 		case 'h':
659 			if (!(p = getdata(mp, num, 2)))
660 				goto next;
661 			num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 2);
662 			break;
663 
664 		case 'd':
665 		case 'l':
666 		case 'v':
667 			if (!(p = getdata(mp, num, 4)))
668 				goto next;
669 			num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 4);
670 			break;
671 
672 		case 'q':
673 			if (!(p = getdata(mp, num, 8)))
674 				goto next;
675 			num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 8);
676 			break;
677 
678 		case 'e':
679 			if (!(p = getdata(mp, num, 0)))
680 				goto next;
681 			/*FALLTHROUGH*/
682 		case 'E':
683 			if (!ep->value.sub)
684 				goto next;
685 			if ((c = regexec(ep->value.sub, p, elementsof(matches), matches, 0)) || (c = regsubexec(ep->value.sub, p, elementsof(matches), matches)))
686 			{
687 				c = mp->fbsz;
688 				if (c >= sizeof(mp->nbuf))
689 					c = sizeof(mp->nbuf) - 1;
690 				p = (char*)memcpy(mp->nbuf, p, c);
691 				p[c] = 0;
692 				ccmapstr(mp->x2n, p, c);
693 				if ((c = regexec(ep->value.sub, p, elementsof(matches), matches, 0)) || (c = regsubexec(ep->value.sub, p, elementsof(matches), matches)))
694 				{
695 					if (c != REG_NOMATCH)
696 						regmessage(mp, ep->value.sub, c);
697 					goto next;
698 				}
699 			}
700 			p = ep->value.sub->re_sub->re_buf;
701 			q = T(ep->desc);
702 			t = *q ? q : p;
703 			if (mp->keep[level]++ && b > cur && b < end && *(b - 1) != ' ' && *t && *t != ',' && *t != '.' && *t != '\b')
704 				*b++ = ' ';
705 			b += sfsprintf(b, end - b, *q ? q : "%s", p + (*p == '\b'));
706 			if (ep->mime)
707 				mp->mime = ep->mime;
708 			goto checknest;
709 
710 		case 's':
711 			if (!(p = getdata(mp, num, ep->mask)))
712 				goto next;
713 			goto checkstr;
714 		case 'm':
715 			if (!(p = getdata(mp, num, 0)))
716 				goto next;
717 			/*FALLTHROUGH*/
718 		case 'M':
719 		case 'S':
720 		checkstr:
721 			for (;;)
722 			{
723 				if (*ep->value.str == '*' && !*(ep->value.str + 1) && isprint(*p))
724 					break;
725 				if ((ep->type == 'm' || ep->type == 'M') ? strmatch(p, ep->value.str) : !memcmp(p, ep->value.str, ep->mask))
726 					break;
727 				if (p == mp->nbuf || ep->mask >= sizeof(mp->nbuf))
728 					goto next;
729 				p = (char*)memcpy(mp->nbuf, p, ep->mask);
730 				p[ep->mask] = 0;
731 				ccmapstr(mp->x2n, p, ep->mask);
732 			}
733 			q = T(ep->desc);
734 			if (mp->keep[level]++ && b > cur && b < end && *(b - 1) != ' ' && *q && *q != ',' && *q != '.' && *q != '\b')
735 				*b++ = ' ';
736 			for (t = p; (c = *t) >= 0 && c <= 0177 && isprint(c) && c != '\n'; t++);
737 			*t = 0;
738 			b += sfsprintf(b, end - b, q + (*q == '\b'), p);
739 			*t = c;
740 			if (ep->mime)
741 				mp->mime = ep->mime;
742 			goto checknest;
743 
744 		}
745 		if (mask = ep->mask)
746 			num &= mask;
747 		switch (ep->op)
748 		{
749 
750 		case '=':
751 		case '@':
752 			if (num == ep->value.num)
753 				break;
754 			if (ep->cont != '#')
755 				goto next;
756 			if (!mask)
757 				mask = ~mask;
758 			if (ep->type == 'h')
759 			{
760 				if ((num = swapget(mp->swap = 1, p, 2) & mask) == ep->value.num)
761 				{
762 					if (!(mp->swap & (mp->swap + 1)))
763 						mp->swap = 7;
764 					goto swapped;
765 				}
766 			}
767 			else if (ep->type == 'l')
768 			{
769 				for (c = 1; c < 4; c++)
770 					if ((num = swapget(mp->swap = c, p, 4) & mask) == ep->value.num)
771 					{
772 						if (!(mp->swap & (mp->swap + 1)))
773 							mp->swap = 7;
774 						goto swapped;
775 					}
776 			}
777 			else if (ep->type == 'q')
778 			{
779 				for (c = 1; c < 8; c++)
780 					if ((num = swapget(mp->swap = c, p, 8) & mask) == ep->value.num)
781 						goto swapped;
782 			}
783 			goto next;
784 
785 		case '!':
786 			if (num != ep->value.num)
787 				break;
788 			goto next;
789 
790 		case '^':
791 			if (num ^ ep->value.num)
792 				break;
793 			goto next;
794 
795 		case '>':
796 			if (num > ep->value.num)
797 				break;
798 			goto next;
799 
800 		case '<':
801 			if (num < ep->value.num)
802 				break;
803 			goto next;
804 
805 		case 'l':
806 			if (num > 0 && mp->keep[level] && call < (MAXNEST - 1))
807 			{
808 				if (!ep->value.loop->count)
809 				{
810 					ep->value.loop->count = num;
811 					ep->value.loop->offset = off;
812 					off = ep->value.loop->start;
813 				}
814 				else if (!--ep->value.loop->count)
815 				{
816 					off = ep->value.loop->offset;
817 					goto next;
818 				}
819 				else
820 					off += ep->value.loop->size;
821 				mp->ret[++call] = ep;
822 				ep = ep->value.loop->lab;
823 				goto fun;
824 			}
825 			goto next;
826 
827 		case 'm':
828 			c = mp->swap;
829 			t = ckmagic(mp, file, b + (b > cur), end, st, num);
830 			mp->swap = c;
831 			if (t)
832 			{
833 				if (b > cur && b < end)
834 					*b = ' ';
835 				b += strlen(b);
836 			}
837 			else if (ep->cont == '&')
838 				goto next;
839 			break;
840 
841 		case 'r':
842 #if _UWIN
843 		{
844 			char*			e;
845 			Sfio_t*			rp;
846 			Sfio_t*			gp;
847 
848 			if (!(t = strrchr(file, '.')))
849 				goto next;
850 			sfprintf(mp->tmp, "/reg/classes_root/%s", t);
851 			if (!(t = sfstruse(mp->tmp)) || !(rp = sfopen(NiL, t, "r")))
852 				goto next;
853 			*ep->desc = 0;
854 			*ep->mime = 0;
855 			gp = 0;
856 			while (t = sfgetr(rp, '\n', 1))
857 			{
858 				if (strneq(t, "Content Type=", 13))
859 				{
860 					ep->mime = vmnewof(mp->vm, ep->mime, char, sfvalue(rp), 0);
861 					strcpy(ep->mime, t + 13);
862 					if (gp)
863 						break;
864 				}
865 				else
866 				{
867 					sfprintf(mp->tmp, "/reg/classes_root/%s", t);
868 					if ((e = sfstruse(mp->tmp)) && (gp = sfopen(NiL, e, "r")))
869 					{
870 						ep->desc = vmnewof(mp->vm, ep->desc, char, strlen(t), 1);
871 						strcpy(ep->desc, t);
872 						if (*ep->mime)
873 							break;
874 					}
875 				}
876 			}
877 			sfclose(rp);
878 			if (!gp)
879 				goto next;
880 			if (!*ep->mime)
881 			{
882 				t = T(ep->desc);
883 				if (!strncasecmp(t, "microsoft", 9))
884 					t += 9;
885 				while (isspace(*t))
886 					t++;
887 				e = "application/x-ms-";
888 				ep->mime = vmnewof(mp->vm, ep->mime, char, strlen(t), strlen(e));
889 				e = strcopy(ep->mime, e);
890 				while ((c = *t++) && c != '.' && c != ' ')
891 					*e++ = isupper(c) ? tolower(c) : c;
892 				*e = 0;
893 			}
894 			while (t = sfgetr(gp, '\n', 1))
895 				if (*t && !streq(t, "\"\""))
896 				{
897 					ep->desc = vmnewof(mp->vm, ep->desc, char, sfvalue(gp), 0);
898 					strcpy(ep->desc, t);
899 					break;
900 				}
901 			sfclose(gp);
902 			if (!*ep->desc)
903 				goto next;
904 			if (!t)
905 				for (t = T(ep->desc); *t; t++)
906 					if (*t == '.')
907 						*t = ' ';
908 			if (!mp->keep[level])
909 				mp->keep[level] = 2;
910 			mp->mime = ep->mime;
911 			break;
912 		}
913 #else
914 			if (ep->cont == '#' && !mp->keep[level])
915 				mp->keep[level] = 1;
916 			goto next;
917 #endif
918 
919 		case 'v':
920 			if (!(p = getdata(mp, num, 4)))
921 				goto next;
922 			c = 0;
923 			do
924 			{
925 				num++;
926 				c = (c<<7) | (*p & 0x7f);
927 			} while (*p++ & 0x80);
928 			if (!(p = getdata(mp, num, c)))
929 				goto next;
930 			if (mp->keep[level]++ && b > cur && b < (end - 1) && *(b - 1) != ' ')
931 			{
932 				*b++ = ',';
933 				*b++ = ' ';
934 			}
935 			b = vcdecomp(b, cur + PATH_MAX, (unsigned char*)p, (unsigned char*)p + c);
936 			goto checknest;
937 
938 		}
939 	swapped:
940 		q = T(ep->desc);
941 		if (mp->keep[level]++ && b > cur && b < end && *(b - 1) != ' ' && *q && *q != ',' && *q != '.' && *q != '\b')
942 			*b++ = ' ';
943 		if (*q == '\b')
944 			q++;
945 		str = 0;
946 		for (t = q; *t; t++)
947 			if (*t == '%' && (c = *(t + 1)))
948 			{
949 				if (c == '%')
950 					t++;
951 				else
952 					while (c && c != '%')
953 					{
954 						if (c == 's')
955 						{
956 							str = 1;
957 							break;
958 						}
959 						else if (c == 'c' || c == 'd' || c == 'i' || c == 'u' || c == 'x' || c == 'X')
960 							goto format;
961 						t++;
962 						c = *(t + 1);
963 					}
964 			}
965 	format:
966 		if (!str)
967 			b += sfsprintf(b, end - b, q, num, num == 1 ? "" : "s", 0, 0, 0, 0, 0, 0);
968 		else if (ep->type == 'd' || ep->type == 'D')
969 			b += sfsprintf(b, end - b, q, fmttime("%?%QL", (time_t)num), 0, 0, 0, 0, 0, 0, 0);
970 		else if (ep->type == 'v')
971 			b += sfsprintf(b, end - b, q, fmtversion(num), 0, 0, 0, 0, 0, 0, 0);
972 		else
973 			b += sfsprintf(b, end - b, q, fmtnum(num, 0), num == 1 ? "" : "s", 0, 0, 0, 0, 0, 0);
974 		if (ep->mime && *ep->mime)
975 			mp->mime = ep->mime;
976 	checknest:
977 		if (ep->nest == '}')
978 		{
979 			if (!mp->keep[level])
980 			{
981 				b = mp->msg[level];
982 				mp->mime = mp->cap[level];
983 			}
984 			else if (level > 0)
985 				mp->keep[level - 1] = mp->keep[level];
986 			if (--level < 0)
987 			{
988 				level = 0;
989 				mp->keep[0] = 0;
990 			}
991 		}
992 		continue;
993 	next:
994 		if (ep->cont == '&')
995 			mp->keep[level] = 0;
996 		goto checknest;
997 	}
998 	if (all && b-- || mp->keep[level] && b > cur)
999 	{
1000 		*b = 0;
1001 		return buf;
1002 	}
1003 	return 0;
1004 }
1005 
1006 /*
1007  * check english language stats
1008  */
1009 
1010 static int
ckenglish(register Magic_t * mp,int pun,int badpun)1011 ckenglish(register Magic_t* mp, int pun, int badpun)
1012 {
1013 	register char*	s;
1014 	register int	vowl = 0;
1015 	register int	freq = 0;
1016 	register int	rare = 0;
1017 
1018 	if (5 * badpun > pun)
1019 		return 0;
1020 	if (2 * mp->count[';'] > mp->count['E'] + mp->count['e'])
1021 		return 0;
1022 	if ((mp->count['>'] + mp->count['<'] + mp->count['/']) > mp->count['E'] + mp->count['e'])
1023 		return 0;
1024 	for (s = "aeiou"; *s; s++)
1025 		vowl += mp->count[toupper(*s)] + mp->count[*s];
1026 	for (s = "etaion"; *s; s++)
1027 		freq += mp->count[toupper(*s)] + mp->count[*s];
1028 	for (s = "vjkqxz"; *s; s++)
1029 		rare += mp->count[toupper(*s)] + mp->count[*s];
1030 	return 5 * vowl >= mp->fbsz - mp->count[' '] && freq >= 10 * rare;
1031 }
1032 
1033 /*
1034  * check programming language stats
1035  */
1036 
1037 static char*
cklang(register Magic_t * mp,const char * file,char * buf,char * end,struct stat * st)1038 cklang(register Magic_t* mp, const char* file, char* buf, char* end, struct stat* st)
1039 {
1040 	register int		c;
1041 	register unsigned char*	b;
1042 	register unsigned char*	e;
1043 	register int		q;
1044 	register char*		s;
1045 	char*			t;
1046 	char*			base;
1047 	char*			suff;
1048 	char*			t1;
1049 	char*			t2;
1050 	char*			t3;
1051 	int			n;
1052 	int			badpun;
1053 	int			code;
1054 	int			pun;
1055 	Cctype_t		flags;
1056 	Info_t*			ip;
1057 
1058 	b = (unsigned char*)mp->fbuf;
1059 	e = b + mp->fbsz;
1060 	memzero(mp->count, sizeof(mp->count));
1061 	memzero(mp->multi, sizeof(mp->multi));
1062 	memzero(mp->identifier, sizeof(mp->identifier));
1063 
1064 	/*
1065 	 * check character coding
1066 	 */
1067 
1068 	flags = 0;
1069 	while (b < e)
1070 		flags |= mp->cctype[*b++];
1071 	b = (unsigned char*)mp->fbuf;
1072 	code = 0;
1073 	q = CC_ASCII;
1074 	n = CC_MASK;
1075 	for (c = 0; c < CC_MAPS; c++)
1076 	{
1077 		flags ^= CC_text;
1078 		if ((flags & CC_MASK) < n)
1079 		{
1080 			n = flags & CC_MASK;
1081 			q = c;
1082 		}
1083 		flags >>= CC_BIT;
1084 	}
1085 	flags = n;
1086 	if (!(flags & (CC_binary|CC_notext)))
1087 	{
1088 		if (q != CC_NATIVE)
1089 		{
1090 			code = q;
1091 			ccmaps(mp->fbuf, mp->fbsz, q, CC_NATIVE);
1092 		}
1093 		if (b[0] == '#' && b[1] == '!')
1094 		{
1095 			for (b += 2; b < e && isspace(*b); b++);
1096 			for (s = (char*)b; b < e && isprint(*b); b++);
1097 			c = *b;
1098 			*b = 0;
1099 			if ((st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) || match(s, "/*bin*/*") || !access(s, F_OK))
1100 			{
1101 				if (t = strrchr(s, '/'))
1102 					s = t + 1;
1103 				for (t = s; *t; t++)
1104 					if (isspace(*t))
1105 					{
1106 						*t = 0;
1107 						break;
1108 					}
1109 				sfsprintf(mp->mbuf, sizeof(mp->mbuf), "application/x-%s", *s ? s : "sh");
1110 				mp->mime = mp->mbuf;
1111 				if (match(s, "*sh"))
1112 				{
1113 					t1 = T("command");
1114 					if (streq(s, "sh"))
1115 						*s = 0;
1116 					else
1117 					{
1118 						*b++ = ' ';
1119 						*b = 0;
1120 					}
1121 				}
1122 				else
1123 				{
1124 					t1 = T("interpreter");
1125 					*b++ = ' ';
1126 					*b = 0;
1127 				}
1128 				sfsprintf(mp->sbuf, sizeof(mp->sbuf), T("%s%s script"), s, t1);
1129 				s = mp->sbuf;
1130 				goto qualify;
1131 			}
1132 			*b = c;
1133 			b = (unsigned char*)mp->fbuf;
1134 		}
1135 		badpun = 0;
1136 		pun = 0;
1137 		q = 0;
1138 		s = 0;
1139 		t = 0;
1140 		while (b < e)
1141 		{
1142 			c = *b++;
1143 			mp->count[c]++;
1144 			if (c == q && (q != '*' || *b == '/' && b++))
1145 			{
1146 				mp->multi[q]++;
1147 				q = 0;
1148 			}
1149 			else if (c == '\\')
1150 			{
1151 				s = 0;
1152 				b++;
1153 			}
1154 			else if (!q)
1155 			{
1156 				if (isalpha(c) || c == '_')
1157 				{
1158 					if (!s)
1159 						s = (char*)b - 1;
1160 				}
1161 				else if (!isdigit(c))
1162 				{
1163 					if (s)
1164 					{
1165 						if (s > mp->fbuf)
1166 							switch (*(s - 1))
1167 							{
1168 							case ':':
1169 								if (*b == ':')
1170 									mp->multi[':']++;
1171 								break;
1172 							case '.':
1173 								if (((char*)b - s) == 3 && (s == (mp->fbuf + 1) || *(s - 2) == '\n'))
1174 									mp->multi['.']++;
1175 								break;
1176 							case '\n':
1177 							case '\\':
1178 								if (*b == '{')
1179 									t = (char*)b + 1;
1180 								break;
1181 							case '{':
1182 								if (s == t && *b == '}')
1183 									mp->multi['X']++;
1184 								break;
1185 							}
1186 						if (!mp->idtab)
1187 						{
1188 							if (mp->idtab = dtnew(mp->vm, &mp->dtdisc, Dtset))
1189 								for (q = 0; q < elementsof(dict); q++)
1190 									dtinsert(mp->idtab, &dict[q]);
1191 							else if (mp->disc->errorf)
1192 								(*mp->disc->errorf)(mp, mp->disc, 3, "out of space");
1193 							q = 0;
1194 						}
1195 						if (mp->idtab)
1196 						{
1197 							*(b - 1) = 0;
1198 							if (ip = (Info_t*)dtmatch(mp->idtab, s))
1199 								mp->identifier[ip->value]++;
1200 							*(b - 1) = c;
1201 						}
1202 						s = 0;
1203 					}
1204 					switch (c)
1205 					{
1206 					case '\t':
1207 						if (b == (unsigned char*)(mp->fbuf + 1) || *(b - 2) == '\n')
1208 							mp->multi['\t']++;
1209 						break;
1210 					case '"':
1211 					case '\'':
1212 						q = c;
1213 						break;
1214 					case '/':
1215 						if (*b == '*')
1216 							q = *b++;
1217 						else if (*b == '/')
1218 							q = '\n';
1219 						break;
1220 					case '$':
1221 						if (*b == '(' && *(b + 1) != ' ')
1222 							mp->multi['$']++;
1223 						break;
1224 					case '{':
1225 					case '}':
1226 					case '[':
1227 					case ']':
1228 					case '(':
1229 						mp->multi[c]++;
1230 						break;
1231 					case ')':
1232 						mp->multi[c]++;
1233 						goto punctuation;
1234 					case ':':
1235 						if (*b == ':' && isspace(*(b + 1)) && b > (unsigned char*)(mp->fbuf + 1) && isspace(*(b - 2)))
1236 							mp->multi[':']++;
1237 						goto punctuation;
1238 					case '.':
1239 					case ',':
1240 					case '%':
1241 					case ';':
1242 					case '?':
1243 					punctuation:
1244 						pun++;
1245 						if (*b != ' ' && *b != '\n')
1246 							badpun++;
1247 						break;
1248 					}
1249 				}
1250 			}
1251 		}
1252 	}
1253 	else
1254 		while (b < e)
1255 			mp->count[*b++]++;
1256 	base = (t1 = strrchr(file, '/')) ? t1 + 1 : (char*)file;
1257 	suff = (t1 = strrchr(base, '.')) ? t1 + 1 : "";
1258 	if (!flags)
1259 	{
1260 		if (match(suff, "*sh|bat|cmd"))
1261 			goto id_sh;
1262 		if (match(base, "*@(mkfile)"))
1263 			goto id_mk;
1264 		if (match(base, "*@(makefile|.mk)"))
1265 			goto id_make;
1266 		if (match(base, "*@(mamfile|.mam)"))
1267 			goto id_mam;
1268 		if (match(suff, "[cly]?(pp|xx|++)|cc|ll|yy"))
1269 			goto id_c;
1270 		if (match(suff, "f"))
1271 			goto id_fortran;
1272 		if (match(suff, "htm+(l)"))
1273 			goto id_html;
1274 		if (match(suff, "cpy"))
1275 			goto id_copybook;
1276 		if (match(suff, "cob|cbl|cb2"))
1277 			goto id_cobol;
1278 		if (match(suff, "pl[1i]"))
1279 			goto id_pl1;
1280 		if (match(suff, "tex"))
1281 			goto id_tex;
1282 		if (match(suff, "asm|s"))
1283 			goto id_asm;
1284 		if ((st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) && (!suff || suff != strchr(suff, '.')))
1285 		{
1286 		id_sh:
1287 			s = T("command script");
1288 			mp->mime = "application/sh";
1289 			goto qualify;
1290 		}
1291 		if (strmatch(mp->fbuf, "From * [0-9][0-9]:[0-9][0-9]:[0-9][0-9] *"))
1292 		{
1293 			s = T("mail message");
1294 			mp->mime = "message/rfc822";
1295 			goto qualify;
1296 		}
1297 		if (match(base, "*@(mkfile)"))
1298 		{
1299 		id_mk:
1300 			s = "mkfile";
1301 			mp->mime = "application/mk";
1302 			goto qualify;
1303 		}
1304 		if (match(base, "*@(makefile|.mk)") || mp->multi['\t'] >= mp->count[':'] && (mp->multi['$'] > 0 || mp->multi[':'] > 0))
1305 		{
1306 		id_make:
1307 			s = "makefile";
1308 			mp->mime = "application/make";
1309 			goto qualify;
1310 		}
1311 		if (mp->multi['.'] >= 3)
1312 		{
1313 			s = T("nroff input");
1314 			mp->mime = "application/x-troff";
1315 			goto qualify;
1316 		}
1317 		if (mp->multi['X'] >= 3)
1318 		{
1319 			s = T("TeX input");
1320 			mp->mime = "application/x-tex";
1321 			goto qualify;
1322 		}
1323 		if (mp->fbsz < SF_BUFSIZE &&
1324 		    (mp->multi['('] == mp->multi[')'] &&
1325 		     mp->multi['{'] == mp->multi['}'] &&
1326 		     mp->multi['['] == mp->multi[']']) ||
1327 		    mp->fbsz >= SF_BUFSIZE &&
1328 		    (mp->multi['('] >= mp->multi[')'] &&
1329 		     mp->multi['{'] >= mp->multi['}'] &&
1330 		     mp->multi['['] >= mp->multi[']']))
1331 		{
1332 			c = mp->identifier[ID_INCL1];
1333 			if (c >= 2 && mp->identifier[ID_INCL2] >= c && mp->identifier[ID_INCL3] >= c && mp->count['.'] >= c ||
1334 			    mp->identifier[ID_C] >= 5 && mp->count[';'] >= 5 ||
1335 			    mp->count['='] >= 20 && mp->count[';'] >= 20)
1336 			{
1337 			id_c:
1338 				t1 = "";
1339 				t2 = "c ";
1340 				t3 = T("program");
1341 				switch (*suff)
1342 				{
1343 				case 'c':
1344 				case 'C':
1345 					mp->mime = "application/x-cc";
1346 					break;
1347 				case 'l':
1348 				case 'L':
1349 					t1 = "lex ";
1350 					mp->mime = "application/x-lex";
1351 					break;
1352 				default:
1353 					t3 = T("header");
1354 					if (mp->identifier[ID_YACC] < 5 || mp->count['%'] < 5)
1355 					{
1356 						mp->mime = "application/x-cc";
1357 						break;
1358 					}
1359 					/*FALLTHROUGH*/
1360 				case 'y':
1361 				case 'Y':
1362 					t1 = "yacc ";
1363 					mp->mime = "application/x-yacc";
1364 					break;
1365 				}
1366 				if (mp->identifier[ID_CPLUSPLUS] >= 3)
1367 				{
1368 					t2 = "c++ ";
1369 					mp->mime = "application/x-c++";
1370 				}
1371 				sfsprintf(mp->sbuf, sizeof(mp->sbuf), "%s%s%s", t1, t2, t3);
1372 				s = mp->sbuf;
1373 				goto qualify;
1374 			}
1375 		}
1376 		if (mp->identifier[ID_MAM1] >= 2 && mp->identifier[ID_MAM3] >= 2 &&
1377 		    (mp->fbsz < SF_BUFSIZE && mp->identifier[ID_MAM1] == mp->identifier[ID_MAM2] ||
1378 		     mp->fbsz >= SF_BUFSIZE && mp->identifier[ID_MAM1] >= mp->identifier[ID_MAM2]))
1379 		{
1380 		id_mam:
1381 			s = T("mam program");
1382 			mp->mime = "application/x-mam";
1383 			goto qualify;
1384 		}
1385 		if (mp->identifier[ID_FORTRAN] >= 8)
1386 		{
1387 		id_fortran:
1388 			s = T("fortran program");
1389 			mp->mime = "application/x-fortran";
1390 			goto qualify;
1391 		}
1392 		if (mp->identifier[ID_HTML] > 0 && mp->count['<'] >= 8 && (c = mp->count['<'] - mp->count['>']) >= -2 && c <= 2)
1393 		{
1394 		id_html:
1395 			s = T("html input");
1396 			mp->mime = "text/html";
1397 			goto qualify;
1398 		}
1399 		if (mp->identifier[ID_COPYBOOK] > 0 && mp->identifier[ID_COBOL] == 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
1400 		{
1401 		id_copybook:
1402 			s = T("cobol copybook");
1403 			mp->mime = "application/x-cobol";
1404 			goto qualify;
1405 		}
1406 		if (mp->identifier[ID_COBOL] > 0 && mp->identifier[ID_COPYBOOK] > 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
1407 		{
1408 		id_cobol:
1409 			s = T("cobol program");
1410 			mp->mime = "application/x-cobol";
1411 			goto qualify;
1412 		}
1413 		if (mp->identifier[ID_PL1] > 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
1414 		{
1415 		id_pl1:
1416 			s = T("pl1 program");
1417 			mp->mime = "application/x-pl1";
1418 			goto qualify;
1419 		}
1420 		if (mp->count['{'] >= 6 && (c = mp->count['{'] - mp->count['}']) >= -2 && c <= 2 && mp->count['\\'] >= mp->count['{'])
1421 		{
1422 		id_tex:
1423 			s = T("TeX input");
1424 			mp->mime = "text/tex";
1425 			goto qualify;
1426 		}
1427 		if (mp->identifier[ID_ASM] >= 4)
1428 		{
1429 		id_asm:
1430 			s = T("as program");
1431 			mp->mime = "application/x-as";
1432 			goto qualify;
1433 		}
1434 		if (ckenglish(mp, pun, badpun))
1435 		{
1436 			s = T("english text");
1437 			mp->mime = "text/plain";
1438 			goto qualify;
1439 		}
1440 	}
1441 	else if (streq(base, "core"))
1442 	{
1443 		mp->mime = "x-system/core";
1444 		return T("core dump");
1445 	}
1446 	if (flags & (CC_binary|CC_notext))
1447 	{
1448 		b = (unsigned char*)mp->fbuf;
1449 		e = b + mp->fbsz;
1450 		n = 0;
1451 		for (;;)
1452 		{
1453 			c = *b++;
1454 			q = 0;
1455 			while (c & 0x80)
1456 			{
1457 				c <<= 1;
1458 				q++;
1459 			}
1460 			switch (q)
1461 			{
1462 			case 4:
1463 				if (b < e && (*b++ & 0xc0) != 0x80)
1464 					break;
1465 				/* FALLTHROUGH */
1466 			case 3:
1467 				if (b < e && (*b++ & 0xc0) != 0x80)
1468 					break;
1469 				/* FALLTHROUGH */
1470 			case 2:
1471 				if (b < e && (*b++ & 0xc0) != 0x80)
1472 					break;
1473 				n = 1;
1474 				/* FALLTHROUGH */
1475 			case 0:
1476 				if (b >= e)
1477 				{
1478 					if (n)
1479 					{
1480 						flags &= ~(CC_binary|CC_notext);
1481 						flags |= CC_utf_8;
1482 					}
1483 					break;
1484 				}
1485 				continue;
1486 			}
1487 			break;
1488 		}
1489 	}
1490 	if (flags & (CC_binary|CC_notext))
1491 	{
1492 		unsigned long	d = 0;
1493 
1494 		if ((q = mp->fbsz / UCHAR_MAX) >= 2)
1495 		{
1496 			/*
1497 			 * compression/encryption via standard deviation
1498 			 */
1499 
1500 
1501 			for (c = 0; c < UCHAR_MAX; c++)
1502 			{
1503 				pun = mp->count[c] - q;
1504 				d += pun * pun;
1505 			}
1506 			d /= mp->fbsz;
1507 		}
1508 		if (d <= 0)
1509 			s = T("binary");
1510 		else if (d < 4)
1511 			s = T("encrypted");
1512 		else if (d < 16)
1513 			s = T("packed");
1514 		else if (d < 64)
1515 			s = T("compressed");
1516 		else if (d < 256)
1517 			s = T("delta");
1518 		else
1519 			s = T("data");
1520 		mp->mime = "application/octet-stream";
1521 		return s;
1522 	}
1523 	mp->mime = "text/plain";
1524 	if (flags & CC_utf_8)
1525 		s = (flags & CC_control) ? T("utf-8 text with control characters") : T("utf-8 text");
1526 	else if (flags & CC_latin)
1527 		s = (flags & CC_control) ? T("latin text with control characters") : T("latin text");
1528 	else
1529 		s = (flags & CC_control) ? T("text with control characters") : T("text");
1530  qualify:
1531 	if (!flags && mp->count['\n'] >= mp->count['\r'] && mp->count['\n'] <= (mp->count['\r'] + 1) && mp->count['\r'])
1532 	{
1533 		t = "dos ";
1534 		mp->mime = "text/dos";
1535 	}
1536 	else
1537 		t = "";
1538 	if (code)
1539 	{
1540 		if (code == CC_ASCII)
1541 			sfsprintf(buf, end - buf, "ascii %s%s", t, s);
1542 		else
1543 		{
1544 			sfsprintf(buf, end - buf, "ebcdic%d %s%s", code - 1, t, s);
1545 			mp->mime = "text/ebcdic";
1546 		}
1547 		s = buf;
1548 	}
1549 	else if (*t)
1550 	{
1551 		sfsprintf(buf, end - buf, "%s%s", t, s);
1552 		s = buf;
1553 	}
1554 	return s;
1555 }
1556 
1557 /*
1558  * return the basic magic string for file,st in buf,size
1559  */
1560 
1561 static char*
type(register Magic_t * mp,const char * file,struct stat * st,char * buf,char * end)1562 type(register Magic_t* mp, const char* file, struct stat* st, char* buf, char* end)
1563 {
1564 	register char*	s;
1565 	register char*	t;
1566 
1567 	mp->mime = 0;
1568 	if (!S_ISREG(st->st_mode))
1569 	{
1570 		if (S_ISDIR(st->st_mode))
1571 		{
1572 			mp->mime = "x-system/dir";
1573 			return T("directory");
1574 		}
1575 		if (S_ISLNK(st->st_mode))
1576 		{
1577 			mp->mime = "x-system/lnk";
1578 			s = buf;
1579 			s += sfsprintf(s, end - s, T("symbolic link to "));
1580 			if (pathgetlink(file, s, end - s) < 0)
1581 				return T("cannot read symbolic link text");
1582 			return buf;
1583 		}
1584 		if (S_ISBLK(st->st_mode))
1585 		{
1586 			mp->mime = "x-system/blk";
1587 			sfsprintf(buf, PATH_MAX, T("block special (%s)"), fmtdev(st));
1588 			return buf;
1589 		}
1590 		if (S_ISCHR(st->st_mode))
1591 		{
1592 			mp->mime = "x-system/chr";
1593 			sfsprintf(buf, end - buf, T("character special (%s)"), fmtdev(st));
1594 			return buf;
1595 		}
1596 		if (S_ISFIFO(st->st_mode))
1597 		{
1598 			mp->mime = "x-system/fifo";
1599 			return "fifo";
1600 		}
1601 #ifdef S_ISSOCK
1602 		if (S_ISSOCK(st->st_mode))
1603 		{
1604 			mp->mime = "x-system/sock";
1605 			return "socket";
1606 		}
1607 #endif
1608 	}
1609 	if (!(mp->fbmx = st->st_size))
1610 		s = T("empty");
1611 	else if (!mp->fp)
1612 		s = T("cannot read");
1613 	else
1614 	{
1615 		mp->fbsz = sfread(mp->fp, mp->fbuf, sizeof(mp->fbuf) - 1);
1616 		if (mp->fbsz < 0)
1617 			s = fmterror(errno);
1618 		else if (mp->fbsz == 0)
1619 			s = T("empty");
1620 		else
1621 		{
1622 			mp->fbuf[mp->fbsz] = 0;
1623 			mp->xoff = 0;
1624 			mp->xbsz = 0;
1625 			if (!(s = ckmagic(mp, file, buf, end, st, 0)))
1626 				s = cklang(mp, file, buf, end, st);
1627 		}
1628 	}
1629 	if (!mp->mime)
1630 		mp->mime = "application/unknown";
1631 	else if ((t = strchr(mp->mime, '%')) && *(t + 1) == 's' && !*(t + 2))
1632 	{
1633 		register char*	b;
1634 		register char*	be;
1635 		register char*	m;
1636 		register char*	me;
1637 
1638 		b = mp->mime;
1639 		me = (m = mp->mime = mp->fbuf) + sizeof(mp->fbuf) - 1;
1640 		while (m < me && b < t)
1641 			*m++ = *b++;
1642 		b = t = s;
1643 		for (;;)
1644 		{
1645 			if (!(be = strchr(t, ' ')))
1646 			{
1647 				be = b + strlen(b);
1648 				break;
1649 			}
1650 			if (*(be - 1) == ',' || strneq(be + 1, "data", 4) || strneq(be + 1, "file", 4))
1651 				break;
1652 			b = t;
1653 			t = be + 1;
1654 		}
1655 		while (m < me && b < be)
1656 			if ((*m++ = *b++) == ' ')
1657 				*(m - 1) = '-';
1658 		*m = 0;
1659 	}
1660 	return s;
1661 }
1662 
1663 /*
1664  * low level for magicload()
1665  */
1666 
1667 static int
load(register Magic_t * mp,char * file,register Sfio_t * fp)1668 load(register Magic_t* mp, char* file, register Sfio_t* fp)
1669 {
1670 	register Entry_t*	ep;
1671 	register char*		p;
1672 	register char*		p2;
1673 	char*			p3;
1674 	char*			next;
1675 	int			n;
1676 	int			lge;
1677 	int			lev;
1678 	int			ent;
1679 	int			old;
1680 	int			cont;
1681 	Info_t*			ip;
1682 	Entry_t*		ret;
1683 	Entry_t*		first;
1684 	Entry_t*		last = 0;
1685 	Entry_t*		fun['z' - 'a' + 1];
1686 
1687 	memzero(fun, sizeof(fun));
1688 	cont = '$';
1689 	ent = 0;
1690 	lev = 0;
1691 	old = 0;
1692 	ret = 0;
1693 	error_info.file = file;
1694 	error_info.line = 0;
1695 	first = ep = vmnewof(mp->vm, 0, Entry_t, 1, 0);
1696 	while (p = sfgetr(fp, '\n', 1))
1697 	{
1698 		error_info.line++;
1699 		for (; isspace(*p); p++);
1700 
1701 		/*
1702 		 * nesting
1703 		 */
1704 
1705 		switch (*p)
1706 		{
1707 		case 0:
1708 		case '#':
1709 			cont = '#';
1710 			continue;
1711 		case '{':
1712 			if (++lev < MAXNEST)
1713 				ep->nest = *p;
1714 			else if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
1715 				(*mp->disc->errorf)(mp, mp->disc, 1, "{ ... } operator nesting too deep -- %d max", MAXNEST);
1716 			continue;
1717 		case '}':
1718 			if (!last || lev <= 0)
1719 			{
1720 				if (mp->disc->errorf)
1721 					(*mp->disc->errorf)(mp, mp->disc, 2, "`%c': invalid nesting", *p);
1722 			}
1723 			else if (lev-- == ent)
1724 			{
1725 				ent = 0;
1726 				ep->cont = ':';
1727 				ep->offset = ret->offset;
1728 				ep->nest = ' ';
1729 				ep->type = ' ';
1730 				ep->op = ' ';
1731 				ep->desc = "[RETURN]";
1732 				last = ep;
1733 				ep = ret->next = vmnewof(mp->vm, 0, Entry_t, 1, 0);
1734 				ret = 0;
1735 			}
1736 			else
1737 				last->nest = *p;
1738 			continue;
1739 		default:
1740 			if (*(p + 1) == '{' || *(p + 1) == '(' && *p != '+' && *p != '>' && *p != '&' && *p != '|')
1741 			{
1742 				n = *p++;
1743 				if (n >= 'a' && n <= 'z')
1744 					n -= 'a';
1745 				else
1746 				{
1747 					if (mp->disc->errorf)
1748 						(*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function name", n);
1749 					n = 0;
1750 				}
1751 				if (ret && mp->disc->errorf)
1752 					(*mp->disc->errorf)(mp, mp->disc, 2, "%c: function has no return", ret->offset + 'a');
1753 				if (*p == '{')
1754 				{
1755 					ent = ++lev;
1756 					ret = ep;
1757 					ep->desc = "[FUNCTION]";
1758 				}
1759 				else
1760 				{
1761 					if (*(p + 1) != ')' && mp->disc->errorf)
1762 						(*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function call argument list", n + 'a');
1763 					ep->desc = "[CALL]";
1764 				}
1765 				ep->cont = cont;
1766 				ep->offset = n;
1767 				ep->nest = ' ';
1768 				ep->type = ' ';
1769 				ep->op = ' ';
1770 				last = ep;
1771 				ep = ep->next = vmnewof(mp->vm, 0, Entry_t, 1, 0);
1772 				if (ret)
1773 					fun[n] = last->value.lab = ep;
1774 				else if (!(last->value.lab = fun[n]) && mp->disc->errorf)
1775 					(*mp->disc->errorf)(mp, mp->disc, 2, "%c: function not defined", n + 'a');
1776 				continue;
1777 			}
1778 			if (!ep->nest)
1779 				ep->nest = (lev > 0 && lev != ent) ? ('0' + lev - !!ent) : ' ';
1780 			break;
1781 		}
1782 
1783 		/*
1784 		 * continuation
1785 		 */
1786 
1787 		cont = '$';
1788 		switch (*p)
1789 		{
1790 		case '>':
1791 			old = 1;
1792 			if (*(p + 1) == *p)
1793 			{
1794 				/*
1795 				 * old style nesting push
1796 				 */
1797 
1798 				p++;
1799 				old = 2;
1800 				if (!lev && last)
1801 				{
1802 					lev = 1;
1803 					last->nest = '{';
1804 					if (last->cont == '>')
1805 						last->cont = '&';
1806 					ep->nest = '1';
1807 				}
1808 			}
1809 			/*FALLTHROUGH*/
1810 		case '+':
1811 		case '&':
1812 		case '|':
1813 			ep->cont = *p++;
1814 			break;
1815 		default:
1816 			if ((mp->flags & MAGIC_VERBOSE) && !isalpha(*p) && mp->disc->errorf)
1817 				(*mp->disc->errorf)(mp, mp->disc, 1, "`%c': invalid line continuation operator", *p);
1818 			/*FALLTHROUGH*/
1819 		case '*':
1820 		case '0': case '1': case '2': case '3': case '4':
1821 		case '5': case '6': case '7': case '8': case '9':
1822 			ep->cont = (lev > 0) ? '&' : '#';
1823 			break;
1824 		}
1825 		switch (old)
1826 		{
1827 		case 1:
1828 			old = 0;
1829 			if (lev)
1830 			{
1831 				/*
1832 				 * old style nesting pop
1833 				 */
1834 
1835 				lev = 0;
1836 				if (last)
1837 					last->nest = '}';
1838 				ep->nest = ' ';
1839 				if (ep->cont == '&')
1840 					ep->cont = '#';
1841 			}
1842 			break;
1843 		case 2:
1844 			old = 1;
1845 			break;
1846 		}
1847 		if (isdigit(*p))
1848 		{
1849 			/*
1850 			 * absolute offset
1851 			 */
1852 
1853 			ep->offset = strton(p, &next, NiL, 0);
1854 			p2 = next;
1855 		}
1856 		else
1857 		{
1858 			for (p2 = p; *p2 && !isspace(*p2); p2++);
1859 			if (!*p2)
1860 			{
1861 				if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
1862 					(*mp->disc->errorf)(mp, mp->disc, 1, "not enough fields: `%s'", p);
1863 				continue;
1864 			}
1865 
1866 			/*
1867 			 * offset expression
1868 			 */
1869 
1870 			*p2++ = 0;
1871 			ep->expr = vmstrdup(mp->vm, p);
1872 			if (isalpha(*p))
1873 				ep->offset = (ip = (Info_t*)dtmatch(mp->infotab, p)) ? ip->value : 0;
1874 			else if (*p == '(' && ep->cont == '>')
1875 			{
1876 				/*
1877 				 * convert old style indirection to @
1878 				 */
1879 
1880 				p = ep->expr + 1;
1881 				for (;;)
1882 				{
1883 					switch (*p++)
1884 					{
1885 					case 0:
1886 					case '@':
1887 					case '(':
1888 						break;
1889 					case ')':
1890 						break;
1891 					default:
1892 						continue;
1893 					}
1894 					break;
1895 				}
1896 				if (*--p == ')')
1897 				{
1898 					*p = 0;
1899 					*ep->expr = '@';
1900 				}
1901 			}
1902 		}
1903 		for (; isspace(*p2); p2++);
1904 		for (p = p2; *p2 && !isspace(*p2); p2++);
1905 		if (!*p2)
1906 		{
1907 			if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
1908 				(*mp->disc->errorf)(mp, mp->disc, 1, "not enough fields: `%s'", p);
1909 			continue;
1910 		}
1911 		*p2++ = 0;
1912 
1913 		/*
1914 		 * type
1915 		 */
1916 
1917 		if ((*p == 'b' || *p == 'l') && *(p + 1) == 'e')
1918 		{
1919 			ep->swap = ~(*p == 'l' ? 7 : 0);
1920 			p += 2;
1921 		}
1922 		if (*p == 's')
1923 		{
1924 			if (*(p + 1) == 'h')
1925 				ep->type = 'h';
1926 			else
1927 				ep->type = 's';
1928 		}
1929 		else if (*p == 'a')
1930 			ep->type = 's';
1931 		else
1932 			ep->type = *p;
1933 		if (p = strchr(p, '&'))
1934 		{
1935 			/*
1936 			 * old style mask
1937 			 */
1938 
1939 			ep->mask = strton(++p, NiL, NiL, 0);
1940 		}
1941 		for (; isspace(*p2); p2++);
1942 		if (ep->mask)
1943 			*--p2 = '=';
1944 
1945 		/*
1946 		 * comparison operation
1947 		 */
1948 
1949 		p = p2;
1950 		if (p2 = strchr(p, '\t'))
1951 			*p2++ = 0;
1952 		else
1953 		{
1954 			int	qe = 0;
1955 			int	qn = 0;
1956 
1957 			/*
1958 			 * assume balanced {}[]()\\""'' field
1959 			 */
1960 
1961 			for (p2 = p;;)
1962 			{
1963 				switch (n = *p2++)
1964 				{
1965 				case 0:
1966 					break;
1967 				case '{':
1968 					if (!qe)
1969 						qe = '}';
1970 					if (qe == '}')
1971 						qn++;
1972 					continue;
1973 				case '(':
1974 					if (!qe)
1975 						qe = ')';
1976 					if (qe == ')')
1977 						qn++;
1978 					continue;
1979 				case '[':
1980 					if (!qe)
1981 						qe = ']';
1982 					if (qe == ']')
1983 						qn++;
1984 					continue;
1985 				case '}':
1986 				case ')':
1987 				case ']':
1988 					if (qe == n && qn > 0)
1989 						qn--;
1990 					continue;
1991 				case '"':
1992 				case '\'':
1993 					if (!qe)
1994 						qe = n;
1995 					else if (qe == n)
1996 						qe = 0;
1997 					continue;
1998 				case '\\':
1999 					if (*p2)
2000 						p2++;
2001 					continue;
2002 				default:
2003 					if (!qe && isspace(n))
2004 						break;
2005 					continue;
2006 				}
2007 				if (n)
2008 					*(p2 - 1) = 0;
2009 				else
2010 					p2--;
2011 				break;
2012 			}
2013 		}
2014 		lge = 0;
2015 		if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
2016 			ep->op = '=';
2017 		else
2018 		{
2019 			if (*p == '&')
2020 			{
2021 				ep->mask = strton(++p, &next, NiL, 0);
2022 				p = next;
2023 			}
2024 			switch (*p)
2025 			{
2026 			case '=':
2027 			case '>':
2028 			case '<':
2029 			case '*':
2030 				ep->op = *p++;
2031 				if (*p == '=')
2032 				{
2033 					p++;
2034 					switch (ep->op)
2035 					{
2036 					case '>':
2037 						lge = -1;
2038 						break;
2039 					case '<':
2040 						lge = 1;
2041 						break;
2042 					}
2043 				}
2044 				break;
2045 			case '!':
2046 			case '@':
2047 				ep->op = *p++;
2048 				if (*p == '=')
2049 					p++;
2050 				break;
2051 			case 'x':
2052 				p++;
2053 				ep->op = '*';
2054 				break;
2055 			default:
2056 				ep->op = '=';
2057 				if (ep->mask)
2058 					ep->value.num = ep->mask;
2059 				break;
2060 			}
2061 		}
2062 		if (ep->op != '*' && !ep->value.num)
2063 		{
2064 			if (ep->type == 'e')
2065 			{
2066 				if (ep->value.sub = vmnewof(mp->vm, 0, regex_t, 1, 0))
2067 				{
2068 					ep->value.sub->re_disc = &mp->redisc;
2069 					if (!(n = regcomp(ep->value.sub, p, REG_DELIMITED|REG_LENIENT|REG_NULL|REG_DISCIPLINE)))
2070 					{
2071 						p += ep->value.sub->re_npat;
2072 						if (!(n = regsubcomp(ep->value.sub, p, NiL, 0, 0)))
2073 							p += ep->value.sub->re_npat;
2074 					}
2075 					if (n)
2076 					{
2077 						regmessage(mp, ep->value.sub, n);
2078 						ep->value.sub = 0;
2079 					}
2080 					else if (*p && mp->disc->errorf)
2081 						(*mp->disc->errorf)(mp, mp->disc, 1, "invalid characters after substitution: %s", p);
2082 				}
2083 			}
2084 			else if (ep->type == 'm')
2085 			{
2086 				ep->mask = stresc(p) + 1;
2087 				ep->value.str = vmnewof(mp->vm, 0, char, ep->mask + 1, 0);
2088 				memcpy(ep->value.str, p, ep->mask);
2089 				if ((!ep->expr || !ep->offset) && !strmatch(ep->value.str, "\\!\\(*\\)"))
2090 					ep->value.str[ep->mask - 1] = '*';
2091 			}
2092 			else if (ep->type == 's')
2093 			{
2094 				ep->mask = stresc(p);
2095 				ep->value.str = vmnewof(mp->vm, 0, char, ep->mask, 0);
2096 				memcpy(ep->value.str, p, ep->mask);
2097 			}
2098 			else if (*p == '\'')
2099 			{
2100 				stresc(p);
2101 				ep->value.num = *(unsigned char*)(p + 1) + lge;
2102 			}
2103 			else if (strmatch(p, "+([a-z])\\(*\\)"))
2104 			{
2105 				char*	t;
2106 
2107 				t = p;
2108 				ep->type = 'V';
2109 				ep->op = *p;
2110 				while (*p && *p++ != '(');
2111 				switch (ep->op)
2112 				{
2113 				case 'l':
2114 					n = *p++;
2115 					if (n < 'a' || n > 'z')
2116 					{
2117 						if (mp->disc->errorf)
2118 							(*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function name", n);
2119 					}
2120 					else if (!fun[n -= 'a'])
2121 					{
2122 						if (mp->disc->errorf)
2123 							(*mp->disc->errorf)(mp, mp->disc, 2, "%c: function not defined", n + 'a');
2124 					}
2125 					else
2126 					{
2127 						ep->value.loop = vmnewof(mp->vm, 0, Loop_t, 1, 0);
2128 						ep->value.loop->lab = fun[n];
2129 						while (*p && *p++ != ',');
2130 						ep->value.loop->start = strton(p, &t, NiL, 0);
2131 						while (*t && *t++ != ',');
2132 						ep->value.loop->size = strton(t, &t, NiL, 0);
2133 					}
2134 					break;
2135 				case 'm':
2136 				case 'r':
2137 					ep->desc = vmnewof(mp->vm, 0, char, 32, 0);
2138 					ep->mime = vmnewof(mp->vm, 0, char, 32, 0);
2139 					break;
2140 				case 'v':
2141 					break;
2142 				default:
2143 					if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
2144 						(*mp->disc->errorf)(mp, mp->disc, 1, "%-.*s: unknown function", p - t, t);
2145 					break;
2146 				}
2147 			}
2148 			else
2149 			{
2150 				ep->value.num = strton(p, NiL, NiL, 0) + lge;
2151 				if (ep->op == '@')
2152 					ep->value.num = swapget(0, (char*)&ep->value.num, sizeof(ep->value.num));
2153 			}
2154 		}
2155 
2156 		/*
2157 		 * file description
2158 		 */
2159 
2160 		if (p2)
2161 		{
2162 			for (; isspace(*p2); p2++);
2163 			if (p = strchr(p2, '\t'))
2164 			{
2165 				/*
2166 				 * check for message catalog index
2167 				 */
2168 
2169 				*p++ = 0;
2170 				if (isalpha(*p2))
2171 				{
2172 					for (p3 = p2; isalnum(*p3); p3++);
2173 					if (*p3++ == ':')
2174 					{
2175 						for (; isdigit(*p3); p3++);
2176 						if (!*p3)
2177 						{
2178 							for (p2 = p; isspace(*p2); p2++);
2179 							if (p = strchr(p2, '\t'))
2180 								*p++ = 0;
2181 						}
2182 					}
2183 				}
2184 			}
2185 			stresc(p2);
2186 			ep->desc = vmstrdup(mp->vm, p2);
2187 			if (p)
2188 			{
2189 				for (; isspace(*p); p++);
2190 				if (*p)
2191 					ep->mime = vmstrdup(mp->vm, p);
2192 			}
2193 		}
2194 		else
2195 			ep->desc = "";
2196 
2197 		/*
2198 		 * get next entry
2199 		 */
2200 
2201 		last = ep;
2202 		ep = ep->next = vmnewof(mp->vm, 0, Entry_t, 1, 0);
2203 	}
2204 	if (last)
2205 	{
2206 		last->next = 0;
2207 		if (mp->magiclast)
2208 			mp->magiclast->next = first;
2209 		else
2210 			mp->magic = first;
2211 		mp->magiclast = last;
2212 	}
2213 	vmfree(mp->vm, ep);
2214 	if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
2215 	{
2216 		if (lev < 0)
2217 			(*mp->disc->errorf)(mp, mp->disc, 1, "too many } operators");
2218 		else if (lev > 0)
2219 			(*mp->disc->errorf)(mp, mp->disc, 1, "not enough } operators");
2220 		if (ret)
2221 			(*mp->disc->errorf)(mp, mp->disc, 2, "%c: function has no return", ret->offset + 'a');
2222 	}
2223 	error_info.file = 0;
2224 	error_info.line = 0;
2225 	return 0;
2226 }
2227 
2228 /*
2229  * load a magic file into mp
2230  */
2231 
2232 int
magicload(register Magic_t * mp,const char * file,unsigned long flags)2233 magicload(register Magic_t* mp, const char* file, unsigned long flags)
2234 {
2235 	register char*		s;
2236 	register char*		e;
2237 	register char*		t;
2238 	int			n;
2239 	int			found;
2240 	int			list;
2241 	Sfio_t*			fp;
2242 
2243 	mp->flags = mp->disc->flags | flags;
2244 	found = 0;
2245 	if (list = !(s = (char*)file) || !*s || (*s == '-' || *s == '.') && !*(s + 1))
2246 	{
2247 		if (!(s = getenv(MAGIC_FILE_ENV)) || !*s)
2248 			s = MAGIC_FILE;
2249 	}
2250 	for (;;)
2251 	{
2252 		if (!list)
2253 			e = 0;
2254 		else if (e = strchr(s, ':'))
2255 		{
2256 			/*
2257 			 * ok, so ~ won't work for the last list element
2258 			 * we do it for MAGIC_FILES_ENV anyway
2259 			 */
2260 
2261 			if ((strneq(s, "~/", n = 2) || strneq(s, "$HOME/", n = 6) || strneq(s, "${HOME}/", n = 8)) && (t = getenv("HOME")))
2262 			{
2263 				sfputr(mp->tmp, t, -1);
2264 				s += n - 1;
2265 			}
2266 			sfwrite(mp->tmp, s, e - s);
2267 			if (!(s = sfstruse(mp->tmp)))
2268 				goto nospace;
2269 		}
2270 		if (!*s || streq(s, "-"))
2271 			s = MAGIC_FILE;
2272 		if (!(fp = sfopen(NiL, s, "r")))
2273 		{
2274 			if (list)
2275 			{
2276 				if (!(t = pathpath(s, "", PATH_REGULAR|PATH_READ, mp->fbuf, sizeof(mp->fbuf))) && !strchr(s, '/'))
2277 				{
2278 					strcpy(mp->fbuf, s);
2279 					sfprintf(mp->tmp, "%s/%s", MAGIC_DIR, mp->fbuf);
2280 					if (!(s = sfstruse(mp->tmp)))
2281 						goto nospace;
2282 					if (!(t = pathpath(s, "", PATH_REGULAR|PATH_READ, mp->fbuf, sizeof(mp->fbuf))))
2283 						goto next;
2284 				}
2285 				if (!(fp = sfopen(NiL, t, "r")))
2286 					goto next;
2287 			}
2288 			else
2289 			{
2290 				if (mp->disc->errorf)
2291 					(*mp->disc->errorf)(mp, mp->disc, 3, "%s: cannot open magic file", s);
2292 				return -1;
2293 			}
2294 		}
2295 		found = 1;
2296 		n = load(mp, s, fp);
2297 		sfclose(fp);
2298 		if (n && !list)
2299 			return -1;
2300 	next:
2301 		if (!e)
2302 			break;
2303 		s = e + 1;
2304 	}
2305 	if (!found)
2306 	{
2307 		if (mp->flags & MAGIC_VERBOSE)
2308 		{
2309 			if (mp->disc->errorf)
2310 				(*mp->disc->errorf)(mp, mp->disc, 2, "cannot find magic file");
2311 		}
2312 		return -1;
2313 	}
2314 	return 0;
2315  nospace:
2316 	if (mp->disc->errorf)
2317 		(*mp->disc->errorf)(mp, mp->disc, 3, "out of space");
2318 	return -1;
2319 }
2320 
2321 /*
2322  * open a magic session
2323  */
2324 
2325 Magic_t*
magicopen(Magicdisc_t * disc)2326 magicopen(Magicdisc_t* disc)
2327 {
2328 	register Magic_t*	mp;
2329 	register int		i;
2330 	register int		n;
2331 	register int		f;
2332 	register int		c;
2333 	register Vmalloc_t*	vm;
2334 	unsigned char*		map[CC_MAPS + 1];
2335 
2336 	if (!(vm = vmopen(Vmdcheap, Vmbest, 0)))
2337 		return 0;
2338 	if (!(mp = vmnewof(vm, 0, Magic_t, 1, 0)))
2339 	{
2340 		vmclose(vm);
2341 		return 0;
2342 	}
2343 	mp->id = lib;
2344 	mp->disc = disc;
2345 	mp->vm = vm;
2346 	mp->flags = disc->flags;
2347 	mp->redisc.re_version = REG_VERSION;
2348 	mp->redisc.re_flags = REG_NOFREE;
2349 	mp->redisc.re_errorf = (regerror_t)disc->errorf;
2350 	mp->redisc.re_resizef = (regresize_t)vmgetmem;
2351 	mp->redisc.re_resizehandle = (void*)mp->vm;
2352 	mp->dtdisc.key = offsetof(Info_t, name);
2353 	mp->dtdisc.link = offsetof(Info_t, link);
2354 	if (!(mp->tmp = sfstropen()) || !(mp->infotab = dtnew(mp->vm, &mp->dtdisc, Dtoset)))
2355 		goto bad;
2356 	for (n = 0; n < elementsof(info); n++)
2357 		dtinsert(mp->infotab, &info[n]);
2358 	for (i = 0; i < CC_MAPS; i++)
2359 		map[i] = ccmap(i, CC_ASCII);
2360 	mp->x2n = ccmap(CC_ALIEN, CC_NATIVE);
2361 	for (n = 0; n <= UCHAR_MAX; n++)
2362 	{
2363 		f = 0;
2364 		i = CC_MAPS;
2365 		while (--i >= 0)
2366 		{
2367 			c = ccmapchr(map[i], n);
2368 			f = (f << CC_BIT) | CCTYPE(c);
2369 		}
2370 		mp->cctype[n] = f;
2371 	}
2372 	return mp;
2373  bad:
2374 	magicclose(mp);
2375 	return 0;
2376 }
2377 
2378 /*
2379  * close a magicopen() session
2380  */
2381 
2382 int
magicclose(register Magic_t * mp)2383 magicclose(register Magic_t* mp)
2384 {
2385 	if (!mp)
2386 		return -1;
2387 	if (mp->tmp)
2388 		sfstrclose(mp->tmp);
2389 	if (mp->vm)
2390 		vmclose(mp->vm);
2391 	return 0;
2392 }
2393 
2394 /*
2395  * return the magic string for file with optional stat info st
2396  */
2397 
2398 char*
magictype(register Magic_t * mp,Sfio_t * fp,const char * file,register struct stat * st)2399 magictype(register Magic_t* mp, Sfio_t* fp, const char* file, register struct stat* st)
2400 {
2401 	off_t	off;
2402 	char*	s;
2403 
2404 	mp->flags = mp->disc->flags;
2405 	mp->mime = 0;
2406 	if (!st)
2407 		s = T("cannot stat");
2408 	else
2409 	{
2410 		if (mp->fp = fp)
2411 			off = sfseek(mp->fp, (off_t)0, SEEK_CUR);
2412 		s = type(mp, file, st, mp->tbuf, &mp->tbuf[sizeof(mp->tbuf)-1]);
2413 		if (mp->fp)
2414 			sfseek(mp->fp, off, SEEK_SET);
2415 		if (!(mp->flags & (MAGIC_MIME|MAGIC_ALL)))
2416 		{
2417 			if (S_ISREG(st->st_mode) && (st->st_size > 0) && (st->st_size < 128))
2418 				sfprintf(mp->tmp, "%s ", T("short"));
2419 			sfprintf(mp->tmp, "%s", s);
2420 			if (!mp->fp && (st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)))
2421 				sfprintf(mp->tmp, ", %s", S_ISDIR(st->st_mode) ? T("searchable") : T("executable"));
2422 			if (st->st_mode & S_ISUID)
2423 				sfprintf(mp->tmp, ", setuid=%s", fmtuid(st->st_uid));
2424 			if (st->st_mode & S_ISGID)
2425 				sfprintf(mp->tmp, ", setgid=%s", fmtgid(st->st_gid));
2426 			if (st->st_mode & S_ISVTX)
2427 				sfprintf(mp->tmp, ", sticky");
2428 			if (!(s = sfstruse(mp->tmp)))
2429 				s = T("out of space");
2430 		}
2431 	}
2432 	if (mp->flags & MAGIC_MIME)
2433 		s = mp->mime;
2434 	if (!s)
2435 		s = T("error");
2436 	return s;
2437 }
2438 
2439 /*
2440  * list the magic table in mp on sp
2441  */
2442 
2443 int
magiclist(register Magic_t * mp,register Sfio_t * sp)2444 magiclist(register Magic_t* mp, register Sfio_t* sp)
2445 {
2446 	register Entry_t*	ep = mp->magic;
2447 	register Entry_t*	rp = 0;
2448 
2449 	mp->flags = mp->disc->flags;
2450 	sfprintf(sp, "cont\toffset\ttype\top\tmask\tvalue\tmime\tdesc\n");
2451 	while (ep)
2452 	{
2453 		sfprintf(sp, "%c %c\t", ep->cont, ep->nest);
2454 		if (ep->expr)
2455 			sfprintf(sp, "%s", ep->expr);
2456 		else
2457 			sfprintf(sp, "%ld", ep->offset);
2458 		sfprintf(sp, "\t%s%c\t%c\t%lo\t", ep->swap == (char)~3 ? "L" : ep->swap == (char)~0 ? "B" : "", ep->type, ep->op, ep->mask);
2459 		switch (ep->type)
2460 		{
2461 		case 'm':
2462 		case 's':
2463 			sfputr(sp, fmtesc(ep->value.str), -1);
2464 			break;
2465 		case 'V':
2466 			switch (ep->op)
2467 			{
2468 			case 'l':
2469 				sfprintf(sp, "loop(%d,%d,%d,%d)", ep->value.loop->start, ep->value.loop->size, ep->value.loop->count, ep->value.loop->offset);
2470 				break;
2471 			case 'v':
2472 				sfprintf(sp, "vcodex()");
2473 				break;
2474 			default:
2475 				sfprintf(sp, "%p", ep->value.str);
2476 				break;
2477 			}
2478 			break;
2479 		default:
2480 			sfprintf(sp, "%lo", ep->value.num);
2481 			break;
2482 		}
2483 		sfprintf(sp, "\t%s\t%s\n", ep->mime ? ep->mime : "", fmtesc(ep->desc));
2484 		if (ep->cont == '$' && !ep->value.lab->mask)
2485 		{
2486 			rp = ep;
2487 			ep = ep->value.lab;
2488 		}
2489 		else
2490 		{
2491 			if (ep->cont == ':')
2492 			{
2493 				ep = rp;
2494 				ep->value.lab->mask = 1;
2495 			}
2496 			ep = ep->next;
2497 		}
2498 	}
2499 	return 0;
2500 }
2501