1 /***********************************************************************
2 * *
3 * This software is part of the ast package *
4 * Copyright (c) 1985-2011 AT&T Intellectual Property *
5 * and is licensed under the *
6 * Eclipse Public License, Version 1.0 *
7 * by AT&T Intellectual Property *
8 * *
9 * A copy of the License is available at *
10 * http://www.eclipse.org/org/documents/epl-v10.html *
11 * (with md5 checksum b35adb5213ca9657e911e9befb180842) *
12 * *
13 * Information and Software Systems Research *
14 * AT&T Research *
15 * Florham Park NJ *
16 * *
17 * Glenn Fowler <gsf@research.att.com> *
18 * David Korn <dgk@research.att.com> *
19 * Phong Vo <kpv@research.att.com> *
20 * *
21 ***********************************************************************/
22 #pragma prototyped
23 /*
24 * Glenn Fowler
25 * AT&T Research
26 *
27 * library interface to file
28 *
29 * the sum of the hacks {s5,v10,planix} is _____ than the parts
30 */
31
/* identification string ("@(#)" / "$Id: ... $" markers) kept in the object */
static const char id[] = "\n@(#)$Id: magic library (AT&T Research) 2011-03-09 $\0\n";

/* library name handed to ERROR_translate() by the T() macro */
static const char lib[] = "libast:magic";
35
36 #include <ast.h>
37 #include <ctype.h>
38 #include <ccode.h>
39 #include <dt.h>
40 #include <modex.h>
41 #include <error.h>
42 #include <regex.h>
43 #include <swap.h>
44
/* translate message m through ERROR_translate(); the empty string is returned as is */
#define T(m)		(*m?ERROR_translate(NiL,NiL,lib,m):m)

/* strgrpmatch() of s against p with the STR_LEFT|STR_RIGHT|STR_ICASE flags */
#define match(s,p)	strgrpmatch(s,p,NiL,0,STR_LEFT|STR_RIGHT|STR_ICASE)

#define MAXNEST		10		/* { ... } nesting limit	*/
#define MINITEM		4		/* magic buffer rounding	*/
51
/*
 * the dict[] and info[] tables below initialize name and value
 * positionally, so the order of the first two members matters
 */

typedef struct				/* identifier dictionary entry	*/
{
	const char	name[16];	/* identifier name		*/
	int		value;		/* identifier value		*/
	Dtlink_t	link;		/* dictionary link		*/
} Info_t;
58
/*
 * singly linked list node holding one compiled regex
 */

typedef struct Edit			/* edit substitution		*/
{
	struct Edit*	next;		/* next in list			*/
	regex_t*	from;		/* from pattern			*/
} Edit_t;
64
struct Entry;				/* defined below as Entry_t	*/

/*
 * state for the 'l' (loop) operation in ckmagic(): count and offset
 * are updated at match time, lab/start/size are only read there
 */

typedef struct				/* loop info			*/
{
	struct Entry*	lab;		/* call this function		*/
	int		start;		/* start here			*/
	int		size;		/* increment by this amount	*/
	int		count;		/* dynamic loop count		*/
	int		offset;		/* dynamic offset		*/
} Loop_t;
75
/*
 * one entry of the parsed magic table; ckmagic() walks the list via next
 * and selects the value member according to type and op:
 *	num for numeric comparisons, str for string types,
 *	sub for regex types, lab for '$' calls, loop for the 'l' op
 */

typedef struct Entry			/* magic file entry		*/
{
	struct Entry*	next;		/* next in list			*/
	char*		expr;		/* offset expression		*/
	union
	{
	unsigned long	num;
	char*		str;
	struct Entry*	lab;
	regex_t*	sub;
	Loop_t*		loop;
	}		value;		/* comparison value		*/
	char*		desc;		/* file description		*/
	char*		mime;		/* file mime type		*/
	unsigned long	offset;		/* offset in bytes		*/
	unsigned long	mask;		/* mask before compare		*/
	char		cont;		/* continuation operation	*/
	char		type;		/* datum type			*/
	char		op;		/* comparison operation		*/
	char		nest;		/* { or } nesting operation	*/
	char		swap;		/* forced swap order		*/
} Entry_t;
98
/*
 * character code classification
 *
 * each code set gets a CC_BIT wide group of class flags inside a
 * Cctype_t; cklang() shifts by CC_BIT once per code set (CC_MAPS times)
 * CC_MAPS is not defined in this file -- presumably from <ccode.h>
 */

#define CC_BIT		5

/* pick the narrowest type that holds CC_MAPS groups of CC_BIT bits */
#if (CC_MAPS*CC_BIT) <= (CHAR_BIT*2)
typedef unsigned short Cctype_t;
#else
typedef unsigned long Cctype_t;
#endif

#define CC_text		0x01
#define CC_control	0x02
#define CC_latin	0x04
#define CC_binary	0x08
#define CC_utf_8	0x10

#define CC_notext	CC_text		/* CC_text is flipped before checking	*/

/* the class bits compared by cklang(); CC_utf_8 is not part of the mask */
#define CC_MASK		(CC_binary|CC_latin|CC_control|CC_text)

/*
 * classify character code c:
 *	> 0240			CC_binary
 *	0200 .. 0240		CC_latin
 *	< 040 except 007 011 012 013 015
 *				CC_control
 *	anything else		CC_text
 */
#define CCTYPE(c)	(((c)>0240)?CC_binary:((c)>=0200)?CC_latin:((c)<040&&(c)!=007&&(c)!=011&&(c)!=012&&(c)!=013&&(c)!=015)?CC_control:CC_text)
118
/*
 * identifier classes: the value member of dict[] entries and the
 * index into the identifier[] counters (sized ID_MAX + 1)
 */

#define ID_NONE		0
#define ID_ASM		1
#define ID_C		2
#define ID_COBOL	3
#define ID_COPYBOOK	4
#define ID_CPLUSPLUS	5
#define ID_FORTRAN	6
#define ID_HTML		7
#define ID_INCL1	8
#define ID_INCL2	9
#define ID_INCL3	10
#define ID_MAM1		11
#define ID_MAM2		12
#define ID_MAM3		13
#define ID_NOTEXT	14
#define ID_PL1		15
#define ID_YACC		16

#define ID_MAX		ID_YACC		/* largest ID_* value		*/

/*
 * stat(2) info keywords: the value member of info[] entries;
 * ckmagic() switches on these when an entry has an offset expression
 * 0 is not used here -- it selects strexpr() evaluation in ckmagic()
 */

#define INFO_atime	1
#define INFO_blocks	2
#define INFO_ctime	3
#define INFO_fstype	4
#define INFO_gid	5
#define INFO_mode	6
#define INFO_mtime	7
#define INFO_name	8
#define INFO_nlink	9
#define INFO_size	10
#define INFO_uid	11
150
/*
 * private handle members; must be defined before <magic.h> is included
 * NOTE(review): presumably <magic.h> expands this inside the Magic_t
 * definition -- confirm against the header
 */

#define _MAGIC_PRIVATE_ \
	Magicdisc_t*	disc;			/* discipline		*/ \
	Vmalloc_t*	vm;			/* vmalloc region	*/ \
	Entry_t*	magic;			/* parsed magic table	*/ \
	Entry_t*	magiclast;		/* last entry in magic	*/ \
	char*		mime;			/* MIME type		*/ \
	unsigned char*	x2n;			/* CC_ALIEN=>CC_NATIVE	*/ \
	char		fbuf[SF_BUFSIZE + 1];	/* file data		*/ \
	char		xbuf[SF_BUFSIZE + 1];	/* indirect file data	*/ \
	char		nbuf[256];		/* !CC_NATIVE data	*/ \
	char		mbuf[64];		/* mime string		*/ \
	char		sbuf[64];		/* type suffix string	*/ \
	char		tbuf[2 * PATH_MAX];	/* type string		*/ \
	Cctype_t	cctype[UCHAR_MAX + 1];	/* char code types	*/ \
	unsigned int	count[UCHAR_MAX + 1];	/* char frequency count	*/ \
	unsigned int	multi[UCHAR_MAX + 1];	/* multi char count	*/ \
	int		keep[MAXNEST];		/* ckmagic nest stack	*/ \
	char*		cap[MAXNEST];		/* ckmagic mime stack	*/ \
	char*		msg[MAXNEST];		/* ckmagic text stack	*/ \
	Entry_t*	ret[MAXNEST];		/* ckmagic return stack	*/ \
	int		fbsz;			/* fbuf size		*/ \
	int		fbmx;			/* fbuf max size	*/ \
	int		xbsz;			/* xbuf size		*/ \
	int		swap;			/* swap() operation	*/ \
	unsigned long	flags;			/* disc+open flags	*/ \
	long		xoff;			/* xbuf offset		*/ \
	int		identifier[ID_MAX + 1];	/* Info_t identifier	*/ \
	Sfio_t*		fp;			/* fbuf fp		*/ \
	Sfio_t*		tmp;			/* tmp string		*/ \
	regdisc_t	redisc;			/* regex discipline	*/ \
	Dtdisc_t	dtdisc;			/* dict discipline	*/ \
	Dt_t*		idtab;			/* identifier dict	*/ \
	Dt_t*		infotab;		/* info keyword dict	*/
184
185 #include <magic.h>
186
/*
 * identifier => ID_* class table
 * cklang() inserts every entry into the idtab dictionary on first use
 * and bumps identifier[value] for each identifier found in the file data
 * not const: the entries are handed to dtinsert()
 */

static Info_t		dict[] =		/* keyword dictionary	*/
{
	{ 	"COMMON",	ID_FORTRAN	},
	{ 	"COMPUTE",	ID_COBOL	},
	{ 	"COMP",		ID_COPYBOOK	},
	{ 	"COMPUTATIONAL",ID_COPYBOOK	},
	{ 	"DCL",		ID_PL1		},
	{ 	"DEFINED",	ID_PL1		},
	{ 	"DIMENSION",	ID_FORTRAN	},
	{ 	"DIVISION",	ID_COBOL	},
	{ 	"FILLER",	ID_COPYBOOK	},
	{ 	"FIXED",	ID_PL1		},
	{ 	"FUNCTION",	ID_FORTRAN	},
	{ 	"HTML",		ID_HTML		},
	{ 	"INTEGER",	ID_FORTRAN	},
	{ 	"MAIN",		ID_PL1		},
	{ 	"OPTIONS",	ID_PL1		},
	{ 	"PERFORM",	ID_COBOL	},
	{ 	"PIC",		ID_COPYBOOK	},
	{ 	"REAL",		ID_FORTRAN	},
	{ 	"REDEFINES",	ID_COPYBOOK	},
	{ 	"S9",		ID_COPYBOOK	},
	{ 	"SECTION",	ID_COBOL	},
	{ 	"SELECT",	ID_COBOL	},
	{ 	"SUBROUTINE",	ID_FORTRAN	},
	{ 	"TEXT",		ID_ASM		},
	{ 	"VALUE",	ID_COPYBOOK	},
	{ 	"attr",		ID_MAM3		},
	{ 	"binary",	ID_YACC		},
	{ 	"block",	ID_FORTRAN	},
	{ 	"bss",		ID_ASM		},
	{ 	"byte",		ID_ASM		},
	{ 	"char",		ID_C		},
	{ 	"class",	ID_CPLUSPLUS	},
	{ 	"clr",		ID_NOTEXT	},
	{ 	"comm",		ID_ASM		},
	{ 	"common",	ID_FORTRAN	},
	{ 	"data",		ID_ASM		},
	{ 	"dimension",	ID_FORTRAN	},
	{ 	"done",		ID_MAM2		},
	{ 	"double",	ID_C		},
	{ 	"even",		ID_ASM		},
	{ 	"exec",		ID_MAM3		},
	{ 	"extern",	ID_C		},
	{ 	"float",	ID_C		},
	{ 	"function",	ID_FORTRAN	},
	{ 	"globl",	ID_ASM		},
	{ 	"h",		ID_INCL3	},
	{ 	"html",		ID_HTML		},
	{ 	"include",	ID_INCL1	},
	{ 	"int",		ID_C		},
	{ 	"integer",	ID_FORTRAN	},
	{ 	"jmp",		ID_NOTEXT	},
	{ 	"left",		ID_YACC		},
	{ 	"libc",		ID_INCL2	},
	{ 	"long",		ID_C		},
	{ 	"make",		ID_MAM1		},
	{ 	"mov",		ID_NOTEXT	},
	{ 	"private",	ID_CPLUSPLUS	},
	{ 	"public",	ID_CPLUSPLUS	},
	{ 	"real",		ID_FORTRAN	},
	{ 	"register",	ID_C		},
	{ 	"right",	ID_YACC		},
	{ 	"sfio",		ID_INCL2	},
	{ 	"static",	ID_C		},
	{ 	"stdio",	ID_INCL2	},
	{ 	"struct",	ID_C		},
	{ 	"subroutine",	ID_FORTRAN	},
	{ 	"sys",		ID_NOTEXT	},
	{ 	"term",		ID_YACC		},
	{ 	"text",		ID_ASM		},
	{ 	"tst",		ID_NOTEXT	},
	{ 	"type",		ID_YACC		},
	{ 	"typedef",	ID_C		},
	{ 	"u",		ID_INCL2	},
	{ 	"union",	ID_YACC		},
	{ 	"void",		ID_C		},
};
265
/*
 * stat(2) info keyword => INFO_* table
 * NOTE(review): presumably loaded into the infotab dictionary by the
 * magic file parser, which is outside this part of the file
 */

static Info_t		info[] =
{
	{	"atime",	INFO_atime		},
	{	"blocks",	INFO_blocks		},
	{	"ctime",	INFO_ctime		},
	{	"fstype",	INFO_fstype		},
	{	"gid",		INFO_gid		},
	{	"mode",		INFO_mode		},
	{	"mtime",	INFO_mtime		},
	{	"name",		INFO_name		},
	{	"nlink",	INFO_nlink		},
	{	"size",		INFO_size		},
	{	"uid",		INFO_uid		},
};
280
281 /*
282 * return pointer to data at offset off and size siz
283 */
284
285 static char*
getdata(register Magic_t * mp,register long off,register int siz)286 getdata(register Magic_t* mp, register long off, register int siz)
287 {
288 register long n;
289
290 if (off < 0)
291 return 0;
292 if (off + siz <= mp->fbsz)
293 return mp->fbuf + off;
294 if (off < mp->xoff || off + siz > mp->xoff + mp->xbsz)
295 {
296 if (off + siz > mp->fbmx)
297 return 0;
298 n = (off / (SF_BUFSIZE / 2)) * (SF_BUFSIZE / 2);
299 if (sfseek(mp->fp, n, SEEK_SET) != n)
300 return 0;
301 if ((mp->xbsz = sfread(mp->fp, mp->xbuf, sizeof(mp->xbuf) - 1)) < 0)
302 {
303 mp->xoff = 0;
304 mp->xbsz = 0;
305 return 0;
306 }
307 mp->xbuf[mp->xbsz] = 0;
308 mp->xoff = n;
309 if (off + siz > mp->xoff + mp->xbsz)
310 return 0;
311 }
312 return mp->xbuf + off - mp->xoff;
313 }
314
315 /*
316 * @... evaluator for strexpr()
317 */
318
319 static long
indirect(const char * cs,char ** e,void * handle)320 indirect(const char* cs, char** e, void* handle)
321 {
322 register char* s = (char*)cs;
323 register Magic_t* mp = (Magic_t*)handle;
324 register long n = 0;
325 register char* p;
326
327 if (s)
328 {
329 if (*s == '@')
330 {
331 n = *++s == '(' ? strexpr(s, e, indirect, mp) : strtol(s, e, 0);
332 switch (*(s = *e))
333 {
334 case 'b':
335 case 'B':
336 s++;
337 if (p = getdata(mp, n, 1))
338 n = *(unsigned char*)p;
339 else
340 s = (char*)cs;
341 break;
342 case 'h':
343 case 'H':
344 s++;
345 if (p = getdata(mp, n, 2))
346 n = swapget(mp->swap, p, 2);
347 else
348 s = (char*)cs;
349 break;
350 case 'q':
351 case 'Q':
352 s++;
353 if (p = getdata(mp, n, 8))
354 n = swapget(mp->swap, p, 8);
355 else
356 s = (char*)cs;
357 break;
358 default:
359 if (isalnum(*s))
360 s++;
361 if (p = getdata(mp, n, 4))
362 n = swapget(mp->swap, p, 4);
363 else
364 s = (char*)cs;
365 break;
366 }
367 }
368 *e = s;
369 }
370 else if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
371 (*mp->disc->errorf)(mp, mp->disc, 2, "%s in indirect expression", *e);
372 return n;
373 }
374
375 /*
376 * emit regex error message
377 */
378
379 static void
regmessage(Magic_t * mp,regex_t * re,int code)380 regmessage(Magic_t* mp, regex_t* re, int code)
381 {
382 char buf[128];
383
384 if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
385 {
386 regerror(code, re, buf, sizeof(buf));
387 (*mp->disc->errorf)(mp, mp->disc, 3, "regex: %s", buf);
388 }
389 }
390
391 /*
392 * decompose vcodex(3) method composition
393 */
394
395 static char*
vcdecomp(char * b,char * e,unsigned char * m,unsigned char * x)396 vcdecomp(char* b, char* e, unsigned char* m, unsigned char* x)
397 {
398 unsigned char* map;
399 const char* o;
400 int c;
401 int n;
402 int i;
403 int a;
404
405 map = CCMAP(CC_ASCII, CC_NATIVE);
406 a = 0;
407 i = 1;
408 for (;;)
409 {
410 if (i)
411 i = 0;
412 else
413 *b++ = '^';
414 if (m < (x - 1) && !*(m + 1))
415 {
416 /*
417 * obsolete indices
418 */
419
420 if (!a)
421 {
422 a = 1;
423 o = "old, ";
424 while (b < e && (c = *o++))
425 *b++ = c;
426 }
427 switch (*m)
428 {
429 case 0: o = "delta"; break;
430 case 1: o = "huffman"; break;
431 case 2: o = "huffgroup"; break;
432 case 3: o = "arith"; break;
433 case 4: o = "bwt"; break;
434 case 5: o = "rle"; break;
435 case 6: o = "mtf"; break;
436 case 7: o = "transpose"; break;
437 case 8: o = "table"; break;
438 case 9: o = "huffpart"; break;
439 case 50: o = "map"; break;
440 case 100: o = "recfm"; break;
441 case 101: o = "ss7"; break;
442 default: o = "UNKNOWN"; break;
443 }
444 m += 2;
445 while (b < e && (c = *o++))
446 *b++ = c;
447 }
448 else
449 while (b < e && m < x && (c = *m++))
450 {
451 if (map)
452 c = map[c];
453 *b++ = c;
454 }
455 if (b >= e)
456 break;
457 n = 0;
458 while (m < x)
459 {
460 n = (n<<7) | (*m & 0x7f);
461 if (!(*m++ & 0x80))
462 break;
463 }
464 if (n >= (x - m))
465 break;
466 m += n;
467 }
468 return b;
469 }
470
471 /*
472 * check for magic table match in buf
473 */
474
475 static char*
ckmagic(register Magic_t * mp,const char * file,char * buf,char * end,struct stat * st,unsigned long off)476 ckmagic(register Magic_t* mp, const char* file, char* buf, char* end, struct stat* st, unsigned long off)
477 {
478 register Entry_t* ep;
479 register char* p;
480 register char* b;
481 register int level = 0;
482 int call = -1;
483 int all = 0;
484 int c;
485 int str;
486 char* q;
487 char* t;
488 char* cur;
489 char* base = 0;
490 unsigned long num;
491 unsigned long mask;
492 regmatch_t matches[10];
493
494 mp->swap = 0;
495 b = mp->msg[0] = cur = buf;
496 mp->mime = mp->cap[0] = 0;
497 mp->keep[0] = 0;
498 for (ep = mp->magic; ep; ep = ep->next)
499 {
500 fun:
501 if (ep->nest == '{')
502 {
503 if (++level >= MAXNEST)
504 {
505 call = -1;
506 level = 0;
507 mp->keep[0] = 0;
508 b = mp->msg[0];
509 mp->mime = mp->cap[0];
510 continue;
511 }
512 mp->keep[level] = mp->keep[level - 1] != 0;
513 mp->msg[level] = b;
514 mp->cap[level] = mp->mime;
515 }
516 switch (ep->cont)
517 {
518 case '#':
519 if (mp->keep[level] && b > cur)
520 {
521 if ((mp->flags & MAGIC_ALL) && b < (end - 3))
522 {
523 all = 1;
524 *b++ = '\n';
525 cur = b;
526 continue;
527 }
528 *b = 0;
529 return buf;
530 }
531 mp->swap = 0;
532 b = mp->msg[0] = cur;
533 mp->mime = mp->cap[0] = 0;
534 if (ep->type == ' ')
535 continue;
536 break;
537 case '$':
538 if (mp->keep[level] && call < (MAXNEST - 1))
539 {
540 mp->ret[++call] = ep;
541 ep = ep->value.lab;
542 goto fun;
543 }
544 continue;
545 case ':':
546 ep = mp->ret[call--];
547 if (ep->op == 'l')
548 goto fun;
549 continue;
550 case '|':
551 if (mp->keep[level] > 1)
552 goto checknest;
553 /*FALLTHROUGH*/
554 default:
555 if (!mp->keep[level])
556 {
557 b = mp->msg[level];
558 mp->mime = mp->cap[level];
559 goto checknest;
560 }
561 break;
562 }
563 p = "";
564 num = 0;
565 if (!ep->expr)
566 num = ep->offset + off;
567 else
568 switch (ep->offset)
569 {
570 case 0:
571 num = strexpr(ep->expr, NiL, indirect, mp) + off;
572 break;
573 case INFO_atime:
574 num = st->st_atime;
575 ep->type = 'D';
576 break;
577 case INFO_blocks:
578 num = iblocks(st);
579 ep->type = 'N';
580 break;
581 case INFO_ctime:
582 num = st->st_ctime;
583 ep->type = 'D';
584 break;
585 case INFO_fstype:
586 p = fmtfs(st);
587 ep->type = toupper(ep->type);
588 break;
589 case INFO_gid:
590 if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
591 {
592 p = fmtgid(st->st_gid);
593 ep->type = toupper(ep->type);
594 }
595 else
596 {
597 num = st->st_gid;
598 ep->type = 'N';
599 }
600 break;
601 case INFO_mode:
602 if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
603 {
604 p = fmtmode(st->st_mode, 0);
605 ep->type = toupper(ep->type);
606 }
607 else
608 {
609 num = modex(st->st_mode);
610 ep->type = 'N';
611 }
612 break;
613 case INFO_mtime:
614 num = st->st_ctime;
615 ep->type = 'D';
616 break;
617 case INFO_name:
618 if (!base)
619 {
620 if (base = strrchr(file, '/'))
621 base++;
622 else
623 base = (char*)file;
624 }
625 p = base;
626 ep->type = toupper(ep->type);
627 break;
628 case INFO_nlink:
629 num = st->st_nlink;
630 ep->type = 'N';
631 break;
632 case INFO_size:
633 num = st->st_size;
634 ep->type = 'N';
635 break;
636 case INFO_uid:
637 if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
638 {
639 p = fmtuid(st->st_uid);
640 ep->type = toupper(ep->type);
641 }
642 else
643 {
644 num = st->st_uid;
645 ep->type = 'N';
646 }
647 break;
648 }
649 switch (ep->type)
650 {
651
652 case 'b':
653 if (!(p = getdata(mp, num, 1)))
654 goto next;
655 num = *(unsigned char*)p;
656 break;
657
658 case 'h':
659 if (!(p = getdata(mp, num, 2)))
660 goto next;
661 num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 2);
662 break;
663
664 case 'd':
665 case 'l':
666 case 'v':
667 if (!(p = getdata(mp, num, 4)))
668 goto next;
669 num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 4);
670 break;
671
672 case 'q':
673 if (!(p = getdata(mp, num, 8)))
674 goto next;
675 num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 8);
676 break;
677
678 case 'e':
679 if (!(p = getdata(mp, num, 0)))
680 goto next;
681 /*FALLTHROUGH*/
682 case 'E':
683 if (!ep->value.sub)
684 goto next;
685 if ((c = regexec(ep->value.sub, p, elementsof(matches), matches, 0)) || (c = regsubexec(ep->value.sub, p, elementsof(matches), matches)))
686 {
687 c = mp->fbsz;
688 if (c >= sizeof(mp->nbuf))
689 c = sizeof(mp->nbuf) - 1;
690 p = (char*)memcpy(mp->nbuf, p, c);
691 p[c] = 0;
692 ccmapstr(mp->x2n, p, c);
693 if ((c = regexec(ep->value.sub, p, elementsof(matches), matches, 0)) || (c = regsubexec(ep->value.sub, p, elementsof(matches), matches)))
694 {
695 if (c != REG_NOMATCH)
696 regmessage(mp, ep->value.sub, c);
697 goto next;
698 }
699 }
700 p = ep->value.sub->re_sub->re_buf;
701 q = T(ep->desc);
702 t = *q ? q : p;
703 if (mp->keep[level]++ && b > cur && b < end && *(b - 1) != ' ' && *t && *t != ',' && *t != '.' && *t != '\b')
704 *b++ = ' ';
705 b += sfsprintf(b, end - b, *q ? q : "%s", p + (*p == '\b'));
706 if (ep->mime)
707 mp->mime = ep->mime;
708 goto checknest;
709
710 case 's':
711 if (!(p = getdata(mp, num, ep->mask)))
712 goto next;
713 goto checkstr;
714 case 'm':
715 if (!(p = getdata(mp, num, 0)))
716 goto next;
717 /*FALLTHROUGH*/
718 case 'M':
719 case 'S':
720 checkstr:
721 for (;;)
722 {
723 if (*ep->value.str == '*' && !*(ep->value.str + 1) && isprint(*p))
724 break;
725 if ((ep->type == 'm' || ep->type == 'M') ? strmatch(p, ep->value.str) : !memcmp(p, ep->value.str, ep->mask))
726 break;
727 if (p == mp->nbuf || ep->mask >= sizeof(mp->nbuf))
728 goto next;
729 p = (char*)memcpy(mp->nbuf, p, ep->mask);
730 p[ep->mask] = 0;
731 ccmapstr(mp->x2n, p, ep->mask);
732 }
733 q = T(ep->desc);
734 if (mp->keep[level]++ && b > cur && b < end && *(b - 1) != ' ' && *q && *q != ',' && *q != '.' && *q != '\b')
735 *b++ = ' ';
736 for (t = p; (c = *t) >= 0 && c <= 0177 && isprint(c) && c != '\n'; t++);
737 *t = 0;
738 b += sfsprintf(b, end - b, q + (*q == '\b'), p);
739 *t = c;
740 if (ep->mime)
741 mp->mime = ep->mime;
742 goto checknest;
743
744 }
745 if (mask = ep->mask)
746 num &= mask;
747 switch (ep->op)
748 {
749
750 case '=':
751 case '@':
752 if (num == ep->value.num)
753 break;
754 if (ep->cont != '#')
755 goto next;
756 if (!mask)
757 mask = ~mask;
758 if (ep->type == 'h')
759 {
760 if ((num = swapget(mp->swap = 1, p, 2) & mask) == ep->value.num)
761 {
762 if (!(mp->swap & (mp->swap + 1)))
763 mp->swap = 7;
764 goto swapped;
765 }
766 }
767 else if (ep->type == 'l')
768 {
769 for (c = 1; c < 4; c++)
770 if ((num = swapget(mp->swap = c, p, 4) & mask) == ep->value.num)
771 {
772 if (!(mp->swap & (mp->swap + 1)))
773 mp->swap = 7;
774 goto swapped;
775 }
776 }
777 else if (ep->type == 'q')
778 {
779 for (c = 1; c < 8; c++)
780 if ((num = swapget(mp->swap = c, p, 8) & mask) == ep->value.num)
781 goto swapped;
782 }
783 goto next;
784
785 case '!':
786 if (num != ep->value.num)
787 break;
788 goto next;
789
790 case '^':
791 if (num ^ ep->value.num)
792 break;
793 goto next;
794
795 case '>':
796 if (num > ep->value.num)
797 break;
798 goto next;
799
800 case '<':
801 if (num < ep->value.num)
802 break;
803 goto next;
804
805 case 'l':
806 if (num > 0 && mp->keep[level] && call < (MAXNEST - 1))
807 {
808 if (!ep->value.loop->count)
809 {
810 ep->value.loop->count = num;
811 ep->value.loop->offset = off;
812 off = ep->value.loop->start;
813 }
814 else if (!--ep->value.loop->count)
815 {
816 off = ep->value.loop->offset;
817 goto next;
818 }
819 else
820 off += ep->value.loop->size;
821 mp->ret[++call] = ep;
822 ep = ep->value.loop->lab;
823 goto fun;
824 }
825 goto next;
826
827 case 'm':
828 c = mp->swap;
829 t = ckmagic(mp, file, b + (b > cur), end, st, num);
830 mp->swap = c;
831 if (t)
832 {
833 if (b > cur && b < end)
834 *b = ' ';
835 b += strlen(b);
836 }
837 else if (ep->cont == '&')
838 goto next;
839 break;
840
841 case 'r':
842 #if _UWIN
843 {
844 char* e;
845 Sfio_t* rp;
846 Sfio_t* gp;
847
848 if (!(t = strrchr(file, '.')))
849 goto next;
850 sfprintf(mp->tmp, "/reg/classes_root/%s", t);
851 if (!(t = sfstruse(mp->tmp)) || !(rp = sfopen(NiL, t, "r")))
852 goto next;
853 *ep->desc = 0;
854 *ep->mime = 0;
855 gp = 0;
856 while (t = sfgetr(rp, '\n', 1))
857 {
858 if (strneq(t, "Content Type=", 13))
859 {
860 ep->mime = vmnewof(mp->vm, ep->mime, char, sfvalue(rp), 0);
861 strcpy(ep->mime, t + 13);
862 if (gp)
863 break;
864 }
865 else
866 {
867 sfprintf(mp->tmp, "/reg/classes_root/%s", t);
868 if ((e = sfstruse(mp->tmp)) && (gp = sfopen(NiL, e, "r")))
869 {
870 ep->desc = vmnewof(mp->vm, ep->desc, char, strlen(t), 1);
871 strcpy(ep->desc, t);
872 if (*ep->mime)
873 break;
874 }
875 }
876 }
877 sfclose(rp);
878 if (!gp)
879 goto next;
880 if (!*ep->mime)
881 {
882 t = T(ep->desc);
883 if (!strncasecmp(t, "microsoft", 9))
884 t += 9;
885 while (isspace(*t))
886 t++;
887 e = "application/x-ms-";
888 ep->mime = vmnewof(mp->vm, ep->mime, char, strlen(t), strlen(e));
889 e = strcopy(ep->mime, e);
890 while ((c = *t++) && c != '.' && c != ' ')
891 *e++ = isupper(c) ? tolower(c) : c;
892 *e = 0;
893 }
894 while (t = sfgetr(gp, '\n', 1))
895 if (*t && !streq(t, "\"\""))
896 {
897 ep->desc = vmnewof(mp->vm, ep->desc, char, sfvalue(gp), 0);
898 strcpy(ep->desc, t);
899 break;
900 }
901 sfclose(gp);
902 if (!*ep->desc)
903 goto next;
904 if (!t)
905 for (t = T(ep->desc); *t; t++)
906 if (*t == '.')
907 *t = ' ';
908 if (!mp->keep[level])
909 mp->keep[level] = 2;
910 mp->mime = ep->mime;
911 break;
912 }
913 #else
914 if (ep->cont == '#' && !mp->keep[level])
915 mp->keep[level] = 1;
916 goto next;
917 #endif
918
919 case 'v':
920 if (!(p = getdata(mp, num, 4)))
921 goto next;
922 c = 0;
923 do
924 {
925 num++;
926 c = (c<<7) | (*p & 0x7f);
927 } while (*p++ & 0x80);
928 if (!(p = getdata(mp, num, c)))
929 goto next;
930 if (mp->keep[level]++ && b > cur && b < (end - 1) && *(b - 1) != ' ')
931 {
932 *b++ = ',';
933 *b++ = ' ';
934 }
935 b = vcdecomp(b, cur + PATH_MAX, (unsigned char*)p, (unsigned char*)p + c);
936 goto checknest;
937
938 }
939 swapped:
940 q = T(ep->desc);
941 if (mp->keep[level]++ && b > cur && b < end && *(b - 1) != ' ' && *q && *q != ',' && *q != '.' && *q != '\b')
942 *b++ = ' ';
943 if (*q == '\b')
944 q++;
945 str = 0;
946 for (t = q; *t; t++)
947 if (*t == '%' && (c = *(t + 1)))
948 {
949 if (c == '%')
950 t++;
951 else
952 while (c && c != '%')
953 {
954 if (c == 's')
955 {
956 str = 1;
957 break;
958 }
959 else if (c == 'c' || c == 'd' || c == 'i' || c == 'u' || c == 'x' || c == 'X')
960 goto format;
961 t++;
962 c = *(t + 1);
963 }
964 }
965 format:
966 if (!str)
967 b += sfsprintf(b, end - b, q, num, num == 1 ? "" : "s", 0, 0, 0, 0, 0, 0);
968 else if (ep->type == 'd' || ep->type == 'D')
969 b += sfsprintf(b, end - b, q, fmttime("%?%QL", (time_t)num), 0, 0, 0, 0, 0, 0, 0);
970 else if (ep->type == 'v')
971 b += sfsprintf(b, end - b, q, fmtversion(num), 0, 0, 0, 0, 0, 0, 0);
972 else
973 b += sfsprintf(b, end - b, q, fmtnum(num, 0), num == 1 ? "" : "s", 0, 0, 0, 0, 0, 0);
974 if (ep->mime && *ep->mime)
975 mp->mime = ep->mime;
976 checknest:
977 if (ep->nest == '}')
978 {
979 if (!mp->keep[level])
980 {
981 b = mp->msg[level];
982 mp->mime = mp->cap[level];
983 }
984 else if (level > 0)
985 mp->keep[level - 1] = mp->keep[level];
986 if (--level < 0)
987 {
988 level = 0;
989 mp->keep[0] = 0;
990 }
991 }
992 continue;
993 next:
994 if (ep->cont == '&')
995 mp->keep[level] = 0;
996 goto checknest;
997 }
998 if (all && b-- || mp->keep[level] && b > cur)
999 {
1000 *b = 0;
1001 return buf;
1002 }
1003 return 0;
1004 }
1005
1006 /*
1007 * check english language stats
1008 */
1009
1010 static int
ckenglish(register Magic_t * mp,int pun,int badpun)1011 ckenglish(register Magic_t* mp, int pun, int badpun)
1012 {
1013 register char* s;
1014 register int vowl = 0;
1015 register int freq = 0;
1016 register int rare = 0;
1017
1018 if (5 * badpun > pun)
1019 return 0;
1020 if (2 * mp->count[';'] > mp->count['E'] + mp->count['e'])
1021 return 0;
1022 if ((mp->count['>'] + mp->count['<'] + mp->count['/']) > mp->count['E'] + mp->count['e'])
1023 return 0;
1024 for (s = "aeiou"; *s; s++)
1025 vowl += mp->count[toupper(*s)] + mp->count[*s];
1026 for (s = "etaion"; *s; s++)
1027 freq += mp->count[toupper(*s)] + mp->count[*s];
1028 for (s = "vjkqxz"; *s; s++)
1029 rare += mp->count[toupper(*s)] + mp->count[*s];
1030 return 5 * vowl >= mp->fbsz - mp->count[' '] && freq >= 10 * rare;
1031 }
1032
1033 /*
1034 * check programming language stats
1035 */
1036
1037 static char*
cklang(register Magic_t * mp,const char * file,char * buf,char * end,struct stat * st)1038 cklang(register Magic_t* mp, const char* file, char* buf, char* end, struct stat* st)
1039 {
1040 register int c;
1041 register unsigned char* b;
1042 register unsigned char* e;
1043 register int q;
1044 register char* s;
1045 char* t;
1046 char* base;
1047 char* suff;
1048 char* t1;
1049 char* t2;
1050 char* t3;
1051 int n;
1052 int badpun;
1053 int code;
1054 int pun;
1055 Cctype_t flags;
1056 Info_t* ip;
1057
1058 b = (unsigned char*)mp->fbuf;
1059 e = b + mp->fbsz;
1060 memzero(mp->count, sizeof(mp->count));
1061 memzero(mp->multi, sizeof(mp->multi));
1062 memzero(mp->identifier, sizeof(mp->identifier));
1063
1064 /*
1065 * check character coding
1066 */
1067
1068 flags = 0;
1069 while (b < e)
1070 flags |= mp->cctype[*b++];
1071 b = (unsigned char*)mp->fbuf;
1072 code = 0;
1073 q = CC_ASCII;
1074 n = CC_MASK;
1075 for (c = 0; c < CC_MAPS; c++)
1076 {
1077 flags ^= CC_text;
1078 if ((flags & CC_MASK) < n)
1079 {
1080 n = flags & CC_MASK;
1081 q = c;
1082 }
1083 flags >>= CC_BIT;
1084 }
1085 flags = n;
1086 if (!(flags & (CC_binary|CC_notext)))
1087 {
1088 if (q != CC_NATIVE)
1089 {
1090 code = q;
1091 ccmaps(mp->fbuf, mp->fbsz, q, CC_NATIVE);
1092 }
1093 if (b[0] == '#' && b[1] == '!')
1094 {
1095 for (b += 2; b < e && isspace(*b); b++);
1096 for (s = (char*)b; b < e && isprint(*b); b++);
1097 c = *b;
1098 *b = 0;
1099 if ((st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) || match(s, "/*bin*/*") || !access(s, F_OK))
1100 {
1101 if (t = strrchr(s, '/'))
1102 s = t + 1;
1103 for (t = s; *t; t++)
1104 if (isspace(*t))
1105 {
1106 *t = 0;
1107 break;
1108 }
1109 sfsprintf(mp->mbuf, sizeof(mp->mbuf), "application/x-%s", *s ? s : "sh");
1110 mp->mime = mp->mbuf;
1111 if (match(s, "*sh"))
1112 {
1113 t1 = T("command");
1114 if (streq(s, "sh"))
1115 *s = 0;
1116 else
1117 {
1118 *b++ = ' ';
1119 *b = 0;
1120 }
1121 }
1122 else
1123 {
1124 t1 = T("interpreter");
1125 *b++ = ' ';
1126 *b = 0;
1127 }
1128 sfsprintf(mp->sbuf, sizeof(mp->sbuf), T("%s%s script"), s, t1);
1129 s = mp->sbuf;
1130 goto qualify;
1131 }
1132 *b = c;
1133 b = (unsigned char*)mp->fbuf;
1134 }
1135 badpun = 0;
1136 pun = 0;
1137 q = 0;
1138 s = 0;
1139 t = 0;
1140 while (b < e)
1141 {
1142 c = *b++;
1143 mp->count[c]++;
1144 if (c == q && (q != '*' || *b == '/' && b++))
1145 {
1146 mp->multi[q]++;
1147 q = 0;
1148 }
1149 else if (c == '\\')
1150 {
1151 s = 0;
1152 b++;
1153 }
1154 else if (!q)
1155 {
1156 if (isalpha(c) || c == '_')
1157 {
1158 if (!s)
1159 s = (char*)b - 1;
1160 }
1161 else if (!isdigit(c))
1162 {
1163 if (s)
1164 {
1165 if (s > mp->fbuf)
1166 switch (*(s - 1))
1167 {
1168 case ':':
1169 if (*b == ':')
1170 mp->multi[':']++;
1171 break;
1172 case '.':
1173 if (((char*)b - s) == 3 && (s == (mp->fbuf + 1) || *(s - 2) == '\n'))
1174 mp->multi['.']++;
1175 break;
1176 case '\n':
1177 case '\\':
1178 if (*b == '{')
1179 t = (char*)b + 1;
1180 break;
1181 case '{':
1182 if (s == t && *b == '}')
1183 mp->multi['X']++;
1184 break;
1185 }
1186 if (!mp->idtab)
1187 {
1188 if (mp->idtab = dtnew(mp->vm, &mp->dtdisc, Dtset))
1189 for (q = 0; q < elementsof(dict); q++)
1190 dtinsert(mp->idtab, &dict[q]);
1191 else if (mp->disc->errorf)
1192 (*mp->disc->errorf)(mp, mp->disc, 3, "out of space");
1193 q = 0;
1194 }
1195 if (mp->idtab)
1196 {
1197 *(b - 1) = 0;
1198 if (ip = (Info_t*)dtmatch(mp->idtab, s))
1199 mp->identifier[ip->value]++;
1200 *(b - 1) = c;
1201 }
1202 s = 0;
1203 }
1204 switch (c)
1205 {
1206 case '\t':
1207 if (b == (unsigned char*)(mp->fbuf + 1) || *(b - 2) == '\n')
1208 mp->multi['\t']++;
1209 break;
1210 case '"':
1211 case '\'':
1212 q = c;
1213 break;
1214 case '/':
1215 if (*b == '*')
1216 q = *b++;
1217 else if (*b == '/')
1218 q = '\n';
1219 break;
1220 case '$':
1221 if (*b == '(' && *(b + 1) != ' ')
1222 mp->multi['$']++;
1223 break;
1224 case '{':
1225 case '}':
1226 case '[':
1227 case ']':
1228 case '(':
1229 mp->multi[c]++;
1230 break;
1231 case ')':
1232 mp->multi[c]++;
1233 goto punctuation;
1234 case ':':
1235 if (*b == ':' && isspace(*(b + 1)) && b > (unsigned char*)(mp->fbuf + 1) && isspace(*(b - 2)))
1236 mp->multi[':']++;
1237 goto punctuation;
1238 case '.':
1239 case ',':
1240 case '%':
1241 case ';':
1242 case '?':
1243 punctuation:
1244 pun++;
1245 if (*b != ' ' && *b != '\n')
1246 badpun++;
1247 break;
1248 }
1249 }
1250 }
1251 }
1252 }
1253 else
1254 while (b < e)
1255 mp->count[*b++]++;
1256 base = (t1 = strrchr(file, '/')) ? t1 + 1 : (char*)file;
1257 suff = (t1 = strrchr(base, '.')) ? t1 + 1 : "";
1258 if (!flags)
1259 {
1260 if (match(suff, "*sh|bat|cmd"))
1261 goto id_sh;
1262 if (match(base, "*@(mkfile)"))
1263 goto id_mk;
1264 if (match(base, "*@(makefile|.mk)"))
1265 goto id_make;
1266 if (match(base, "*@(mamfile|.mam)"))
1267 goto id_mam;
1268 if (match(suff, "[cly]?(pp|xx|++)|cc|ll|yy"))
1269 goto id_c;
1270 if (match(suff, "f"))
1271 goto id_fortran;
1272 if (match(suff, "htm+(l)"))
1273 goto id_html;
1274 if (match(suff, "cpy"))
1275 goto id_copybook;
1276 if (match(suff, "cob|cbl|cb2"))
1277 goto id_cobol;
1278 if (match(suff, "pl[1i]"))
1279 goto id_pl1;
1280 if (match(suff, "tex"))
1281 goto id_tex;
1282 if (match(suff, "asm|s"))
1283 goto id_asm;
1284 if ((st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) && (!suff || suff != strchr(suff, '.')))
1285 {
1286 id_sh:
1287 s = T("command script");
1288 mp->mime = "application/sh";
1289 goto qualify;
1290 }
1291 if (strmatch(mp->fbuf, "From * [0-9][0-9]:[0-9][0-9]:[0-9][0-9] *"))
1292 {
1293 s = T("mail message");
1294 mp->mime = "message/rfc822";
1295 goto qualify;
1296 }
1297 if (match(base, "*@(mkfile)"))
1298 {
1299 id_mk:
1300 s = "mkfile";
1301 mp->mime = "application/mk";
1302 goto qualify;
1303 }
1304 if (match(base, "*@(makefile|.mk)") || mp->multi['\t'] >= mp->count[':'] && (mp->multi['$'] > 0 || mp->multi[':'] > 0))
1305 {
1306 id_make:
1307 s = "makefile";
1308 mp->mime = "application/make";
1309 goto qualify;
1310 }
1311 if (mp->multi['.'] >= 3)
1312 {
1313 s = T("nroff input");
1314 mp->mime = "application/x-troff";
1315 goto qualify;
1316 }
1317 if (mp->multi['X'] >= 3)
1318 {
1319 s = T("TeX input");
1320 mp->mime = "application/x-tex";
1321 goto qualify;
1322 }
1323 if (mp->fbsz < SF_BUFSIZE &&
1324 (mp->multi['('] == mp->multi[')'] &&
1325 mp->multi['{'] == mp->multi['}'] &&
1326 mp->multi['['] == mp->multi[']']) ||
1327 mp->fbsz >= SF_BUFSIZE &&
1328 (mp->multi['('] >= mp->multi[')'] &&
1329 mp->multi['{'] >= mp->multi['}'] &&
1330 mp->multi['['] >= mp->multi[']']))
1331 {
1332 c = mp->identifier[ID_INCL1];
1333 if (c >= 2 && mp->identifier[ID_INCL2] >= c && mp->identifier[ID_INCL3] >= c && mp->count['.'] >= c ||
1334 mp->identifier[ID_C] >= 5 && mp->count[';'] >= 5 ||
1335 mp->count['='] >= 20 && mp->count[';'] >= 20)
1336 {
1337 id_c:
1338 t1 = "";
1339 t2 = "c ";
1340 t3 = T("program");
1341 switch (*suff)
1342 {
1343 case 'c':
1344 case 'C':
1345 mp->mime = "application/x-cc";
1346 break;
1347 case 'l':
1348 case 'L':
1349 t1 = "lex ";
1350 mp->mime = "application/x-lex";
1351 break;
1352 default:
1353 t3 = T("header");
1354 if (mp->identifier[ID_YACC] < 5 || mp->count['%'] < 5)
1355 {
1356 mp->mime = "application/x-cc";
1357 break;
1358 }
1359 /*FALLTHROUGH*/
1360 case 'y':
1361 case 'Y':
1362 t1 = "yacc ";
1363 mp->mime = "application/x-yacc";
1364 break;
1365 }
1366 if (mp->identifier[ID_CPLUSPLUS] >= 3)
1367 {
1368 t2 = "c++ ";
1369 mp->mime = "application/x-c++";
1370 }
1371 sfsprintf(mp->sbuf, sizeof(mp->sbuf), "%s%s%s", t1, t2, t3);
1372 s = mp->sbuf;
1373 goto qualify;
1374 }
1375 }
1376 if (mp->identifier[ID_MAM1] >= 2 && mp->identifier[ID_MAM3] >= 2 &&
1377 (mp->fbsz < SF_BUFSIZE && mp->identifier[ID_MAM1] == mp->identifier[ID_MAM2] ||
1378 mp->fbsz >= SF_BUFSIZE && mp->identifier[ID_MAM1] >= mp->identifier[ID_MAM2]))
1379 {
1380 id_mam:
1381 s = T("mam program");
1382 mp->mime = "application/x-mam";
1383 goto qualify;
1384 }
1385 if (mp->identifier[ID_FORTRAN] >= 8)
1386 {
1387 id_fortran:
1388 s = T("fortran program");
1389 mp->mime = "application/x-fortran";
1390 goto qualify;
1391 }
1392 if (mp->identifier[ID_HTML] > 0 && mp->count['<'] >= 8 && (c = mp->count['<'] - mp->count['>']) >= -2 && c <= 2)
1393 {
1394 id_html:
1395 s = T("html input");
1396 mp->mime = "text/html";
1397 goto qualify;
1398 }
1399 if (mp->identifier[ID_COPYBOOK] > 0 && mp->identifier[ID_COBOL] == 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
1400 {
1401 id_copybook:
1402 s = T("cobol copybook");
1403 mp->mime = "application/x-cobol";
1404 goto qualify;
1405 }
1406 if (mp->identifier[ID_COBOL] > 0 && mp->identifier[ID_COPYBOOK] > 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
1407 {
1408 id_cobol:
1409 s = T("cobol program");
1410 mp->mime = "application/x-cobol";
1411 goto qualify;
1412 }
1413 if (mp->identifier[ID_PL1] > 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
1414 {
1415 id_pl1:
1416 s = T("pl1 program");
1417 mp->mime = "application/x-pl1";
1418 goto qualify;
1419 }
1420 if (mp->count['{'] >= 6 && (c = mp->count['{'] - mp->count['}']) >= -2 && c <= 2 && mp->count['\\'] >= mp->count['{'])
1421 {
1422 id_tex:
1423 s = T("TeX input");
1424 mp->mime = "text/tex";
1425 goto qualify;
1426 }
1427 if (mp->identifier[ID_ASM] >= 4)
1428 {
1429 id_asm:
1430 s = T("as program");
1431 mp->mime = "application/x-as";
1432 goto qualify;
1433 }
1434 if (ckenglish(mp, pun, badpun))
1435 {
1436 s = T("english text");
1437 mp->mime = "text/plain";
1438 goto qualify;
1439 }
1440 }
1441 else if (streq(base, "core"))
1442 {
1443 mp->mime = "x-system/core";
1444 return T("core dump");
1445 }
1446 if (flags & (CC_binary|CC_notext))
1447 {
1448 b = (unsigned char*)mp->fbuf;
1449 e = b + mp->fbsz;
1450 n = 0;
1451 for (;;)
1452 {
1453 c = *b++;
1454 q = 0;
1455 while (c & 0x80)
1456 {
1457 c <<= 1;
1458 q++;
1459 }
1460 switch (q)
1461 {
1462 case 4:
1463 if (b < e && (*b++ & 0xc0) != 0x80)
1464 break;
1465 /* FALLTHROUGH */
1466 case 3:
1467 if (b < e && (*b++ & 0xc0) != 0x80)
1468 break;
1469 /* FALLTHROUGH */
1470 case 2:
1471 if (b < e && (*b++ & 0xc0) != 0x80)
1472 break;
1473 n = 1;
1474 /* FALLTHROUGH */
1475 case 0:
1476 if (b >= e)
1477 {
1478 if (n)
1479 {
1480 flags &= ~(CC_binary|CC_notext);
1481 flags |= CC_utf_8;
1482 }
1483 break;
1484 }
1485 continue;
1486 }
1487 break;
1488 }
1489 }
1490 if (flags & (CC_binary|CC_notext))
1491 {
1492 unsigned long d = 0;
1493
1494 if ((q = mp->fbsz / UCHAR_MAX) >= 2)
1495 {
1496 /*
1497 * compression/encryption via standard deviation
1498 */
1499
1500
1501 for (c = 0; c < UCHAR_MAX; c++)
1502 {
1503 pun = mp->count[c] - q;
1504 d += pun * pun;
1505 }
1506 d /= mp->fbsz;
1507 }
1508 if (d <= 0)
1509 s = T("binary");
1510 else if (d < 4)
1511 s = T("encrypted");
1512 else if (d < 16)
1513 s = T("packed");
1514 else if (d < 64)
1515 s = T("compressed");
1516 else if (d < 256)
1517 s = T("delta");
1518 else
1519 s = T("data");
1520 mp->mime = "application/octet-stream";
1521 return s;
1522 }
1523 mp->mime = "text/plain";
1524 if (flags & CC_utf_8)
1525 s = (flags & CC_control) ? T("utf-8 text with control characters") : T("utf-8 text");
1526 else if (flags & CC_latin)
1527 s = (flags & CC_control) ? T("latin text with control characters") : T("latin text");
1528 else
1529 s = (flags & CC_control) ? T("text with control characters") : T("text");
1530 qualify:
1531 if (!flags && mp->count['\n'] >= mp->count['\r'] && mp->count['\n'] <= (mp->count['\r'] + 1) && mp->count['\r'])
1532 {
1533 t = "dos ";
1534 mp->mime = "text/dos";
1535 }
1536 else
1537 t = "";
1538 if (code)
1539 {
1540 if (code == CC_ASCII)
1541 sfsprintf(buf, end - buf, "ascii %s%s", t, s);
1542 else
1543 {
1544 sfsprintf(buf, end - buf, "ebcdic%d %s%s", code - 1, t, s);
1545 mp->mime = "text/ebcdic";
1546 }
1547 s = buf;
1548 }
1549 else if (*t)
1550 {
1551 sfsprintf(buf, end - buf, "%s%s", t, s);
1552 s = buf;
1553 }
1554 return s;
1555 }
1556
1557 /*
1558 * return the basic magic string for file,st in buf,size
1559 */
1560
1561 static char*
type(register Magic_t * mp,const char * file,struct stat * st,char * buf,char * end)1562 type(register Magic_t* mp, const char* file, struct stat* st, char* buf, char* end)
1563 {
1564 register char* s;
1565 register char* t;
1566
1567 mp->mime = 0;
1568 if (!S_ISREG(st->st_mode))
1569 {
1570 if (S_ISDIR(st->st_mode))
1571 {
1572 mp->mime = "x-system/dir";
1573 return T("directory");
1574 }
1575 if (S_ISLNK(st->st_mode))
1576 {
1577 mp->mime = "x-system/lnk";
1578 s = buf;
1579 s += sfsprintf(s, end - s, T("symbolic link to "));
1580 if (pathgetlink(file, s, end - s) < 0)
1581 return T("cannot read symbolic link text");
1582 return buf;
1583 }
1584 if (S_ISBLK(st->st_mode))
1585 {
1586 mp->mime = "x-system/blk";
1587 sfsprintf(buf, PATH_MAX, T("block special (%s)"), fmtdev(st));
1588 return buf;
1589 }
1590 if (S_ISCHR(st->st_mode))
1591 {
1592 mp->mime = "x-system/chr";
1593 sfsprintf(buf, end - buf, T("character special (%s)"), fmtdev(st));
1594 return buf;
1595 }
1596 if (S_ISFIFO(st->st_mode))
1597 {
1598 mp->mime = "x-system/fifo";
1599 return "fifo";
1600 }
1601 #ifdef S_ISSOCK
1602 if (S_ISSOCK(st->st_mode))
1603 {
1604 mp->mime = "x-system/sock";
1605 return "socket";
1606 }
1607 #endif
1608 }
1609 if (!(mp->fbmx = st->st_size))
1610 s = T("empty");
1611 else if (!mp->fp)
1612 s = T("cannot read");
1613 else
1614 {
1615 mp->fbsz = sfread(mp->fp, mp->fbuf, sizeof(mp->fbuf) - 1);
1616 if (mp->fbsz < 0)
1617 s = fmterror(errno);
1618 else if (mp->fbsz == 0)
1619 s = T("empty");
1620 else
1621 {
1622 mp->fbuf[mp->fbsz] = 0;
1623 mp->xoff = 0;
1624 mp->xbsz = 0;
1625 if (!(s = ckmagic(mp, file, buf, end, st, 0)))
1626 s = cklang(mp, file, buf, end, st);
1627 }
1628 }
1629 if (!mp->mime)
1630 mp->mime = "application/unknown";
1631 else if ((t = strchr(mp->mime, '%')) && *(t + 1) == 's' && !*(t + 2))
1632 {
1633 register char* b;
1634 register char* be;
1635 register char* m;
1636 register char* me;
1637
1638 b = mp->mime;
1639 me = (m = mp->mime = mp->fbuf) + sizeof(mp->fbuf) - 1;
1640 while (m < me && b < t)
1641 *m++ = *b++;
1642 b = t = s;
1643 for (;;)
1644 {
1645 if (!(be = strchr(t, ' ')))
1646 {
1647 be = b + strlen(b);
1648 break;
1649 }
1650 if (*(be - 1) == ',' || strneq(be + 1, "data", 4) || strneq(be + 1, "file", 4))
1651 break;
1652 b = t;
1653 t = be + 1;
1654 }
1655 while (m < me && b < be)
1656 if ((*m++ = *b++) == ' ')
1657 *(m - 1) = '-';
1658 *m = 0;
1659 }
1660 return s;
1661 }
1662
1663 /*
1664 * low level for magicload()
1665 */
1666
1667 static int
load(register Magic_t * mp,char * file,register Sfio_t * fp)1668 load(register Magic_t* mp, char* file, register Sfio_t* fp)
1669 {
1670 register Entry_t* ep;
1671 register char* p;
1672 register char* p2;
1673 char* p3;
1674 char* next;
1675 int n;
1676 int lge;
1677 int lev;
1678 int ent;
1679 int old;
1680 int cont;
1681 Info_t* ip;
1682 Entry_t* ret;
1683 Entry_t* first;
1684 Entry_t* last = 0;
1685 Entry_t* fun['z' - 'a' + 1];
1686
1687 memzero(fun, sizeof(fun));
1688 cont = '$';
1689 ent = 0;
1690 lev = 0;
1691 old = 0;
1692 ret = 0;
1693 error_info.file = file;
1694 error_info.line = 0;
1695 first = ep = vmnewof(mp->vm, 0, Entry_t, 1, 0);
1696 while (p = sfgetr(fp, '\n', 1))
1697 {
1698 error_info.line++;
1699 for (; isspace(*p); p++);
1700
1701 /*
1702 * nesting
1703 */
1704
1705 switch (*p)
1706 {
1707 case 0:
1708 case '#':
1709 cont = '#';
1710 continue;
1711 case '{':
1712 if (++lev < MAXNEST)
1713 ep->nest = *p;
1714 else if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
1715 (*mp->disc->errorf)(mp, mp->disc, 1, "{ ... } operator nesting too deep -- %d max", MAXNEST);
1716 continue;
1717 case '}':
1718 if (!last || lev <= 0)
1719 {
1720 if (mp->disc->errorf)
1721 (*mp->disc->errorf)(mp, mp->disc, 2, "`%c': invalid nesting", *p);
1722 }
1723 else if (lev-- == ent)
1724 {
1725 ent = 0;
1726 ep->cont = ':';
1727 ep->offset = ret->offset;
1728 ep->nest = ' ';
1729 ep->type = ' ';
1730 ep->op = ' ';
1731 ep->desc = "[RETURN]";
1732 last = ep;
1733 ep = ret->next = vmnewof(mp->vm, 0, Entry_t, 1, 0);
1734 ret = 0;
1735 }
1736 else
1737 last->nest = *p;
1738 continue;
1739 default:
1740 if (*(p + 1) == '{' || *(p + 1) == '(' && *p != '+' && *p != '>' && *p != '&' && *p != '|')
1741 {
1742 n = *p++;
1743 if (n >= 'a' && n <= 'z')
1744 n -= 'a';
1745 else
1746 {
1747 if (mp->disc->errorf)
1748 (*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function name", n);
1749 n = 0;
1750 }
1751 if (ret && mp->disc->errorf)
1752 (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function has no return", ret->offset + 'a');
1753 if (*p == '{')
1754 {
1755 ent = ++lev;
1756 ret = ep;
1757 ep->desc = "[FUNCTION]";
1758 }
1759 else
1760 {
1761 if (*(p + 1) != ')' && mp->disc->errorf)
1762 (*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function call argument list", n + 'a');
1763 ep->desc = "[CALL]";
1764 }
1765 ep->cont = cont;
1766 ep->offset = n;
1767 ep->nest = ' ';
1768 ep->type = ' ';
1769 ep->op = ' ';
1770 last = ep;
1771 ep = ep->next = vmnewof(mp->vm, 0, Entry_t, 1, 0);
1772 if (ret)
1773 fun[n] = last->value.lab = ep;
1774 else if (!(last->value.lab = fun[n]) && mp->disc->errorf)
1775 (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function not defined", n + 'a');
1776 continue;
1777 }
1778 if (!ep->nest)
1779 ep->nest = (lev > 0 && lev != ent) ? ('0' + lev - !!ent) : ' ';
1780 break;
1781 }
1782
1783 /*
1784 * continuation
1785 */
1786
1787 cont = '$';
1788 switch (*p)
1789 {
1790 case '>':
1791 old = 1;
1792 if (*(p + 1) == *p)
1793 {
1794 /*
1795 * old style nesting push
1796 */
1797
1798 p++;
1799 old = 2;
1800 if (!lev && last)
1801 {
1802 lev = 1;
1803 last->nest = '{';
1804 if (last->cont == '>')
1805 last->cont = '&';
1806 ep->nest = '1';
1807 }
1808 }
1809 /*FALLTHROUGH*/
1810 case '+':
1811 case '&':
1812 case '|':
1813 ep->cont = *p++;
1814 break;
1815 default:
1816 if ((mp->flags & MAGIC_VERBOSE) && !isalpha(*p) && mp->disc->errorf)
1817 (*mp->disc->errorf)(mp, mp->disc, 1, "`%c': invalid line continuation operator", *p);
1818 /*FALLTHROUGH*/
1819 case '*':
1820 case '0': case '1': case '2': case '3': case '4':
1821 case '5': case '6': case '7': case '8': case '9':
1822 ep->cont = (lev > 0) ? '&' : '#';
1823 break;
1824 }
1825 switch (old)
1826 {
1827 case 1:
1828 old = 0;
1829 if (lev)
1830 {
1831 /*
1832 * old style nesting pop
1833 */
1834
1835 lev = 0;
1836 if (last)
1837 last->nest = '}';
1838 ep->nest = ' ';
1839 if (ep->cont == '&')
1840 ep->cont = '#';
1841 }
1842 break;
1843 case 2:
1844 old = 1;
1845 break;
1846 }
1847 if (isdigit(*p))
1848 {
1849 /*
1850 * absolute offset
1851 */
1852
1853 ep->offset = strton(p, &next, NiL, 0);
1854 p2 = next;
1855 }
1856 else
1857 {
1858 for (p2 = p; *p2 && !isspace(*p2); p2++);
1859 if (!*p2)
1860 {
1861 if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
1862 (*mp->disc->errorf)(mp, mp->disc, 1, "not enough fields: `%s'", p);
1863 continue;
1864 }
1865
1866 /*
1867 * offset expression
1868 */
1869
1870 *p2++ = 0;
1871 ep->expr = vmstrdup(mp->vm, p);
1872 if (isalpha(*p))
1873 ep->offset = (ip = (Info_t*)dtmatch(mp->infotab, p)) ? ip->value : 0;
1874 else if (*p == '(' && ep->cont == '>')
1875 {
1876 /*
1877 * convert old style indirection to @
1878 */
1879
1880 p = ep->expr + 1;
1881 for (;;)
1882 {
1883 switch (*p++)
1884 {
1885 case 0:
1886 case '@':
1887 case '(':
1888 break;
1889 case ')':
1890 break;
1891 default:
1892 continue;
1893 }
1894 break;
1895 }
1896 if (*--p == ')')
1897 {
1898 *p = 0;
1899 *ep->expr = '@';
1900 }
1901 }
1902 }
1903 for (; isspace(*p2); p2++);
1904 for (p = p2; *p2 && !isspace(*p2); p2++);
1905 if (!*p2)
1906 {
1907 if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
1908 (*mp->disc->errorf)(mp, mp->disc, 1, "not enough fields: `%s'", p);
1909 continue;
1910 }
1911 *p2++ = 0;
1912
1913 /*
1914 * type
1915 */
1916
1917 if ((*p == 'b' || *p == 'l') && *(p + 1) == 'e')
1918 {
1919 ep->swap = ~(*p == 'l' ? 7 : 0);
1920 p += 2;
1921 }
1922 if (*p == 's')
1923 {
1924 if (*(p + 1) == 'h')
1925 ep->type = 'h';
1926 else
1927 ep->type = 's';
1928 }
1929 else if (*p == 'a')
1930 ep->type = 's';
1931 else
1932 ep->type = *p;
1933 if (p = strchr(p, '&'))
1934 {
1935 /*
1936 * old style mask
1937 */
1938
1939 ep->mask = strton(++p, NiL, NiL, 0);
1940 }
1941 for (; isspace(*p2); p2++);
1942 if (ep->mask)
1943 *--p2 = '=';
1944
1945 /*
1946 * comparison operation
1947 */
1948
1949 p = p2;
1950 if (p2 = strchr(p, '\t'))
1951 *p2++ = 0;
1952 else
1953 {
1954 int qe = 0;
1955 int qn = 0;
1956
1957 /*
1958 * assume balanced {}[]()\\""'' field
1959 */
1960
1961 for (p2 = p;;)
1962 {
1963 switch (n = *p2++)
1964 {
1965 case 0:
1966 break;
1967 case '{':
1968 if (!qe)
1969 qe = '}';
1970 if (qe == '}')
1971 qn++;
1972 continue;
1973 case '(':
1974 if (!qe)
1975 qe = ')';
1976 if (qe == ')')
1977 qn++;
1978 continue;
1979 case '[':
1980 if (!qe)
1981 qe = ']';
1982 if (qe == ']')
1983 qn++;
1984 continue;
1985 case '}':
1986 case ')':
1987 case ']':
1988 if (qe == n && qn > 0)
1989 qn--;
1990 continue;
1991 case '"':
1992 case '\'':
1993 if (!qe)
1994 qe = n;
1995 else if (qe == n)
1996 qe = 0;
1997 continue;
1998 case '\\':
1999 if (*p2)
2000 p2++;
2001 continue;
2002 default:
2003 if (!qe && isspace(n))
2004 break;
2005 continue;
2006 }
2007 if (n)
2008 *(p2 - 1) = 0;
2009 else
2010 p2--;
2011 break;
2012 }
2013 }
2014 lge = 0;
2015 if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
2016 ep->op = '=';
2017 else
2018 {
2019 if (*p == '&')
2020 {
2021 ep->mask = strton(++p, &next, NiL, 0);
2022 p = next;
2023 }
2024 switch (*p)
2025 {
2026 case '=':
2027 case '>':
2028 case '<':
2029 case '*':
2030 ep->op = *p++;
2031 if (*p == '=')
2032 {
2033 p++;
2034 switch (ep->op)
2035 {
2036 case '>':
2037 lge = -1;
2038 break;
2039 case '<':
2040 lge = 1;
2041 break;
2042 }
2043 }
2044 break;
2045 case '!':
2046 case '@':
2047 ep->op = *p++;
2048 if (*p == '=')
2049 p++;
2050 break;
2051 case 'x':
2052 p++;
2053 ep->op = '*';
2054 break;
2055 default:
2056 ep->op = '=';
2057 if (ep->mask)
2058 ep->value.num = ep->mask;
2059 break;
2060 }
2061 }
2062 if (ep->op != '*' && !ep->value.num)
2063 {
2064 if (ep->type == 'e')
2065 {
2066 if (ep->value.sub = vmnewof(mp->vm, 0, regex_t, 1, 0))
2067 {
2068 ep->value.sub->re_disc = &mp->redisc;
2069 if (!(n = regcomp(ep->value.sub, p, REG_DELIMITED|REG_LENIENT|REG_NULL|REG_DISCIPLINE)))
2070 {
2071 p += ep->value.sub->re_npat;
2072 if (!(n = regsubcomp(ep->value.sub, p, NiL, 0, 0)))
2073 p += ep->value.sub->re_npat;
2074 }
2075 if (n)
2076 {
2077 regmessage(mp, ep->value.sub, n);
2078 ep->value.sub = 0;
2079 }
2080 else if (*p && mp->disc->errorf)
2081 (*mp->disc->errorf)(mp, mp->disc, 1, "invalid characters after substitution: %s", p);
2082 }
2083 }
2084 else if (ep->type == 'm')
2085 {
2086 ep->mask = stresc(p) + 1;
2087 ep->value.str = vmnewof(mp->vm, 0, char, ep->mask + 1, 0);
2088 memcpy(ep->value.str, p, ep->mask);
2089 if ((!ep->expr || !ep->offset) && !strmatch(ep->value.str, "\\!\\(*\\)"))
2090 ep->value.str[ep->mask - 1] = '*';
2091 }
2092 else if (ep->type == 's')
2093 {
2094 ep->mask = stresc(p);
2095 ep->value.str = vmnewof(mp->vm, 0, char, ep->mask, 0);
2096 memcpy(ep->value.str, p, ep->mask);
2097 }
2098 else if (*p == '\'')
2099 {
2100 stresc(p);
2101 ep->value.num = *(unsigned char*)(p + 1) + lge;
2102 }
2103 else if (strmatch(p, "+([a-z])\\(*\\)"))
2104 {
2105 char* t;
2106
2107 t = p;
2108 ep->type = 'V';
2109 ep->op = *p;
2110 while (*p && *p++ != '(');
2111 switch (ep->op)
2112 {
2113 case 'l':
2114 n = *p++;
2115 if (n < 'a' || n > 'z')
2116 {
2117 if (mp->disc->errorf)
2118 (*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function name", n);
2119 }
2120 else if (!fun[n -= 'a'])
2121 {
2122 if (mp->disc->errorf)
2123 (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function not defined", n + 'a');
2124 }
2125 else
2126 {
2127 ep->value.loop = vmnewof(mp->vm, 0, Loop_t, 1, 0);
2128 ep->value.loop->lab = fun[n];
2129 while (*p && *p++ != ',');
2130 ep->value.loop->start = strton(p, &t, NiL, 0);
2131 while (*t && *t++ != ',');
2132 ep->value.loop->size = strton(t, &t, NiL, 0);
2133 }
2134 break;
2135 case 'm':
2136 case 'r':
2137 ep->desc = vmnewof(mp->vm, 0, char, 32, 0);
2138 ep->mime = vmnewof(mp->vm, 0, char, 32, 0);
2139 break;
2140 case 'v':
2141 break;
2142 default:
2143 if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
2144 (*mp->disc->errorf)(mp, mp->disc, 1, "%-.*s: unknown function", p - t, t);
2145 break;
2146 }
2147 }
2148 else
2149 {
2150 ep->value.num = strton(p, NiL, NiL, 0) + lge;
2151 if (ep->op == '@')
2152 ep->value.num = swapget(0, (char*)&ep->value.num, sizeof(ep->value.num));
2153 }
2154 }
2155
2156 /*
2157 * file description
2158 */
2159
2160 if (p2)
2161 {
2162 for (; isspace(*p2); p2++);
2163 if (p = strchr(p2, '\t'))
2164 {
2165 /*
2166 * check for message catalog index
2167 */
2168
2169 *p++ = 0;
2170 if (isalpha(*p2))
2171 {
2172 for (p3 = p2; isalnum(*p3); p3++);
2173 if (*p3++ == ':')
2174 {
2175 for (; isdigit(*p3); p3++);
2176 if (!*p3)
2177 {
2178 for (p2 = p; isspace(*p2); p2++);
2179 if (p = strchr(p2, '\t'))
2180 *p++ = 0;
2181 }
2182 }
2183 }
2184 }
2185 stresc(p2);
2186 ep->desc = vmstrdup(mp->vm, p2);
2187 if (p)
2188 {
2189 for (; isspace(*p); p++);
2190 if (*p)
2191 ep->mime = vmstrdup(mp->vm, p);
2192 }
2193 }
2194 else
2195 ep->desc = "";
2196
2197 /*
2198 * get next entry
2199 */
2200
2201 last = ep;
2202 ep = ep->next = vmnewof(mp->vm, 0, Entry_t, 1, 0);
2203 }
2204 if (last)
2205 {
2206 last->next = 0;
2207 if (mp->magiclast)
2208 mp->magiclast->next = first;
2209 else
2210 mp->magic = first;
2211 mp->magiclast = last;
2212 }
2213 vmfree(mp->vm, ep);
2214 if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
2215 {
2216 if (lev < 0)
2217 (*mp->disc->errorf)(mp, mp->disc, 1, "too many } operators");
2218 else if (lev > 0)
2219 (*mp->disc->errorf)(mp, mp->disc, 1, "not enough } operators");
2220 if (ret)
2221 (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function has no return", ret->offset + 'a');
2222 }
2223 error_info.file = 0;
2224 error_info.line = 0;
2225 return 0;
2226 }
2227
2228 /*
2229 * load a magic file into mp
2230 */
2231
2232 int
magicload(register Magic_t * mp,const char * file,unsigned long flags)2233 magicload(register Magic_t* mp, const char* file, unsigned long flags)
2234 {
2235 register char* s;
2236 register char* e;
2237 register char* t;
2238 int n;
2239 int found;
2240 int list;
2241 Sfio_t* fp;
2242
2243 mp->flags = mp->disc->flags | flags;
2244 found = 0;
2245 if (list = !(s = (char*)file) || !*s || (*s == '-' || *s == '.') && !*(s + 1))
2246 {
2247 if (!(s = getenv(MAGIC_FILE_ENV)) || !*s)
2248 s = MAGIC_FILE;
2249 }
2250 for (;;)
2251 {
2252 if (!list)
2253 e = 0;
2254 else if (e = strchr(s, ':'))
2255 {
2256 /*
2257 * ok, so ~ won't work for the last list element
2258 * we do it for MAGIC_FILES_ENV anyway
2259 */
2260
2261 if ((strneq(s, "~/", n = 2) || strneq(s, "$HOME/", n = 6) || strneq(s, "${HOME}/", n = 8)) && (t = getenv("HOME")))
2262 {
2263 sfputr(mp->tmp, t, -1);
2264 s += n - 1;
2265 }
2266 sfwrite(mp->tmp, s, e - s);
2267 if (!(s = sfstruse(mp->tmp)))
2268 goto nospace;
2269 }
2270 if (!*s || streq(s, "-"))
2271 s = MAGIC_FILE;
2272 if (!(fp = sfopen(NiL, s, "r")))
2273 {
2274 if (list)
2275 {
2276 if (!(t = pathpath(s, "", PATH_REGULAR|PATH_READ, mp->fbuf, sizeof(mp->fbuf))) && !strchr(s, '/'))
2277 {
2278 strcpy(mp->fbuf, s);
2279 sfprintf(mp->tmp, "%s/%s", MAGIC_DIR, mp->fbuf);
2280 if (!(s = sfstruse(mp->tmp)))
2281 goto nospace;
2282 if (!(t = pathpath(s, "", PATH_REGULAR|PATH_READ, mp->fbuf, sizeof(mp->fbuf))))
2283 goto next;
2284 }
2285 if (!(fp = sfopen(NiL, t, "r")))
2286 goto next;
2287 }
2288 else
2289 {
2290 if (mp->disc->errorf)
2291 (*mp->disc->errorf)(mp, mp->disc, 3, "%s: cannot open magic file", s);
2292 return -1;
2293 }
2294 }
2295 found = 1;
2296 n = load(mp, s, fp);
2297 sfclose(fp);
2298 if (n && !list)
2299 return -1;
2300 next:
2301 if (!e)
2302 break;
2303 s = e + 1;
2304 }
2305 if (!found)
2306 {
2307 if (mp->flags & MAGIC_VERBOSE)
2308 {
2309 if (mp->disc->errorf)
2310 (*mp->disc->errorf)(mp, mp->disc, 2, "cannot find magic file");
2311 }
2312 return -1;
2313 }
2314 return 0;
2315 nospace:
2316 if (mp->disc->errorf)
2317 (*mp->disc->errorf)(mp, mp->disc, 3, "out of space");
2318 return -1;
2319 }
2320
2321 /*
2322 * open a magic session
2323 */
2324
2325 Magic_t*
magicopen(Magicdisc_t * disc)2326 magicopen(Magicdisc_t* disc)
2327 {
2328 register Magic_t* mp;
2329 register int i;
2330 register int n;
2331 register int f;
2332 register int c;
2333 register Vmalloc_t* vm;
2334 unsigned char* map[CC_MAPS + 1];
2335
2336 if (!(vm = vmopen(Vmdcheap, Vmbest, 0)))
2337 return 0;
2338 if (!(mp = vmnewof(vm, 0, Magic_t, 1, 0)))
2339 {
2340 vmclose(vm);
2341 return 0;
2342 }
2343 mp->id = lib;
2344 mp->disc = disc;
2345 mp->vm = vm;
2346 mp->flags = disc->flags;
2347 mp->redisc.re_version = REG_VERSION;
2348 mp->redisc.re_flags = REG_NOFREE;
2349 mp->redisc.re_errorf = (regerror_t)disc->errorf;
2350 mp->redisc.re_resizef = (regresize_t)vmgetmem;
2351 mp->redisc.re_resizehandle = (void*)mp->vm;
2352 mp->dtdisc.key = offsetof(Info_t, name);
2353 mp->dtdisc.link = offsetof(Info_t, link);
2354 if (!(mp->tmp = sfstropen()) || !(mp->infotab = dtnew(mp->vm, &mp->dtdisc, Dtoset)))
2355 goto bad;
2356 for (n = 0; n < elementsof(info); n++)
2357 dtinsert(mp->infotab, &info[n]);
2358 for (i = 0; i < CC_MAPS; i++)
2359 map[i] = ccmap(i, CC_ASCII);
2360 mp->x2n = ccmap(CC_ALIEN, CC_NATIVE);
2361 for (n = 0; n <= UCHAR_MAX; n++)
2362 {
2363 f = 0;
2364 i = CC_MAPS;
2365 while (--i >= 0)
2366 {
2367 c = ccmapchr(map[i], n);
2368 f = (f << CC_BIT) | CCTYPE(c);
2369 }
2370 mp->cctype[n] = f;
2371 }
2372 return mp;
2373 bad:
2374 magicclose(mp);
2375 return 0;
2376 }
2377
2378 /*
2379 * close a magicopen() session
2380 */
2381
2382 int
magicclose(register Magic_t * mp)2383 magicclose(register Magic_t* mp)
2384 {
2385 if (!mp)
2386 return -1;
2387 if (mp->tmp)
2388 sfstrclose(mp->tmp);
2389 if (mp->vm)
2390 vmclose(mp->vm);
2391 return 0;
2392 }
2393
2394 /*
2395 * return the magic string for file with optional stat info st
2396 */
2397
2398 char*
magictype(register Magic_t * mp,Sfio_t * fp,const char * file,register struct stat * st)2399 magictype(register Magic_t* mp, Sfio_t* fp, const char* file, register struct stat* st)
2400 {
2401 off_t off;
2402 char* s;
2403
2404 mp->flags = mp->disc->flags;
2405 mp->mime = 0;
2406 if (!st)
2407 s = T("cannot stat");
2408 else
2409 {
2410 if (mp->fp = fp)
2411 off = sfseek(mp->fp, (off_t)0, SEEK_CUR);
2412 s = type(mp, file, st, mp->tbuf, &mp->tbuf[sizeof(mp->tbuf)-1]);
2413 if (mp->fp)
2414 sfseek(mp->fp, off, SEEK_SET);
2415 if (!(mp->flags & (MAGIC_MIME|MAGIC_ALL)))
2416 {
2417 if (S_ISREG(st->st_mode) && (st->st_size > 0) && (st->st_size < 128))
2418 sfprintf(mp->tmp, "%s ", T("short"));
2419 sfprintf(mp->tmp, "%s", s);
2420 if (!mp->fp && (st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)))
2421 sfprintf(mp->tmp, ", %s", S_ISDIR(st->st_mode) ? T("searchable") : T("executable"));
2422 if (st->st_mode & S_ISUID)
2423 sfprintf(mp->tmp, ", setuid=%s", fmtuid(st->st_uid));
2424 if (st->st_mode & S_ISGID)
2425 sfprintf(mp->tmp, ", setgid=%s", fmtgid(st->st_gid));
2426 if (st->st_mode & S_ISVTX)
2427 sfprintf(mp->tmp, ", sticky");
2428 if (!(s = sfstruse(mp->tmp)))
2429 s = T("out of space");
2430 }
2431 }
2432 if (mp->flags & MAGIC_MIME)
2433 s = mp->mime;
2434 if (!s)
2435 s = T("error");
2436 return s;
2437 }
2438
2439 /*
2440 * list the magic table in mp on sp
2441 */
2442
2443 int
magiclist(register Magic_t * mp,register Sfio_t * sp)2444 magiclist(register Magic_t* mp, register Sfio_t* sp)
2445 {
2446 register Entry_t* ep = mp->magic;
2447 register Entry_t* rp = 0;
2448
2449 mp->flags = mp->disc->flags;
2450 sfprintf(sp, "cont\toffset\ttype\top\tmask\tvalue\tmime\tdesc\n");
2451 while (ep)
2452 {
2453 sfprintf(sp, "%c %c\t", ep->cont, ep->nest);
2454 if (ep->expr)
2455 sfprintf(sp, "%s", ep->expr);
2456 else
2457 sfprintf(sp, "%ld", ep->offset);
2458 sfprintf(sp, "\t%s%c\t%c\t%lo\t", ep->swap == (char)~3 ? "L" : ep->swap == (char)~0 ? "B" : "", ep->type, ep->op, ep->mask);
2459 switch (ep->type)
2460 {
2461 case 'm':
2462 case 's':
2463 sfputr(sp, fmtesc(ep->value.str), -1);
2464 break;
2465 case 'V':
2466 switch (ep->op)
2467 {
2468 case 'l':
2469 sfprintf(sp, "loop(%d,%d,%d,%d)", ep->value.loop->start, ep->value.loop->size, ep->value.loop->count, ep->value.loop->offset);
2470 break;
2471 case 'v':
2472 sfprintf(sp, "vcodex()");
2473 break;
2474 default:
2475 sfprintf(sp, "%p", ep->value.str);
2476 break;
2477 }
2478 break;
2479 default:
2480 sfprintf(sp, "%lo", ep->value.num);
2481 break;
2482 }
2483 sfprintf(sp, "\t%s\t%s\n", ep->mime ? ep->mime : "", fmtesc(ep->desc));
2484 if (ep->cont == '$' && !ep->value.lab->mask)
2485 {
2486 rp = ep;
2487 ep = ep->value.lab;
2488 }
2489 else
2490 {
2491 if (ep->cont == ':')
2492 {
2493 ep = rp;
2494 ep->value.lab->mask = 1;
2495 }
2496 ep = ep->next;
2497 }
2498 }
2499 return 0;
2500 }
2501