file.h revision ae66b4b7f2cfa501c0f1c60e016d70d68170f7d5
1/*
2 * Copyright (c) Ian F. Darwin 1986-1995.
3 * Software written by Ian F. Darwin and others;
4 * maintained 1995-present by Christos Zoulas and others.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice immediately at the beginning of the file, without modification,
11 *    this list of conditions, and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28/*
29 * file.h - definitions for file(1) program
30 * @(#)$File: file.h,v 1.152 2014/06/03 19:01:34 christos Exp $
31 */
32
33#ifndef __file_h__
34#define __file_h__
35
36#ifdef HAVE_CONFIG_H
37#include <config.h>
38#endif
39
/*
 * printf length modifiers for size_t and 64-bit integer arguments.
 * Splice them between '%' and the conversion character, for example
 * "%" SIZE_T_FORMAT "u" or "%" INT64_T_FORMAT "d".
 *
 * WIN32 builds use the "I64" modifier (an empty modifier for size_t when
 * _WIN64 is not defined); all other builds use the C99 "z" and "ll"
 * modifiers.
 */
#ifdef WIN32
  #ifdef _WIN64
    #define SIZE_T_FORMAT "I64"
  #else
    #define SIZE_T_FORMAT ""
  #endif
  #define INT64_T_FORMAT "I64"
#else
  #define SIZE_T_FORMAT "z"
  #define INT64_T_FORMAT "ll"
#endif
51
52#include <stdio.h>	/* Include that here, to make sure __P gets defined */
53#include <errno.h>
54#include <fcntl.h>	/* For open and flags */
55#ifdef HAVE_STDINT_H
56#ifndef __STDC_LIMIT_MACROS
57#define __STDC_LIMIT_MACROS
58#endif
59#include <stdint.h>
60#endif
61#ifdef HAVE_INTTYPES_H
62#include <inttypes.h>
63#endif
64#include <regex.h>
65#include <time.h>
66#include <sys/types.h>
67#include <sys/param.h>
68/* Do this here and now, because struct stat gets re-defined on solaris */
69#include <sys/stat.h>
70#include <stdarg.h>
71
/*
 * Build-time switch: when defined, struct magic carries a `cond' field
 * and the COND_* constants are available.
 */
#define ENABLE_CONDITIONALS

/* Default magic file location; a definition supplied earlier wins. */
#ifndef MAGIC
#define MAGIC "/etc/magic"
#endif

/*
 * Separator character: ';' on EMX and WIN32, ':' everywhere else.
 * NOTE(review): presumably separates entries in a list of magic file
 * paths -- confirm against the users of PATHSEP.
 */
#if defined(__EMX__) || defined (WIN32)
#define PATHSEP	';'
#else
#define PATHSEP	':'
#endif
83
/* "private" marks file-local functions and objects: it expands to static. */
#define private static

/*
 * Linkage markers for non-static symbols.
 *
 * When HAVE_VISIBILITY is non-zero and the target is not WIN32, "public"
 * expands to the "default" visibility attribute and "protected" to the
 * "hidden" visibility attribute.  Otherwise both expand to nothing.
 * An existing definition of "protected" is left untouched in either case.
 */
#if HAVE_VISIBILITY && !defined(WIN32)
#define public  __attribute__ ((__visibility__("default")))
#ifndef protected
#define protected __attribute__ ((__visibility__("hidden")))
#endif
#else
#define public
#ifndef protected
#define protected
#endif
#endif
97
/*
 * Number of elements in the array `a'.  Only meaningful for real arrays;
 * a pointer argument yields sizeof(pointer) / sizeof(element).
 * The argument is parenthesised before indexing so that any array-valued
 * expression can be passed safely.
 */
#ifndef __arraycount
#define __arraycount(a) (sizeof(a) / sizeof((a)[0]))
#endif

/*
 * __GNUC_PREREQ__(x, y) is non-zero when compiling with a GNU-compatible
 * compiler whose version is at least x.y, and 0 for any other compiler.
 */
#ifndef __GNUC_PREREQ__
#ifdef __GNUC__
#define	__GNUC_PREREQ__(x, y)						\
	((__GNUC__ == (x) && __GNUC_MINOR__ >= (y)) ||			\
	 (__GNUC__ > (x)))
#else
#define	__GNUC_PREREQ__(x, y)	0
#endif
#endif

/* Make __attribute__((...)) vanish on compilers that do not define __GNUC__. */
#ifndef __GNUC__
#ifndef __attribute__
#define __attribute__(a)
#endif
#endif

/*
 * Smaller / larger of two values.  Each argument may be evaluated twice,
 * so do not pass expressions with side effects.
 */
#ifndef MIN
#define	MIN(a,b)	(((a) < (b)) ? (a) : (b))
#endif

#ifndef MAX
#define	MAX(a,b)	(((a) > (b)) ? (a) : (b))
#endif
125
#ifndef HOWMANY
# define HOWMANY (256 * 1024)	/* how much of the file to look at */
#endif
#define MAXMAGIS 8192		/* max entries in any one magic file
				   or directory */
#define MAXDESC	64		/* size of the text description buffer */
#define MAXMIME	80		/* size of the text MIME type buffer */
#define MAXstring 64		/* max len of "string" types */

#define MAGICNO		0xF11E041C
#define VERSIONNO	12
/*
 * Equals the summed field sizes of struct magic with the limits above:
 * 32 bytes of fixed fields + MAXstring + MAXDESC + MAXMIME + 8.
 */
#define FILE_MAGICSIZE	248

/*
 * Action codes.
 * NOTE(review): presumably the int argument of file_apprentice() --
 * confirm against the caller.
 */
#define	FILE_LOAD	0
#define FILE_CHECK	1
#define FILE_COMPILE	2
#define FILE_LIST	3
143
144union VALUETYPE {
145	uint8_t b;
146	uint16_t h;
147	uint32_t l;
148	uint64_t q;
149	uint8_t hs[2];	/* 2 bytes of a fixed-endian "short" */
150	uint8_t hl[4];	/* 4 bytes of a fixed-endian "long" */
151	uint8_t hq[8];	/* 8 bytes of a fixed-endian "quad" */
152	char s[MAXstring];	/* the search string or regex pattern */
153	unsigned char us[MAXstring];
154	float f;
155	double d;
156};
157
158struct magic {
159	/* Word 1 */
160	uint16_t cont_level;	/* level of ">" */
161	uint8_t flag;
162#define INDIR		0x01	/* if '(...)' appears */
163#define OFFADD		0x02	/* if '>&' or '>...(&' appears */
164#define INDIROFFADD	0x04	/* if '>&(' appears */
165#define UNSIGNED	0x08	/* comparison is unsigned */
166#define NOSPACE		0x10	/* suppress space character before output */
167#define BINTEST		0x20	/* test is for a binary type (set only
168				   for top-level tests) */
169#define TEXTTEST	0x40	/* for passing to file_softmagic */
170
171	uint8_t factor;
172
173	/* Word 2 */
174	uint8_t reln;		/* relation (0=eq, '>'=gt, etc) */
175	uint8_t vallen;		/* length of string value, if any */
176	uint8_t type;		/* comparison type (FILE_*) */
177	uint8_t in_type;	/* type of indirection */
178#define 			FILE_INVALID	0
179#define 			FILE_BYTE	1
180#define				FILE_SHORT	2
181#define				FILE_DEFAULT	3
182#define				FILE_LONG	4
183#define				FILE_STRING	5
184#define				FILE_DATE	6
185#define				FILE_BESHORT	7
186#define				FILE_BELONG	8
187#define				FILE_BEDATE	9
188#define				FILE_LESHORT	10
189#define				FILE_LELONG	11
190#define				FILE_LEDATE	12
191#define				FILE_PSTRING	13
192#define				FILE_LDATE	14
193#define				FILE_BELDATE	15
194#define				FILE_LELDATE	16
195#define				FILE_REGEX	17
196#define				FILE_BESTRING16	18
197#define				FILE_LESTRING16	19
198#define				FILE_SEARCH	20
199#define				FILE_MEDATE	21
200#define				FILE_MELDATE	22
201#define				FILE_MELONG	23
202#define				FILE_QUAD	24
203#define				FILE_LEQUAD	25
204#define				FILE_BEQUAD	26
205#define				FILE_QDATE	27
206#define				FILE_LEQDATE	28
207#define				FILE_BEQDATE	29
208#define				FILE_QLDATE	30
209#define				FILE_LEQLDATE	31
210#define				FILE_BEQLDATE	32
211#define				FILE_FLOAT	33
212#define				FILE_BEFLOAT	34
213#define				FILE_LEFLOAT	35
214#define				FILE_DOUBLE	36
215#define				FILE_BEDOUBLE	37
216#define				FILE_LEDOUBLE	38
217#define				FILE_BEID3	39
218#define				FILE_LEID3	40
219#define				FILE_INDIRECT	41
220#define				FILE_QWDATE	42
221#define				FILE_LEQWDATE	43
222#define				FILE_BEQWDATE	44
223#define				FILE_NAME	45
224#define				FILE_USE	46
225#define				FILE_CLEAR	47
226#define				FILE_NAMES_SIZE	48 /* size of array to contain all names */
227
228#define IS_STRING(t) \
229	((t) == FILE_STRING || \
230	 (t) == FILE_PSTRING || \
231	 (t) == FILE_BESTRING16 || \
232	 (t) == FILE_LESTRING16 || \
233	 (t) == FILE_REGEX || \
234	 (t) == FILE_SEARCH || \
235	 (t) == FILE_NAME || \
236	 (t) == FILE_USE)
237
238#define FILE_FMT_NONE 0
239#define FILE_FMT_NUM  1 /* "cduxXi" */
240#define FILE_FMT_STR  2 /* "s" */
241#define FILE_FMT_QUAD 3 /* "ll" */
242#define FILE_FMT_FLOAT 4 /* "eEfFgG" */
243#define FILE_FMT_DOUBLE 5 /* "eEfFgG" */
244
245	/* Word 3 */
246	uint8_t in_op;		/* operator for indirection */
247	uint8_t mask_op;	/* operator for mask */
248#ifdef ENABLE_CONDITIONALS
249	uint8_t cond;		/* conditional type */
250#else
251	uint8_t dummy;
252#endif
253	uint8_t factor_op;
254#define		FILE_FACTOR_OP_PLUS	'+'
255#define		FILE_FACTOR_OP_MINUS	'-'
256#define		FILE_FACTOR_OP_TIMES	'*'
257#define		FILE_FACTOR_OP_DIV	'/'
258#define		FILE_FACTOR_OP_NONE	'\0'
259
260#define				FILE_OPS	"&|^+-*/%"
261#define				FILE_OPAND	0
262#define				FILE_OPOR	1
263#define				FILE_OPXOR	2
264#define				FILE_OPADD	3
265#define				FILE_OPMINUS	4
266#define				FILE_OPMULTIPLY	5
267#define				FILE_OPDIVIDE	6
268#define				FILE_OPMODULO	7
269#define				FILE_OPS_MASK	0x07 /* mask for above ops */
270#define				FILE_UNUSED_1	0x08
271#define				FILE_UNUSED_2	0x10
272#define				FILE_UNUSED_3	0x20
273#define				FILE_OPINVERSE	0x40
274#define				FILE_OPINDIRECT	0x80
275
276#ifdef ENABLE_CONDITIONALS
277#define				COND_NONE	0
278#define				COND_IF		1
279#define				COND_ELIF	2
280#define				COND_ELSE	3
281#endif /* ENABLE_CONDITIONALS */
282
283	/* Word 4 */
284	uint32_t offset;	/* offset to magic number */
285	/* Word 5 */
286	int32_t in_offset;	/* offset from indirection */
287	/* Word 6 */
288	uint32_t lineno;	/* line number in magic file */
289	/* Word 7,8 */
290	union {
291		uint64_t _mask;	/* for use with numeric and date types */
292		struct {
293			uint32_t _count;	/* repeat/line count */
294			uint32_t _flags;	/* modifier flags */
295		} _s;		/* for use with string types */
296	} _u;
297#define num_mask _u._mask
298#define str_range _u._s._count
299#define str_flags _u._s._flags
300	/* Words 9-16 */
301	union VALUETYPE value;	/* either number or string */
302	/* Words 17-32 */
303	char desc[MAXDESC];	/* description */
304	/* Words 33-52 */
305	char mimetype[MAXMIME]; /* MIME type */
306	/* Words 53-54 */
307	char apple[8];
308};
309
/* Single-bit mask with bit number A set (A counts from 0). */
#define BIT(A)   (1 << (A))

/*
 * Modifier flag bits.  Two pairs intentionally share a bit:
 * PSTRING_1_BE and PSTRING_1_LE are both bit 7, and PSTRING_4_LE and
 * REGEX_LINE_COUNT are both bit 11.
 */
#define STRING_COMPACT_WHITESPACE		BIT(0)
#define STRING_COMPACT_OPTIONAL_WHITESPACE	BIT(1)
#define STRING_IGNORE_LOWERCASE			BIT(2)
#define STRING_IGNORE_UPPERCASE			BIT(3)
#define REGEX_OFFSET_START			BIT(4)
#define STRING_TEXTTEST				BIT(5)
#define STRING_BINTEST				BIT(6)
#define PSTRING_1_BE				BIT(7)
#define PSTRING_1_LE				BIT(7)
#define PSTRING_2_BE				BIT(8)
#define PSTRING_2_LE				BIT(9)
#define PSTRING_4_BE				BIT(10)
#define PSTRING_4_LE				BIT(11)
#define REGEX_LINE_COUNT			BIT(11)
/* Union of every pstring length-size bit (PSTRING_1_LE equals PSTRING_1_BE). */
#define PSTRING_LEN	\
    (PSTRING_1_BE|PSTRING_2_LE|PSTRING_2_BE|PSTRING_4_LE|PSTRING_4_BE)
#define PSTRING_LENGTH_INCLUDES_ITSELF		BIT(12)
#define	STRING_TRIM				BIT(13)

/* Characters paired by name with the flag bits above. */
#define CHAR_COMPACT_WHITESPACE			'W'
#define CHAR_COMPACT_OPTIONAL_WHITESPACE	'w'
#define CHAR_IGNORE_LOWERCASE			'c'
#define CHAR_IGNORE_UPPERCASE			'C'
#define CHAR_REGEX_OFFSET_START			's'
#define CHAR_TEXTTEST				't'
#define	CHAR_TRIM				'T'
#define CHAR_BINTEST				'b'
#define CHAR_PSTRING_1_BE			'B'
#define CHAR_PSTRING_1_LE			'B'
#define CHAR_PSTRING_2_BE			'H'
#define CHAR_PSTRING_2_LE			'h'
#define CHAR_PSTRING_4_BE			'L'
#define CHAR_PSTRING_4_LE			'l'
#define CHAR_PSTRING_LENGTH_INCLUDES_ITSELF     'J'

/* Both case-folding bits together. */
#define STRING_IGNORE_CASE		(STRING_IGNORE_LOWERCASE|STRING_IGNORE_UPPERCASE)
#define STRING_DEFAULT_RANGE		100
346
347
/*
 * Node of a list of magic entries.  Each node refers to an array of
 * `nmagic' struct magic entries and links to its neighbours through
 * `next' and `prev'.
 */
struct mlist {
	struct magic *magic;		/* array of magic entries */
	uint32_t nmagic;		/* number of entries in array */
	void *map;			/* internal resources used by entry */
	struct mlist *next, *prev;	/* neighbouring list nodes */
};
355
/*
 * Cast helpers: CAST(T, b) and RCAST(T, b) convert `b' to type T.
 * Under C++ they map to static_cast / reinterpret_cast; under C both are
 * a plain cast.  The C expansion is wrapped in an outer pair of
 * parentheses so that a postfix operator applied to the result, as in
 * CAST(struct foo *, p)->field, binds to the cast value and not to `b'.
 */
#ifdef __cplusplus
#define CAST(T, b)	static_cast<T>(b)
#define RCAST(T, b)	reinterpret_cast<T>(b)
#else
#define CAST(T, b)	((T)(b))
#define RCAST(T, b)	((T)(b))
#endif
363
/*
 * Per-level bookkeeping record.
 * NOTE(review): presumably one record per continuation level, held in
 * the `li' array of struct magic_set -- confirm against the users.
 */
struct level_info {
	int32_t off;		/* offset recorded for this level */
	int got_match;		/* match indicator for this level */
#ifdef ENABLE_CONDITIONALS
	int last_match;
	int last_cond;	/* used for error checking by parse() */
#endif
};
372
373#define MAGIC_SETS	2
374
375struct magic_set {
376	struct mlist *mlist[MAGIC_SETS];	/* list of regular entries */
377	struct cont {
378		size_t len;
379		struct level_info *li;
380	} c;
381	struct out {
382		char *buf;		/* Accumulation buffer */
383		char *pbuf;		/* Printable buffer */
384	} o;
385	uint32_t offset;
386	int error;
387	int flags;			/* Control magic tests. */
388	int event_flags;		/* Note things that happened. */
389#define 		EVENT_HAD_ERR		0x01
390	const char *file;
391	size_t line;			/* current magic line number */
392
393	/* data for searches */
394	struct {
395		const char *s;		/* start of search in original source */
396		size_t s_len;		/* length of search region */
397		size_t offset;		/* starting offset in source: XXX - should this be off_t? */
398		size_t rm_len;		/* match length */
399	} search;
400
401	/* FIXME: Make the string dynamically allocated so that e.g.
402	   strings matched in files can be longer than MAXstring */
403	union VALUETYPE ms_value;	/* either number or string */
404};
405
/* Type for Unicode characters: one unsigned long per character. */
typedef unsigned long unichar;
408
409struct stat;
410#define FILE_T_LOCAL	1
411#define FILE_T_WINDOWS	2
412protected const char *file_fmttime(uint64_t, int, char *);
413protected struct magic_set *file_ms_alloc(int);
414protected void file_ms_free(struct magic_set *);
415protected int file_buffer(struct magic_set *, int, const char *, const void *,
416    size_t);
417protected int file_fsmagic(struct magic_set *, const char *, struct stat *);
418protected int file_pipe2file(struct magic_set *, int, const void *, size_t);
419protected int file_vprintf(struct magic_set *, const char *, va_list)
420    __attribute__((__format__(__printf__, 2, 0)));
421protected size_t file_printedlen(const struct magic_set *);
422protected int file_replace(struct magic_set *, const char *, const char *);
423protected int file_printf(struct magic_set *, const char *, ...)
424    __attribute__((__format__(__printf__, 2, 3)));
425protected int file_reset(struct magic_set *);
426protected int file_tryelf(struct magic_set *, int, const unsigned char *,
427    size_t);
428protected int file_trycdf(struct magic_set *, int, const unsigned char *,
429    size_t);
430#if HAVE_FORK
431protected int file_zmagic(struct magic_set *, int, const char *,
432    const unsigned char *, size_t);
433#endif
434protected int file_ascmagic(struct magic_set *, const unsigned char *, size_t,
435    int);
436protected int file_ascmagic_with_encoding(struct magic_set *,
437    const unsigned char *, size_t, unichar *, size_t, const char *,
438    const char *, int);
439protected int file_encoding(struct magic_set *, const unsigned char *, size_t,
440    unichar **, size_t *, const char **, const char **, const char **);
441protected int file_is_tar(struct magic_set *, const unsigned char *, size_t);
442protected int file_softmagic(struct magic_set *, const unsigned char *, size_t,
443    size_t, int, int);
444protected int file_apprentice(struct magic_set *, const char *, int);
445protected int file_magicfind(struct magic_set *, const char *, struct mlist *);
446protected uint64_t file_signextend(struct magic_set *, struct magic *,
447    uint64_t);
448protected void file_badread(struct magic_set *);
449protected void file_badseek(struct magic_set *);
450protected void file_oomem(struct magic_set *, size_t);
451protected void file_error(struct magic_set *, int, const char *, ...)
452    __attribute__((__format__(__printf__, 3, 4)));
453protected void file_magerror(struct magic_set *, const char *, ...)
454    __attribute__((__format__(__printf__, 2, 3)));
455protected void file_magwarn(struct magic_set *, const char *, ...)
456    __attribute__((__format__(__printf__, 2, 3)));
457protected void file_mdump(struct magic *);
458protected void file_showstr(FILE *, const char *, size_t);
459protected size_t file_mbswidth(const char *);
460protected const char *file_getbuffer(struct magic_set *);
461protected ssize_t sread(int, void *, size_t, int);
462protected int file_check_mem(struct magic_set *, unsigned int);
463protected int file_looks_utf8(const unsigned char *, size_t, unichar *,
464    size_t *);
465protected size_t file_pstring_length_size(const struct magic *);
466protected size_t file_pstring_get_length(const struct magic *, const char *);
467#ifdef __EMX__
468protected int file_os2_apptype(struct magic_set *, const char *, const void *,
469    size_t);
470#endif /* __EMX__ */
471
472typedef struct {
473	const char *pat;
474	char *old_lc_ctype;
475	int rc;
476	regex_t rx;
477} file_regex_t;
478
479protected int file_regcomp(file_regex_t *, const char *, int);
480protected int file_regexec(file_regex_t *, const char *, size_t, regmatch_t *,
481    int);
482protected void file_regfree(file_regex_t *);
483protected void file_regerror(file_regex_t *, int, struct magic_set *);
484
485typedef struct {
486	char *buf;
487	uint32_t offset;
488} file_pushbuf_t;
489
490protected file_pushbuf_t *file_push_buffer(struct magic_set *);
491protected char  *file_pop_buffer(struct magic_set *, file_pushbuf_t *);
492
#ifndef COMPILE_ONLY
/* Table of names and its element count, defined elsewhere. */
extern const char *file_names[];
extern const size_t file_nnames;
#endif

/*
 * Fallbacks for libc facilities.  Each HAVE_* macro states that the
 * platform already provides the function; when it is not defined, the
 * declaration or macro below is used instead.
 */
#ifndef HAVE_STRERROR
extern int sys_nerr;
extern char *sys_errlist[];
/* Table lookup, with a fixed message for out-of-range error numbers. */
#define strerror(e) \
	(((e) >= 0 && (e) < sys_nerr) ? sys_errlist[(e)] : "Unknown error")
#endif

#ifndef HAVE_STRTOUL
/* The signed conversion stands in for the missing unsigned one. */
#define strtoul(a, b, c)	strtol(a, b, c)
#endif

#ifndef HAVE_PREAD
ssize_t pread(int, void *, size_t, off_t);
#endif
#ifndef HAVE_VASPRINTF
int vasprintf(char **, const char *, va_list);
#endif
#ifndef HAVE_ASPRINTF
int asprintf(char **, const char *, ...);
#endif

#ifndef HAVE_STRLCPY
size_t strlcpy(char *, const char *, size_t);
#endif
#ifndef HAVE_STRLCAT
size_t strlcat(char *, const char *, size_t);
#endif
#ifndef HAVE_STRCASESTR
char *strcasestr(const char *, const char *);
#endif
#ifndef HAVE_GETLINE
ssize_t getline(char **, size_t *, FILE *);
ssize_t getdelim(char **, size_t *, int, FILE *);
#endif
#ifndef HAVE_CTIME_R
char   *ctime_r(const time_t *, char *);
#endif
#ifndef HAVE_ASCTIME_R
char   *asctime_r(const struct tm *, char *);
#endif
#ifndef HAVE_FMTCHECK
/* The second argument is the format string the result is checked against. */
const char *fmtcheck(const char *, const char *)
     __attribute__((__format_arg__(2)));
#endif
542
/*
 * Define QUICK when the platform has both mmap() and <sys/mman.h>,
 * unless it was already defined.
 */
#if defined(HAVE_MMAP) && defined(HAVE_SYS_MMAN_H) && !defined(QUICK)
#define QUICK
#endif

/*
 * Platforms whose <fcntl.h> has no O_BINARY get a zero value, so the
 * flag can be OR-ed into open() flags without any effect.
 */
#ifndef O_BINARY
#define O_BINARY	0
#endif
550
/*
 * FILE_RCSID(id) embeds the revision string `id' in the translation unit.
 *
 *  - C with GCC 3 or newer: a static array named rcsid, marked __used__
 *    so that it is kept even though nothing references it.
 *  - other C compilers: a static function named rcsid that refers to
 *    itself, which gives the otherwise unused static a reference.
 *  - C++: expands to nothing.
 *
 * The first form already ends in ';' and the second is a complete
 * function definition, so the macro is used without a trailing semicolon.
 */
#ifndef __cplusplus
#if defined(__GNUC__) && (__GNUC__ >= 3)
#define FILE_RCSID(id) \
static const char rcsid[] __attribute__((__used__)) = id;
#else
#define FILE_RCSID(id) \
static const char *rcsid(const char *p) { \
	return rcsid(p = id); \
}
#endif
#else
#define FILE_RCSID(id)
#endif
564
565#endif /* __file_h__ */
566