1/* Copyright (c) 2018-2018, David Anderson
2All rights reserved.
3
4Redistribution and use in source and binary forms, with
5or without modification, are permitted provided that the
6following conditions are met:
7
8    Redistributions of source code must retain the above
9    copyright notice, this list of conditions and the following
10    disclaimer.
11
12    Redistributions in binary form must reproduce the above
13    copyright notice, this list of conditions and the following
14    disclaimer in the documentation and/or other materials
15    provided with the distribution.
16
17THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
18CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
19INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
22CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
29EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30*/
31
32#include "config.h"
33#include <stdio.h>
34#include <sys/types.h> /* open() */
35#include <sys/stat.h> /* open() */
36#include <fcntl.h> /* O_RDONLY */
37#ifdef HAVE_UNISTD_H
38#include <unistd.h> /* lseek read close */
39#elif defined(_WIN32) && defined(_MSC_VER)
40#include <io.h>
41#include <basetsd.h>
42typedef SSIZE_T ssize_t; /* MSVC does not have POSIX ssize_t */
43#endif /* HAVE_UNISTD_H */
44#ifdef HAVE_STRING_H
45#include <string.h> /* memcpy, strcpy */
46#endif /* HAVE_STRING_H */
47
48/* Windows specific header files */
49#if defined(_WIN32) && defined(HAVE_STDAFX_H)
50#include "stdafx.h"
51#endif /* HAVE_STDAFX_H */
52
53#include "libdwarf.h"
54#include "memcpy_swap.h"
55#include "dwarf_object_read_common.h"
56#include "dwarf_object_detector.h"
57
/*  O_BINARY is or'd into the open() flags below. Where the
    platform headers do not define it, make it a no-op. */
#ifndef O_BINARY
#define O_BINARY 0
#endif /* O_BINARY */

/*  NOTE(review): this spot used to say "This is the main()
    program for the object_detector executable", but no
    main() is defined in this source. What is defined here
    is dwarf_object_detector_fd() and
    dwarf_object_detector_path(). */

/*  Truth values returned by the is_*_magic() helpers. */
#ifndef TRUE
#define TRUE 1
#define FALSE 0
#endif /* TRUE */

/*  Fallback used only if <fcntl.h> did not supply O_RDONLY. */
#ifndef O_RDONLY
#define O_RDONLY 0
#endif
72
/*  TYP, SIZEOFT32 and ASNAR
    mean we can use correctly-sized arrays of char for the
    struct members instead of determining a proper integer
    of that size.
77
78    We are dealing with carefully constructed structs
79    that do not have any alignment-forced (hidden)
80    unused bytes so reading lengths from the real structs
81    works for each variable.  */
82
/*  TYP(n,l) declares a struct member named n that is an
    array of l chars, so member sizes match the on-disk
    field sizes exactly. */
#define TYP(n,l) char n[l]
/*  Byte count of a 32-bit field as stored in the file. */
#define SIZEOFT32 4


/*  Return values used by the functions in this file. */
#define DW_DLV_NO_ENTRY -1
#define DW_DLV_OK        0
#define DW_DLV_ERROR     1

/*  Elf identification constants, supplied here only when
    no earlier header has defined EI_NIDENT.
    EI_CLASS, EI_DATA and EI_VERSION are indexes into
    e_ident[]; the ELFCLASS* and ELFDATA* values are what
    fill_in_elf_fields() compares those bytes against. */
#ifndef EI_NIDENT
#define EI_NIDENT 16
#define EI_CLASS  4
#define EI_DATA   5
#define EI_VERSION 6
#define ELFCLASS32 1
#define ELFCLASS64 2
#define ELFDATA2LSB 1
#define ELFDATA2MSB 2
#endif /* EI_NIDENT */

/*  Text placed between the given path and its basename
    by dwarf_object_detector_path() when it builds the
    candidate dSYM path. */
#define DSYM_SUFFIX ".dSYM/Contents/Resources/DWARF/"
/*  Not referenced by any code visible in this source. */
#define PATHSIZE 2000

/*  Mach-o magic numbers. is_mach_o_magic() treats a match
    with MH_MAGIC or MH_MAGIC_64 as big-endian and a match
    with MH_CIGAM or MH_CIGAM_64 as little-endian. */
#ifndef  MH_MAGIC
/* mach-o 32bit */
#define MH_MAGIC        0xfeedface
#define MH_CIGAM        0xcefaedfe
#endif /*  MH_MAGIC */
#ifndef  MH_MAGIC_64
/* mach-o 64bit */
#define MH_MAGIC_64 0xfeedfacf
#define MH_CIGAM_64 0xcffaedfe
#endif /*  MH_MAGIC_64 */
115
/*  Build an integer from the first len bytes at d, taking
    d[0] as the most significant byte. The result depends
    only on the byte order in memory, never on the
    endianness of the host, so it can be compared directly
    against magic numbers written as hex constants.

    d   - bytes to read; not modified.
    len - number of bytes to use. If len is zero nothing
          is read and 0 is returned. If len exceeds
          sizeof(unsigned long) the earliest bytes are
          shifted out of the result.

    Returns the assembled value. */
static unsigned long
magic_copy(const unsigned char *d, unsigned len)
{
    unsigned long v = 0;
    unsigned i = 0;

    /*  Starting at index 0 with v == 0 gives the same value
        as seeding with d[0], without touching d when len
        is zero. */
    for (i = 0; i < len; ++i) {
        v = (v << 8) | d[i];
    }
    return v;
}
129
130
/*  ASNAR(func,t,s): load the bytes of char array s into
    the integer lvalue t.

    func - a copy function called as func(dest,src,length);
           the callers in this file pass a pointer that
           refers to either a byte-swapping or a
           non-swapping copy.
    t    - integer lvalue; it is set to zero first so any
           bytes func does not write stay zero.
    s    - a true array (sizeof(s) is used as the length),
           which must not be larger than t.

    Each form is wrapped in do { } while (0) so it acts as
    a single statement. */
#ifdef WORDS_BIGENDIAN
/*  Big-endian host: the low-order bytes of t sit at its
    highest addresses, so the copy lands
    sizeof(t) - sizeof(s) bytes into t. */
#define ASNAR(func,t,s)                         \
    do {                                        \
        unsigned tbyte = sizeof(t) - sizeof(s); \
        t = 0;                                  \
        func(((char *)&t)+tbyte ,&s[0],sizeof(s));  \
    } while (0)
#else /* LITTLE ENDIAN */
/*  Little-endian host: the low-order bytes of t come
    first, so the copy starts at the address of t. */
#define ASNAR(func,t,s)                         \
    do {                                        \
        t = 0;                                  \
        func(&t,&s[0],sizeof(s));               \
    } while (0)
#endif /* end LITTLE- BIG-ENDIAN */
145
146
/*  Length of e_ident[]. This repeats the value given in
    the #ifndef EI_NIDENT block earlier in this file. */
#define EI_NIDENT 16
/* An incomplete elf header, good for 32 and 64bit elf */
/*  dwarf_object_detector_fd() reads sizeof(struct elf_header)
    bytes from the start of the file into one of these and
    then tests those bytes for Elf, Mach-o and archive
    magic, so the struct also serves as the generic
    "first bytes of the file" buffer. */
struct elf_header {
    /*  Identification bytes: magic in [0..3], then the
        EI_CLASS, EI_DATA and EI_VERSION bytes. */
    unsigned char  e_ident[EI_NIDENT];
    /*  The following are char arrays of the given byte
        counts (see TYP); nothing visible in this source
        reads them. */
    TYP(e_type,2);
    TYP(e_machine,2);
    TYP(e_version,4);
#ifdef HAVE_CUSTOM_LIBELF
    /* In the case of custom ELF, use extra space */
    TYP(e_custom,64);
#endif /* HAVE_CUSTOM_LIBELF */
};
159
160/*  Windows. Certain PE objects.
161    The following references may be of interest.
162https://msdn.microsoft.com/library/windows/desktop/ms680547(v=vs.85).aspx       #PE format overview and various machine magic numbers
163
164https://msdn.microsoft.com/en-us/library/ms809762.aspx  # describes some details of PE headers, basically an overview
165
166https://msdn.microsoft.com/en-us/library/windows/desktop/aa383751(v=vs.85).aspx #defines sizes of various types
167
168https://msdn.microsoft.com/fr-fr/library/windows/desktop/ms680313(v=vs.85).aspx #defines IMAGE_FILE_HEADER and Machine fields (32/64)
169
170https://msdn.microsoft.com/fr-fr/library/windows/desktop/ms680305(v=vs.85).aspx #defines IMAGE_DATA_DIRECTORY
171
172https://msdn.microsoft.com/en-us/library/windows/desktop/ms680339(v=vs.85).aspx #Defines IMAGE_OPTIONAL_HEADER and some magic numbers
173
174https://msdn.microsoft.com/fr-fr/library/windows/desktop/ms680336(v=vs.85).aspx # defines _IMAGE_NT_HEADERS 32 64
175
176https://msdn.microsoft.com/en-us/library/windows/desktop/ms680341(v=vs.85).aspx # defines _IMAGE_SECTION_HEADER
177
178*/
179
180/* ===== START pe structures */
181
/*  The 64 bytes read from offset zero of a candidate PE
    file by is_pe_object(). All members are char arrays
    (see TYP) so the struct has exactly the on-disk size. */
struct dos_header {
    /*  Two signature bytes, compared with
        IMAGE_DOS_SIGNATURE_dw / IMAGE_DOS_REVSIGNATURE_dw. */
    TYP(dh_mz,2);
    /*  58 bytes that this file never examines. */
    TYP(dh_dos_data,58);
    /*  File offset at which is_pe_object() expects to find
        the 4-byte NT signature. */
    TYP(dh_image_offset,4);
};
187
/*  Values is_pe_object() compares against.
    The two DOS signatures are the same two bytes in either
    order; which one matches the raw file bytes decides the
    endianness reported for the object. */
#define IMAGE_DOS_SIGNATURE_dw      0x5A4D
#define IMAGE_DOS_REVSIGNATURE_dw   0x4D5A
/*  Expected value of the 4-byte NT signature found at
    dh_image_offset. */
#define IMAGE_NT_SIGNATURE_dw       0x00004550
/*  Recognized im_machine values: I386 is reported as
    offset size 32, IA64 and AMD64 as offset size 64. */
#define IMAGE_FILE_MACHINE_I386_dw  0x14c
#define IMAGE_FILE_MACHINE_IA64_dw  0x200
#define IMAGE_FILE_MACHINE_AMD64_dw 0x8664
194
195
/*  The 20 bytes is_pe_object() reads immediately after the
    4-byte NT signature. All members are char arrays
    (see TYP) sized to match the file layout. */
struct pe_image_file_header {
    /*  Machine type; the only member is_pe_object()
        actually decodes. */
    TYP(im_machine,2);
    /*  The remaining members are present to give the
        struct its full size; this file does not read
        their contents. */
    TYP(im_sectioncount,2);
    TYP(im_ignoring,(3*4));
    TYP(im_opt_header_size,2);
    TYP(im_ignoringb,2);
};
203
204/* ===== END pe structures */
205
206
/*  For following MacOS file naming convention.
    Walks the string f and remembers the position just
    past the most recent '\\', '/' or ':' seen.
    Returns that position (which may be the terminating
    NUL if f ends in a separator), or NULL when f holds
    no separator at all. */
static const char *
getseparator (const char *f)
{
    const char *after_last = NULL;
    const char *cur = f;

    for ( ; *cur; ++cur) {
        switch (*cur) {
        case '\\':
        case '/':
        case ':':
            after_last = cur + 1;
            break;
        default:
            break;
        }
    }
    return after_last;
}
225
/*  Return the part of f that follows its last separator
    (as found by getseparator()), or f itself when it has
    no separator. The result points into f. */
static const char *
getbasename (const char *f)
{
    const char *tail = getseparator (f);

    return tail ? tail : f;
}
235
/*  Not a standard function, though part of GNU libc
    since 2008 (I have never examined the GNU version).
    Copies the string src, including its terminating NUL,
    to dest and returns a pointer to that NUL in dest, so
    calls can be chained to append further text.
    The caller must ensure dest is large enough. */
static char *
dw_stpcpy(char *dest,const char *src)
{
    char *out = dest;
    const char *in = src;

    while (*in) {
        *out++ = *in++;
    }
    *out = 0;
    return out;
}
250
251
252
253/* This started like Elf, so check initial fields. */
254static int
255fill_in_elf_fields(struct elf_header *h,
256    unsigned *endian,
257    /*  Size of the object file offsets, not DWARF offset
258        size. */
259    unsigned *objoffsetsize,
260    int *errcode)
261{
262    unsigned locendian = 0;
263    unsigned locoffsetsize = 0;
264
265    switch(h->e_ident[EI_CLASS]) {
266    case ELFCLASS32:
267        locoffsetsize = 32;
268        break;
269    case ELFCLASS64:
270        locoffsetsize = 64;
271        break;
272    default:
273        *errcode = DW_DLE_ELF_CLASS_BAD;
274        return DW_DLV_ERROR;
275    }
276    switch(h->e_ident[EI_DATA]) {
277    case ELFDATA2LSB:
278        locendian = DW_ENDIAN_LITTLE;
279        break;
280    case ELFDATA2MSB:
281        locendian = DW_ENDIAN_BIG;
282        break;
283    default:
284        *errcode = DW_DLE_ELF_ENDIAN_BAD;
285        return DW_DLV_ERROR;
286    }
287    if (h->e_ident[EI_VERSION] != 1 /* EV_CURRENT */) {
288        *errcode = DW_DLE_ELF_VERSION_BAD;
289        return DW_DLV_ERROR;
290    }
291    *endian = locendian;
292    *objoffsetsize = locoffsetsize;
293    return DW_DLV_OK;
294}
295static char archive_magic[8] = {
296'!','<','a','r','c','h','>',0x0a
297};
298static int
299is_archive_magic(struct elf_header *h) {
300    int i = 0;
301    int len = sizeof(archive_magic);
302    const char *cp = (const char *)h;
303    for( ; i < len; ++i) {
304        if (cp[i] != archive_magic[i]) {
305            return FALSE;
306        }
307    }
308    return TRUE;
309}
310
/*  Decide whether the file open on fd is a PE object
    we recognize.

    fd         - open descriptor, read through
                 _dwarf_object_read_random().
    filesize   - size of the file in bytes, used to
                 reject files too small for the headers.
    endian     - on DW_DLV_OK set to DW_ENDIAN_BIG or
                 DW_ENDIAN_LITTLE; otherwise untouched.
    offsetsize - on DW_DLV_OK set to 32 or 64; otherwise
                 untouched.
    errcode    - set to a DW_DLE_* code by every check
                 in this function that fails. It is also
                 handed to _dwarf_object_read_random(),
                 which presumably sets it on a failed read.

    Return of DW_DLV_OK  it is a PE file we recognize.
    Any other return is DW_DLV_ERROR or the non-OK value
    that _dwarf_object_read_random() returned.
    (An older version of this comment mentioned an
    is_pe_flag argument; there is no such argument.) */
static int
is_pe_object(int fd,
    unsigned long filesize,
    unsigned *endian,
    unsigned *offsetsize,
    int *errcode)
{
    unsigned dos_sig = 0;
    unsigned locendian = 0;
    /*  Copy function chosen below so that ASNAR delivers
        file integers in host byte order. */
    void (*word_swap) (void *, const void *, unsigned long);
    unsigned long nt_address = 0;
    struct dos_header dhinmem;
    char nt_sig_array[4];
    unsigned long nt_sig = 0;
    struct pe_image_file_header ifh;
    int res = 0;

    /*  The smallest file of interest holds a dos header,
        a 4-byte NT signature and an image file header. */
    if (filesize < (sizeof (struct dos_header) +
        SIZEOFT32 + sizeof(struct pe_image_file_header))) {
        *errcode = DW_DLE_FILE_TOO_SMALL;
        return DW_DLV_ERROR;
    }
    /*  The dos header is at file offset zero. */
    res = _dwarf_object_read_random(fd,(char *)&dhinmem,
        0,sizeof(dhinmem),filesize,errcode);
    if (res != DW_DLV_OK) {
        return res;
    }
    /* No swap here, want it as in the file */
    dos_sig = magic_copy((unsigned char *)dhinmem.dh_mz,
        sizeof(dhinmem.dh_mz));
    if (dos_sig == IMAGE_DOS_SIGNATURE_dw) {
        /*  IMAGE_DOS_SIGNATURE_dw assumes bytes reversed by little-endian
            load, so we interpret a match the other way. */
        /* BIG ENDIAN. From looking at hex characters in object  */
#ifdef  WORDS_BIGENDIAN
        word_swap = _dwarf_memcpy_noswap_bytes;
#else   /* LITTLE ENDIAN */
        word_swap =  _dwarf_memcpy_swap_bytes;
#endif  /* LITTLE- BIG-ENDIAN */
        locendian = DW_ENDIAN_BIG;
    } else if (dos_sig == IMAGE_DOS_REVSIGNATURE_dw) {
        /* raw load, so  interpret a match the other way. */
        /* LITTLE ENDIAN */
#ifdef  WORDS_BIGENDIAN
        word_swap =  _dwarf_memcpy_swap_bytes;
#else   /* LITTLE ENDIAN */
        word_swap = _dwarf_memcpy_noswap_bytes;
#endif  /* LITTLE- BIG-ENDIAN */
        locendian = DW_ENDIAN_LITTLE;
    } else {
        /* Not dos header not a PE file we recognize */
        *errcode = DW_DLE_FILE_WRONG_TYPE;
        return DW_DLV_ERROR;
    }
    /*  The last dos header field is the file offset of
        the NT signature. */
    ASNAR(word_swap,nt_address, dhinmem.dh_image_offset);
    if (filesize < nt_address) {
        /* The NT signature offset is beyond the end of file. */
        *errcode = DW_DLE_FILE_TOO_SMALL;
        return DW_DLV_ERROR;
    }
    if (filesize < (nt_address + SIZEOFT32 +
        sizeof(struct pe_image_file_header))) {
        *errcode = DW_DLE_FILE_TOO_SMALL;
        /*  No room after that offset for the signature
            plus the image file header. */
        return DW_DLV_ERROR;
    }
    res =  _dwarf_object_read_random(fd,(char *)&nt_sig_array[0],
        nt_address, sizeof(nt_sig_array),filesize,errcode);
    if (res != DW_DLV_OK) {
        return res;
    }
    /*  Turn the four signature bytes into a host integer. */
    {   unsigned long lsig = 0;

        ASNAR(word_swap,lsig,nt_sig_array);
        nt_sig = lsig;
    }
    if (nt_sig != IMAGE_NT_SIGNATURE_dw) {
        *errcode = DW_DLE_FILE_WRONG_TYPE;
        return DW_DLV_ERROR;
    }
    /*  The image file header follows the 4-byte signature. */
    res = _dwarf_object_read_random(fd,(char *)&ifh,
        nt_address + SIZEOFT32,
        sizeof(struct pe_image_file_header),
        filesize,
        errcode);
    if (res != DW_DLV_OK) {
        return res;
    }
    {
        unsigned long machine = 0;

        /*  The machine type selects the offset size. The
            outputs are written only for a recognized
            machine. */
        ASNAR(word_swap,machine,ifh.im_machine);
        switch(machine) {
        case IMAGE_FILE_MACHINE_I386_dw:
            *offsetsize = 32;
            *endian = locendian;
            return DW_DLV_OK;
        case IMAGE_FILE_MACHINE_IA64_dw:
        case IMAGE_FILE_MACHINE_AMD64_dw:
            *offsetsize = 64;
            *endian = locendian;
            return DW_DLV_OK;
        }
    }
    /*  Valid PE signatures but a machine type not
        handled above. */
    *errcode = DW_DLE_IMAGE_FILE_UNKNOWN_TYPE;
    return DW_DLV_ERROR;
}
420
421static int
422is_mach_o_magic(struct elf_header *h,
423    unsigned *endian,
424    unsigned *offsetsize)
425{
426    unsigned long magicval = 0;
427    unsigned locendian = 0;
428    unsigned locoffsetsize = 0;
429
430    /*  No swapping here. Need to match size of
431        Mach-o magic field. */
432    magicval = magic_copy(h->e_ident,4);
433    if (magicval == MH_MAGIC) {
434        locendian = DW_ENDIAN_BIG;
435        locoffsetsize = 32;
436    } else if (magicval == MH_CIGAM) {
437        locendian = DW_ENDIAN_LITTLE;
438        locoffsetsize = 32;
439    }else if (magicval == MH_MAGIC_64) {
440        locendian = DW_ENDIAN_BIG;
441        locoffsetsize = 64;
442    } else if (magicval == MH_CIGAM_64) {
443        locendian = DW_ENDIAN_LITTLE;
444        locoffsetsize = 64;
445    } else {
446        return FALSE;
447    }
448    *endian = locendian;
449    *offsetsize = locoffsetsize;
450    return TRUE;
451}
452
453int
454dwarf_object_detector_fd(int fd,
455    unsigned *ftype,
456    unsigned *endian,
457    unsigned *offsetsize,
458    Dwarf_Unsigned  *filesize,
459    int *errcode)
460{
461    struct elf_header h;
462    size_t readlen = sizeof(h);
463    int res = 0;
464    off_t fsize = 0;
465    off_t lsval = 0;
466    ssize_t readval = 0;
467
468    fsize = lseek(fd,0L,SEEK_END);
469    if(fsize < 0) {
470        *errcode = DW_DLE_SEEK_ERROR;
471        return DW_DLV_ERROR;
472    }
473    if (fsize <= (off_t)readlen) {
474        /* Not a real object file */
475        *errcode = DW_DLE_FILE_TOO_SMALL;
476        return DW_DLV_ERROR;
477    }
478    lsval  = lseek(fd,0L,SEEK_SET);
479    if(lsval < 0) {
480        *errcode = DW_DLE_SEEK_ERROR;
481        return DW_DLV_ERROR;
482    }
483    readval = read(fd,&h,readlen);
484    if (readval != (ssize_t)readlen) {
485        *errcode = DW_DLE_READ_ERROR;
486        return DW_DLV_ERROR;
487    }
488    if (h.e_ident[0] == 0x7f &&
489        h.e_ident[1] == 'E' &&
490        h.e_ident[2] == 'L' &&
491        h.e_ident[3] == 'F') {
492        /* is ELF */
493
494        res = fill_in_elf_fields(&h,endian,offsetsize,errcode);
495        if (res != DW_DLV_OK) {
496            return res;
497        }
498        *ftype = DW_FTYPE_ELF;
499        *filesize = (size_t)fsize;
500        return DW_DLV_OK;
501    }
502    if (is_mach_o_magic(&h,endian,offsetsize)) {
503        *ftype = DW_FTYPE_MACH_O;
504        *filesize = (size_t)fsize;
505        return DW_DLV_OK;
506    }
507    if (is_archive_magic(&h)) {
508        *ftype = DW_FTYPE_ARCHIVE;
509        *filesize = (size_t)fsize;
510        return DW_DLV_OK;
511    }
512    res = is_pe_object(fd,fsize,endian,offsetsize,errcode);
513    if (res == DW_DLV_OK ) {
514        *ftype = DW_FTYPE_PE;
515        *filesize = (size_t)fsize;
516        return DW_DLV_OK;
517    }
518    /* Check for custom ELF format. */
519#ifdef HAVE_CUSTOM_LIBELF
520    res = elf_is_custom_format(&h,readlen,&fsize,endian,offsetsize,errcode);
521    if (res == DW_DLV_OK) {
522        *ftype = DW_FTYPE_CUSTOM_ELF;
523        *filesize = (size_t)fsize;
524        return res;
525    }
526#endif /* HAVE_CUSTOM_LIBELF */
527
528    /* Unknown object format. */
529    return DW_DLV_NO_ENTRY;
530}
531
532int
533dwarf_object_detector_path(const char  *path,
534    char *outpath,unsigned long outpath_len,
535    unsigned *ftype,
536    unsigned *endian,
537    unsigned *offsetsize,
538    Dwarf_Unsigned  *filesize,
539    int *errcode)
540{
541    char *cp = 0;
542    size_t plen = strlen(path);
543    size_t dsprefixlen = sizeof(DSYM_SUFFIX);
544    int fd = -1;
545    int res = 0;
546    int have_outpath = outpath && outpath_len;
547
548#if !defined(S_ISREG)
549#define S_ISREG(mode) (((mode) & S_IFMT) == S_IFREG)
550#endif
551#if !defined(S_ISDIR)
552#define S_ISDIR(mode) (((mode) & S_IFMT) == S_IFDIR)
553#endif
554
555    if (have_outpath) {
556        if ((2*plen + dsprefixlen +2) >= outpath_len) {
557            *errcode =  DW_DLE_PATH_SIZE_TOO_SMALL;
558            return DW_DLV_ERROR;
559        }
560        cp = dw_stpcpy(outpath,path);
561        cp = dw_stpcpy(cp,DSYM_SUFFIX);
562        dw_stpcpy(cp,getbasename(path));
563        fd = open(outpath,O_RDONLY|O_BINARY);
564        if (fd < 0) {
565            *outpath = 0;
566            fd = open(path,O_RDONLY|O_BINARY);
567            dw_stpcpy(outpath,path);
568        }
569    } else {
570        fd = open(path,O_RDONLY|O_BINARY);
571    }
572    if (fd < 0) {
573        if (have_outpath) {
574            *outpath = 0;
575        }
576        return DW_DLV_NO_ENTRY;
577    }
578    res = dwarf_object_detector_fd(fd,
579        ftype,endian,offsetsize,filesize,errcode);
580    if (res != DW_DLV_OK && have_outpath) {
581        *outpath = 0;
582    }
583    close(fd);
584    return res;
585}
586