1from collections import defaultdict
2import copy
3import json
4import sys
5import pprint
6
7from constants import (
8    GLOBAL_BLACKLIST,
9    IMPL_DEP_FILE_STR,
10    OUTPUT_FILE_STR,
11    SYSCALL_PREFIXES,
12    ListType,
13    hardcode_syscall_read_fields,
14    hardcode_syscall_write_fields,
15)
16
class Parser(object):
    """Derive implicit dependencies between syscalls from their field accesses.

    Each input line names a syscall, a list type (read or write) and the
    ``(struct x)->member`` dereferences recorded for it.  A syscall that
    reads a field is treated as implicitly dependent on every *other*
    syscall that writes the same field.

    The code is written to run unchanged on both Python 2 and Python 3.
    """

    # Attribute names of every file handle the parser may own; used by
    # close() so that no handle is forgotten.
    _FILE_ATTRS = (
        'output_file',
        'impl_dep_file',
        'output_file_verbose',
        'pretty_output_file',
        'pretty_output_file_verbose',
    )

    def __init__(
        self,
        impl_dep_file_str=IMPL_DEP_FILE_STR,
        output_file_str=OUTPUT_FILE_STR,
        verbose=False,
        pretty=False
    ):
        """Open the input/output files and seed the hard-coded field tables.

        Args:
            impl_dep_file_str: path of the input file to parse.
            output_file_str: base path of the outputs; ``.json`` is appended,
                plus ``_verbose.json`` when ``verbose`` is set and
                ``.pretty`` / ``_verbose.pretty`` when ``pretty`` is set.
            verbose: also record the reason (shared fields) of each
                dependency.
            pretty: also emit ``pprint``-formatted copies of the results.

        Exits the process with status 1 if any of the files cannot be opened.
        """
        # Pre-declare every handle so close() is safe even when one of the
        # open() calls below fails half-way through.
        for attr in self._FILE_ATTRS:
            setattr(self, attr, None)

        try:
            # open() rather than file(): file() does not exist on Python 3.
            self.impl_dep_file = open(impl_dep_file_str, 'r')
            self.output_file = open(output_file_str + '.json', 'w+')
            if verbose:
                self.output_file_verbose = open(output_file_str + '_verbose.json', 'w+')
            if pretty:
                self.pretty_output_file = open(output_file_str + '.pretty', 'w+')
                self.pretty_output_file_verbose = open(output_file_str + '_verbose.pretty', 'w+')
        except IOError:
            self.close()  # do not leak the handles that did open
            sys.stderr.write("ERROR: Cannot open files %s %s.\n" % (impl_dep_file_str, output_file_str))
            sys.exit(1)

        self.verbose = verbose
        self.pretty = pretty
        self.syscall_read_fields = defaultdict(set)
        self.syscall_write_fields = defaultdict(set)
        self.implicit_dependencies = defaultdict(set)
        self.verbose_impl_dep = defaultdict(list)
        # Count which struct->members are most common.  Only populated in
        # verbose mode (see _construct_implicit_deps).
        self.deref_counter = defaultdict(int)

        for syscall, fields in hardcode_syscall_read_fields.items():
            self.syscall_read_fields[syscall].update(fields)

        for syscall, fields in hardcode_syscall_write_fields.items():
            self.syscall_write_fields[syscall].update(fields)

    def _sanitize_syscall(self, syscall):
        """Return ``syscall`` without the first matching SYSCALL_PREFIXES entry."""
        for prefix in SYSCALL_PREFIXES:
            if syscall.startswith(prefix):
                return syscall[len(prefix):]
        return syscall

    def _deref_to_tuple(self, deref):
        """ (struct a)->b ==> (a,b)

        Raises ValueError if ``deref`` does not contain exactly one ``->``
        and IndexError if the parenthesised part has no space-separated
        second word.
        """
        struct, member = deref.split('->')
        struct = struct[1:-1]  # strip parens
        struct = struct.split(' ')[1]  # drop struct keyword
        return (struct, member)

    def _split_field(self, field):
        """Turn ``"[(struct a)->b, (struct c)->d]"`` into a list of tuples.

        Empty and whitespace-only entries (e.g. from ``[]`` or a trailing
        comma) are ignored.
        """
        inner = field.strip()[1:-1]  # strip square brackets
        # Strip *before* filtering: a whitespace-only entry must be dropped,
        # not handed to _deref_to_tuple as an empty string.
        entries = [entry.strip() for entry in inner.split(',')]
        return [self._deref_to_tuple(entry) for entry in entries if entry]

    def _sanitize_line(self, line):
        """Split one input line into ``(syscall, list_type, derefs)``.

        The expected shape is ``<syscall> <list_type>:[<deref>, ...]``.
        Raises ValueError when the line does not contain exactly one ``:``
        or the part before it is not exactly two space-separated words.
        """
        syscall_and_listtype, field = line.split(':')
        syscall, list_type = syscall_and_listtype.split(' ')
        syscall = self._sanitize_syscall(syscall)
        derefs = self._split_field(field)
        return syscall, list_type, derefs

    def _add_fields(self, syscall, list_type, derefs):
        """Record ``derefs`` as read or written by ``syscall``.

        Raises ValueError when ``list_type`` is neither ListType.READ nor
        ListType.WRITE (previously this surfaced as an UnboundLocalError).
        """
        if list_type == ListType.READ:
            target = self.syscall_read_fields
        elif list_type == ListType.WRITE:
            target = self.syscall_write_fields
        else:
            raise ValueError(
                "unknown list type %r for syscall %r" % (list_type, syscall)
            )
        for deref in derefs:
            if deref in GLOBAL_BLACKLIST:  # ignore spammy structs
                continue
            target[syscall].add(deref)

    def _construct_implicit_deps(self):
        """ just do a naive O(n^2) loop to see intersections between write_list and read_list """
        for this_call, read_fields in self.syscall_read_fields.items():
            for that_call, write_fields in self.syscall_write_fields.items():
                if that_call == this_call:  # calls are obviously dependent on themselves. ignore.
                    continue
                intersection = read_fields & write_fields
                if not intersection:
                    continue
                self.implicit_dependencies[this_call].add(that_call)
                if self.verbose:
                    self.verbose_impl_dep[this_call].append({
                        'call': that_call,
                        'reason': intersection,
                    })
                    for deref in intersection:
                        self.deref_counter[deref] += 1

    def parse(self):
        """Read the input file and build the implicit dependency tables."""
        for line in self.impl_dep_file:
            if not line.strip():
                # Blank lines (e.g. a trailing newline) carry no data.
                continue
            syscall, list_type, derefs = self._sanitize_line(line)
            self._add_fields(syscall, list_type, derefs)
        self._construct_implicit_deps()

    def _listify_verbose_reason(self, reason):
        """Return a JSON-serialisable copy of one verbose dependency entry.

        The ``'reason'`` set of ``(struct, field)`` tuples becomes a sorted
        list of ``"struct->field"`` strings; ``reason`` itself is untouched.
        """
        # A shallow copy is enough: the only mutable value is replaced below.
        listified = dict(reason)
        listified['reason'] = sorted(
            struct + '->' + field for struct, field in reason['reason']
        )
        return listified

    def _get_json_dependencies(self):
        """Return ``(implicit_dependencies, verbose_impl_dep)`` as plain dicts.

        Sets are converted to sorted lists so that they can be serialised by
        ``json`` and the output is reproducible between runs.
        """
        implicit_dependencies = {}
        verbose_impl_dep = {}
        for call, dep_set in self.implicit_dependencies.items():
            implicit_dependencies[call] = sorted(dep_set)
        for call, call_reasons in self.verbose_impl_dep.items():
            # Build real lists: on Python 3 map() yields an iterator, which
            # json.dump cannot serialise.
            verbose_impl_dep[call] = [
                self._listify_verbose_reason(reason) for reason in call_reasons
            ]
        return implicit_dependencies, verbose_impl_dep

    def write(self):
        """Write the results to the output files and print the field counts.

        The deref counts are printed to stdout in ascending order of count.
        """
        implicit_dependencies, verbose_impl_dep = self._get_json_dependencies()
        json.dump(implicit_dependencies, self.output_file)
        if self.verbose:
            json.dump(verbose_impl_dep, self.output_file_verbose)
        if self.pretty:
            pprint.pprint(dict(self.implicit_dependencies), self.pretty_output_file)
            pprint.pprint(dict(self.verbose_impl_dep), self.pretty_output_file_verbose)
        by_count_then_deref = sorted(
            self.deref_counter.items(),
            key=lambda item: (item[1], item[0])
        )
        for deref, count in by_count_then_deref:
            # Single parenthesised argument: valid as a Python 2 print
            # statement and as a Python 3 print() call.
            print("%s: %d" % (deref, count))

    def close(self):
        """Close every file this parser opened; safe to call more than once."""
        for attr in self._FILE_ATTRS:
            handle = getattr(self, attr, None)
            if handle is not None:
                handle.close()
153