1*1f5207b7SJohn Levonfrom collections import defaultdict
2*1f5207b7SJohn Levonimport copy
3*1f5207b7SJohn Levonimport json
4*1f5207b7SJohn Levonimport sys
5*1f5207b7SJohn Levonimport pprint
6*1f5207b7SJohn Levon
7*1f5207b7SJohn Levonfrom constants import (
8*1f5207b7SJohn Levon    GLOBAL_BLACKLIST,
9*1f5207b7SJohn Levon    IMPL_DEP_FILE_STR,
10*1f5207b7SJohn Levon    OUTPUT_FILE_STR,
11*1f5207b7SJohn Levon    SYSCALL_PREFIXES,
12*1f5207b7SJohn Levon    ListType,
13*1f5207b7SJohn Levon    hardcode_syscall_read_fields,
14*1f5207b7SJohn Levon    hardcode_syscall_write_fields,
15*1f5207b7SJohn Levon)
16*1f5207b7SJohn Levon
class Parser(object):
    """Derive implicit dependencies between syscalls from read/write lists.

    Each input line names a syscall, a list type (read or write) and a
    bracketed, comma-separated list of ``(struct a)->b`` dereferences.  A
    syscall is recorded as implicitly dependent on another syscall when it
    reads at least one (struct, member) pair that the other one writes.

    The code sticks to constructs that behave identically on Python 2 and
    Python 3 (``open`` rather than ``file``, ``items`` rather than
    ``iteritems``, no tuple-unpacking lambdas, no ``print`` statement).
    """

    def __init__(
        self,
        impl_dep_file_str=IMPL_DEP_FILE_STR,
        output_file_str=OUTPUT_FILE_STR,
        verbose=False,
        pretty=False
    ):
        """Open the input/output files and seed the hardcoded field tables.

        impl_dep_file_str -- path of the input file to parse.
        output_file_str   -- base path for outputs; '.json', '_verbose.json',
                             '.pretty' and '_verbose.pretty' are appended.
        verbose           -- also record and emit the reasons (shared fields)
                             behind every dependency.
        pretty            -- also emit pprint-formatted output files.

        Writes an error to stderr and exits with status 1 if any file cannot
        be opened.
        """
        # Optional handles default to None so close() can release exactly
        # the files that were actually opened.
        self.output_file_verbose = None
        self.pretty_output_file = None
        self.pretty_output_file_verbose = None
        try:
            self.impl_dep_file = open(impl_dep_file_str, 'r')
            self.output_file = open(output_file_str + '.json', 'w+')
            if verbose:
                self.output_file_verbose = open(output_file_str + '_verbose.json', 'w+')
            if pretty:
                self.pretty_output_file = open(output_file_str + '.pretty', 'w+')
                self.pretty_output_file_verbose = open(output_file_str + '_verbose.pretty', 'w+')
        except IOError:
            sys.stderr.write("ERROR: Cannot open files %s %s.\n" % (impl_dep_file_str, output_file_str))
            sys.exit(1)
        self.verbose = verbose
        self.pretty = pretty
        self.syscall_read_fields = defaultdict(set)
        self.syscall_write_fields = defaultdict(set)
        self.implicit_dependencies = defaultdict(set)
        self.verbose_impl_dep = defaultdict(list)
        self.deref_counter = defaultdict(int)  # count which struct->members are most common

        for syscall, fields in hardcode_syscall_read_fields.items():
            self.syscall_read_fields[syscall].update(set(fields))

        for syscall, fields in hardcode_syscall_write_fields.items():
            self.syscall_write_fields[syscall].update(set(fields))

    def _sanitize_syscall(self, syscall):
        """Strip the first matching SYSCALL_PREFIXES entry from `syscall`."""
        for prefix in SYSCALL_PREFIXES:
            if syscall.startswith(prefix):
                return syscall[len(prefix):]
        return syscall

    def _deref_to_tuple(self, deref):
        """ (struct a)->b ==> (a,b) """
        struct, member = deref.split('->')
        struct = struct[1:-1]  # strip parens
        struct = struct.split(' ')[1]  # drop struct keyword
        return (struct, member)

    def _split_field(self, field):
        """Turn '[(struct a)->b, (struct c)->d]' into [(a, b), (c, d)]."""
        field = field.strip()
        field = field[1:-1]  # strip square brackets
        # Strip before filtering so whitespace-only entries (e.g. after a
        # trailing comma) are dropped instead of reaching _deref_to_tuple.
        stripped = (entry.strip() for entry in field.split(','))
        return [self._deref_to_tuple(deref) for deref in stripped if deref]

    def _sanitize_line(self, line):
        """Split one input line into (syscall, list_type, derefs).

        The text before ':' is '<syscall> <list_type>'; the text after it is
        the bracketed dereference list handled by _split_field.
        """
        syscall_and_listtype, field = line.split(':')
        syscall, list_type = syscall_and_listtype.split(' ')
        syscall = self._sanitize_syscall(syscall)
        derefs = self._split_field(field)
        return syscall, list_type, derefs

    def _add_fields(self, syscall, list_type, derefs):
        """Record `derefs` as fields read or written by `syscall`.

        Raises ValueError when `list_type` is neither ListType.READ nor
        ListType.WRITE (previously this surfaced as an UnboundLocalError).
        """
        if list_type == ListType.READ:
            fields_by_syscall = self.syscall_read_fields
        elif list_type == ListType.WRITE:
            fields_by_syscall = self.syscall_write_fields
        else:
            raise ValueError(
                "unknown list type %r for syscall %r" % (list_type, syscall)
            )
        for deref in derefs:
            if deref in GLOBAL_BLACKLIST:  # ignore spammy structs
                continue
            fields_by_syscall[syscall].add(deref)

    def _construct_implicit_deps(self):
        """ just do a naive O(n^2) loop to see intersections between write_list and read_list """
        for this_call, read_fields in self.syscall_read_fields.items():
            for that_call, write_fields in self.syscall_write_fields.items():
                if that_call == this_call:  # calls are obviously dependent on themselves. ignore.
                    continue
                intersection = read_fields & write_fields
                if not intersection:
                    continue
                self.implicit_dependencies[this_call].add(that_call)
                if self.verbose:
                    self.verbose_impl_dep[this_call].append({
                        'call': that_call,
                        'reason': intersection,
                    })
                    for deref in intersection:
                        self.deref_counter[deref] += 1

    def parse(self):
        """Read every line of the input file, then compute the dependencies."""
        for line in self.impl_dep_file:
            if not line.strip():  # tolerate blank lines in the input
                continue
            syscall, list_type, derefs = self._sanitize_line(line)
            self._add_fields(syscall, list_type, derefs)
        self._construct_implicit_deps()

    def _listify_verbose_reason(self, reason):
        """Return a JSON-serializable copy of one verbose reason entry.

        The 'reason' set of (struct, field) tuples becomes a sorted list of
        'struct->field' strings; the input dict is left unmodified.
        """
        listified = dict(reason)
        listified['reason'] = [
            struct + '->' + field for struct, field in sorted(reason['reason'])
        ]
        return listified

    def _get_json_dependencies(self):
        """Return (implicit_dependencies, verbose_impl_dep) as plain dicts.

        Sets are converted to sorted lists so they can be serialized by json
        and so the output is reproducible from run to run.
        """
        implicit_dependencies = {}
        verbose_impl_dep = {}
        for call, dep_set in self.implicit_dependencies.items():
            implicit_dependencies[call] = sorted(dep_set)
        for call, call_reasons in self.verbose_impl_dep.items():
            verbose_impl_dep[call] = [
                self._listify_verbose_reason(reason) for reason in call_reasons
            ]
        return implicit_dependencies, verbose_impl_dep

    def write(self):
        """Write the results to the output files opened by __init__.

        Always dumps the dependency map as JSON; additionally dumps the
        verbose JSON and/or the pprint-formatted files when enabled.  Finally
        prints every counted dereference to stdout, ordered by
        (count, dereference).
        """
        implicit_dependencies, verbose_impl_dep = self._get_json_dependencies()
        json.dump(implicit_dependencies, self.output_file)
        if self.verbose:
            json.dump(verbose_impl_dep, self.output_file_verbose)
        if self.pretty:
            pprint.pprint(dict(self.implicit_dependencies), self.pretty_output_file)
            pprint.pprint(dict(self.verbose_impl_dep), self.pretty_output_file_verbose)
        ordered_counts = sorted(
            self.deref_counter.items(),
            key=lambda item: (item[1], item[0])
        )
        for deref, count in ordered_counts:
            # deref is a (struct, member) tuple and is printed as such.
            sys.stdout.write("%s: %d\n" % (deref, count))

    def close(self):
        """Close every file opened by __init__, including the pretty outputs."""
        handles = (
            self.output_file,
            self.impl_dep_file,
            self.output_file_verbose,
            self.pretty_output_file,
            self.pretty_output_file_verbose,
        )
        for handle in handles:
            if handle is not None:
                handle.close()
153