from collections import defaultdict
import copy
import json
import sys
import pprint

from constants import (
    GLOBAL_BLACKLIST,
    IMPL_DEP_FILE_STR,
    OUTPUT_FILE_STR,
    SYSCALL_PREFIXES,
    ListType,
    hardcode_syscall_read_fields,
    hardcode_syscall_write_fields,
)


class Parser(object):
    """Derive implicit dependencies between syscalls from read/write lists.

    Each input line has the form::

        <syscall> <list_type>: [(struct a)->b, (struct c)->d, ...]

    A syscall that reads a struct member written by a *different* syscall is
    recorded as implicitly depending on that syscall.  Results are written
    as JSON (and optionally as pprint output) by :meth:`write`.

    Typical use: ``p = Parser(); p.parse(); p.write(); p.close()``.
    """

    def __init__(
        self,
        impl_dep_file_str=IMPL_DEP_FILE_STR,
        output_file_str=OUTPUT_FILE_STR,
        verbose=False,
        pretty=False
    ):
        """Open the input/output files and seed the hard-coded field tables.

        impl_dep_file_str -- path of the input file to parse.
        output_file_str   -- output path stem; '.json', '_verbose.json',
                             '.pretty' and '_verbose.pretty' are appended.
        verbose           -- also record *why* each dependency exists.
        pretty            -- also emit pprint-formatted output files.

        Exits the process with status 1 if any file cannot be opened.
        """
        try:
            # open() rather than the Python-2-only file() builtin.
            self.impl_dep_file = open(impl_dep_file_str, 'r')
            self.output_file = open(output_file_str + '.json', 'w+')
            if verbose:
                self.output_file_verbose = open(output_file_str + '_verbose.json', 'w+')
            if pretty:
                self.pretty_output_file = open(output_file_str + '.pretty', 'w+')
                self.pretty_output_file_verbose = open(output_file_str + '_verbose.pretty', 'w+')
        except IOError:
            sys.stderr.write("ERROR: Cannot open files %s %s.\n" % (impl_dep_file_str, output_file_str))
            sys.exit(1)
        self.verbose = verbose
        self.pretty = pretty
        self.syscall_read_fields = defaultdict(set)
        self.syscall_write_fields = defaultdict(set)
        self.implicit_dependencies = defaultdict(set)
        self.verbose_impl_dep = defaultdict(list)
        self.deref_counter = defaultdict(int)  # count which struct->members are most common

        for syscall, fields in hardcode_syscall_read_fields.items():
            self.syscall_read_fields[syscall].update(fields)

        for syscall, fields in hardcode_syscall_write_fields.items():
            self.syscall_write_fields[syscall].update(fields)

    def _sanitize_syscall(self, syscall):
        """Return syscall with the first matching SYSCALL_PREFIXES entry removed."""
        for prefix in SYSCALL_PREFIXES:
            if syscall.startswith(prefix):
                return syscall[len(prefix):]
        return syscall

    def _deref_to_tuple(self, deref):
        """ (struct a)->b ==> (a,b) """
        struct, member = deref.split('->')
        struct = struct[1:-1]  # strip parens
        struct = struct.split(' ')[1]  # drop struct keyword
        return (struct, member)

    def _split_field(self, field):
        """Turn '[(struct a)->b, (struct c)->d]' into [(a, b), (c, d)].

        Empty and whitespace-only entries (e.g. from '[]' or a trailing
        comma) are ignored.
        """
        field = field.strip()
        field = field[1:-1]  # strip square brackets
        # Strip *before* filtering so whitespace-only entries are dropped
        # instead of reaching _deref_to_tuple and failing to unpack.
        derefs = [entry.strip() for entry in field.split(',')]
        return [self._deref_to_tuple(deref) for deref in derefs if deref]

    def _sanitize_line(self, line):
        """Split one input line into (syscall, list_type, derefs).

        Raises ValueError if the line does not have exactly one ':' or the
        part before it is not exactly two space-separated words.
        """
        syscall_and_listtype, field = line.split(':')
        syscall, list_type = syscall_and_listtype.split(' ')
        syscall = self._sanitize_syscall(syscall)
        derefs = self._split_field(field)
        return syscall, list_type, derefs

    def _add_fields(self, syscall, list_type, derefs):
        """Record derefs as read or written by syscall, per list_type.

        Entries found in GLOBAL_BLACKLIST are skipped.  Raises ValueError
        for a list_type that is neither ListType.READ nor ListType.WRITE.
        """
        if list_type == ListType.READ:
            d = self.syscall_read_fields
        elif list_type == ListType.WRITE:
            d = self.syscall_write_fields
        else:
            # Previously fell through with `d` unbound and died with an
            # UnboundLocalError on first use; fail with a clear message.
            raise ValueError(
                "unknown list type %r for syscall %s" % (list_type, syscall)
            )
        for deref in derefs:
            if deref in GLOBAL_BLACKLIST:  # ignore spammy structs
                continue
            d[syscall].add(deref)

    def _construct_implicit_deps(self):
        """ just do a naive O(n^2) loop to see intersections between write_list and read_list """
        for this_call, read_fields in self.syscall_read_fields.items():
            for that_call, write_fields in self.syscall_write_fields.items():
                if that_call == this_call:  # calls are obviously dependent on themselves. ignore.
                    continue
                intersection = read_fields & write_fields
                if not intersection:
                    continue
                self.implicit_dependencies[this_call].add(that_call)
                if self.verbose:
                    self.verbose_impl_dep[this_call].append({
                        'call': that_call,
                        'reason': intersection,
                    })
                    # NOTE(review): original indentation was lost; this
                    # counting loop is assumed to belong to the verbose
                    # branch -- confirm against the pristine file.
                    for deref in intersection:
                        self.deref_counter[deref] += 1

    def parse(self):
        """Read every line of the input file, then compute the dependencies."""
        for line in self.impl_dep_file:
            if not line.strip():
                # Blank lines carry no data and would make
                # _sanitize_line fail to unpack.
                continue
            syscall, list_type, derefs = self._sanitize_line(line)
            self._add_fields(syscall, list_type, derefs)
        self._construct_implicit_deps()

    def _listify_verbose_reason(self, reason):
        """Return a JSON-friendly copy of a verbose entry.

        The 'reason' set of (struct, field) tuples becomes a list of
        'struct->field' strings; the original entry is left untouched.
        """
        r = copy.deepcopy(reason)
        r['reason'] = [struct + '->' + field for struct, field in r['reason']]
        return r

    def _get_json_dependencies(self):
        """Return (implicit_dependencies, verbose_impl_dep) as plain dicts of lists."""
        # Real lists (not sets or lazy map objects) so json.dump accepts them.
        implicit_dependencies = {
            call: list(dep_set)
            for call, dep_set in self.implicit_dependencies.items()
        }
        verbose_impl_dep = {
            call: [self._listify_verbose_reason(reason) for reason in call_reasons]
            for call, call_reasons in self.verbose_impl_dep.items()
        }
        return implicit_dependencies, verbose_impl_dep

    def write(self):
        """Write the results to the output files opened by __init__.

        Also prints the per-member counters to stdout, least common first.
        """
        implicit_dependencies, verbose_impl_dep = self._get_json_dependencies()
        json.dump(implicit_dependencies, self.output_file)
        if self.verbose:
            json.dump(verbose_impl_dep, self.output_file_verbose)
        if self.pretty:
            pprint.pprint(dict(self.implicit_dependencies), self.pretty_output_file)
            pprint.pprint(dict(self.verbose_impl_dep), self.pretty_output_file_verbose)
        # NOTE(review): original indentation was lost; this loop is assumed
        # to run regardless of `pretty` -- confirm against the pristine file.
        by_count_then_name = sorted(
            self.deref_counter.items(),
            key=lambda item: (item[1], item[0])
        )
        for deref, count in by_count_then_name:
            sys.stdout.write("%s: %d\n" % (deref, count))

    def close(self):
        """Close every file opened by __init__."""
        self.output_file.close()
        self.impl_dep_file.close()
        if self.verbose:
            self.output_file_verbose.close()
        if self.pretty:
            # These were previously never closed, leaking the handles and
            # leaving buffered output unflushed until interpreter exit.
            self.pretty_output_file.close()
            self.pretty_output_file_verbose.close()