#!/bin/python """ This file and its contents are supplied under the terms of the Common Development and Distribution License ("CDDL"), version 1.0. You may only use this file in accordance with the terms of version 1.0 of the CDDL. A full copy of the text of the CDDL should have accompanied this source. A copy of the CDDL is also available via the Internet at http://www.illumos.org/license/CDDL. Copyright 2013 DEY Storage Systems, Inc. Scratch script to produce the widths.cm content from the widths text files. It converts numeric unicode to symbolic forms. """ SYMBOLS = {} def u8_str(val): """ Convert a numeric value to a string representing the UTF-8 encoding of the numeric value, which should be a valid Unicode code point. """ u8str = unichr(val).encode('utf-8') idx = 0 out = "" while idx < len(u8str): out += "\\x%X" % ord(u8str[idx]) idx += 1 return out def load_utf8(): """ This function loads the UTF-8 character map file, loading the symbols and the numeric values. The result goes into the global SYMBOLS array. """ lines = open("UTF-8.cm").readlines() for line in lines: items = line.split() if (len(items) != 2) or items[0].startswith("#"): continue (sym, val) = (items[0], items[1]) SYMBOLS[val] = sym def do_width_file(width, filename): """ This function takes a file pairs of unicode values (hex), each of which is a range of unicode values, that all have the given width. """ for line in open(filename).readlines(): if line.startswith("#"): continue vals = line.split() while len(vals) > 1: start = int(vals[0], 16) end = int(vals[1], 16) val = start while val <= end: key = u8_str(val) val += 1 sym = SYMBOLS.get(key, None) if sym == None: continue print "%s\t%d" % (sym, width) vals = vals[2:] if __name__ == "__main__": print "WIDTH" load_utf8() do_width_file(0, "widths-0.txt") do_width_file(2, "widths-2.txt") print "END WIDTH"