#!/bin/python
"""

This file and its contents are supplied under the terms of the
Common Development and Distribution License ("CDDL"), version 1.0.
You may only use this file in accordance with the terms of version
1.0 of the CDDL.

A full copy of the text of the CDDL should have accompanied this
source.  A copy of the CDDL is also available via the Internet at
http://www.illumos.org/license/CDDL.

Copyright 2013 DEY Storage Systems, Inc.

Scratch script to produce the widths.cm content from the widths text
files.  It converts numeric unicode to symbolic forms.
"""

# Copyright 2018 OmniOS Community Edition (OmniOSce) Association.

from __future__ import print_function

# Lookup table filled in by load_utf8(): keyed by the value column of
# UTF-8.cm, holding the symbol column.  do_width_file() queries it with the
# strings produced by u8_str().
SYMBOLS = {}


def u8_str(val):
    """
    Convert a numeric value to a string representing the UTF-8 encoding
    of the numeric value, which should be a valid Unicode code point.

    Every byte of the encoding is written as a backslash, an "x" and the
    byte value in uppercase hexadecimal (not zero padded), so 0x20AC
    yields the twelve characters \\xE2\\x82\\xAC.

    Raises ValueError if val is outside the range accepted by the
    interpreter's chr()/unichr().
    """
    # unichr() only exists on Python 2; on Python 3 chr() handles every
    # code point.
    try:
        to_char = unichr
    except NameError:
        to_char = chr
    # A bytearray yields integers on both Python 2 and Python 3.  Indexing
    # the encoded string directly gives a str on 2 but an int on 3, which
    # makes ord() fail there.
    encoded = bytearray(to_char(val).encode('utf-8'))
    return "".join(["\\x%X" % byte for byte in encoded])


def load_utf8():
    """
    Load the UTF-8 character map file ("UTF-8.cm", resolved relative to
    the current directory) into the global SYMBOLS dictionary.

    Only lines made of exactly two whitespace-separated fields are used,
    and lines whose first field starts with "#" are skipped.  The first
    field is the symbol and the second its value; SYMBOLS ends up mapping
    each value to its symbol.  If a value occurs more than once the last
    symbol seen wins.
    """
    # The context manager guarantees the file is closed, even when an
    # exception interrupts the loop.
    with open("UTF-8.cm") as charmap:
        for line in charmap:
            items = line.split()
            if len(items) != 2 or items[0].startswith("#"):
                continue
            sym, val = items
            SYMBOLS[val] = sym


def do_width_file(width, filename):
    """
    Print a width entry for every known character listed in a widths file.

    The file holds pairs of hexadecimal Unicode values; each pair is an
    inclusive range of code points that all have the given width.  Lines
    starting with "#" are skipped.  For each code point whose u8_str()
    form is a key of SYMBOLS, a line made of the symbol, a tab and the
    width is printed; code points without a symbol are silently ignored.

    Raises ValueError if a field is not valid hexadecimal.
    """
    # The context manager guarantees the file is closed, even when a
    # malformed field raises part way through.
    with open(filename) as ranges:
        for line in ranges:
            if line.startswith("#"):
                continue
            vals = line.split()
            # Walk the fields two at a time as (start, end) pairs; an
            # unpaired trailing field is ignored.
            for first, last in zip(vals[0::2], vals[1::2]):
                for val in range(int(first, 16), int(last, 16) + 1):
                    sym = SYMBOLS.get(u8_str(val))
                    if sym is None:
                        continue
                    print("%s\t%d" % (sym, width))


if __name__ == "__main__":
    # Emit the WIDTH section on standard output: the symbol table is loaded
    # once, then each widths file is processed with the width it describes.
    print("WIDTH")
    load_utf8()
    for width, filename in ((0, "widths-0.txt"), (2, "widths-2.txt")):
        do_width_file(width, filename)
    print("END WIDTH")
83