diff options
Diffstat (limited to 'usr/src/cmd/localedef/mkwidths.py')
-rw-r--r-- | usr/src/cmd/localedef/mkwidths.py | 78 |
1 files changed, 78 insertions, 0 deletions
#!/bin/python
"""

This file and its contents are supplied under the terms of the
Common Development and Distribution License ("CDDL"), version 1.0.
You may only use this file in accordance with the terms of version
1.0 of the CDDL.

A full copy of the text of the CDDL should have accompanied this
source. A copy of the CDDL is also available via the Internet at
http://www.illumos.org/license/CDDL.

Copyright 2013 DEY Storage Systems, Inc.

Scratch script to produce the widths.cm content from the widths text
files. It converts numeric unicode to symbolic forms.
"""

# Maps an encoded byte string as spelled in the charmap (e.g. "\xC3\xA9")
# to the symbolic name that appears next to it.  Filled in by load_utf8().
SYMBOLS = {}

# Python 2 spells the "code point -> one-character text" builtin unichr();
# Python 3 only has chr().  Pick whichever exists so the script runs on both.
try:
    _code_point_to_text = unichr
except NameError:
    _code_point_to_text = chr


def u8_str(val):
    """
    Convert a numeric value to a string representing the UTF-8 encoding
    of the numeric value, which should be a valid Unicode code point.

    Each encoded byte is rendered as a backslash, "x" and two upper-case
    hex digits, so 0xE9 becomes the 8-character string "\\xC3\\xA9".
    Two digits are always emitted (0x09 -> "\\x09") because POSIX charmap
    hexadecimal constants need at least two digits; a one-digit rendering
    could never equal such a key in SYMBOLS.
    NOTE(review): assumes UTF-8.cm uses that two-digit, upper-case form.
    """
    # bytearray yields integers when iterated on both Python 2 and 3.
    encoded = bytearray(_code_point_to_text(val).encode('utf-8'))
    return "".join("\\x%02X" % octet for octet in encoded)


def load_utf8(filename="UTF-8.cm"):
    """
    This function loads the UTF-8 character map file, loading the symbols
    and the numeric values. The result goes into the global SYMBOLS dict,
    keyed by the encoded value with the symbol as the stored value.

    filename is the charmap to read; it defaults to "UTF-8.cm" in the
    current directory.  Only lines made of exactly two whitespace
    separated fields whose first field does not start with "#" are used.
    """
    with open(filename) as charmap:
        for line in charmap:
            items = line.split()
            if len(items) != 2 or items[0].startswith("#"):
                continue
            sym, val = items
            SYMBOLS[val] = sym


def do_width_file(width, filename):
    """
    This function takes a file of pairs of unicode values (hex), each
    pair being an inclusive range of unicode values that all have the
    given width.  For every value in a range that has a symbol in
    SYMBOLS, a "symbol<TAB>width" line is printed.

    Lines starting with "#" are skipped.  A line may hold several pairs;
    a trailing token without a partner is ignored.
    """
    with open(filename) as ranges:
        for line in ranges:
            if line.startswith("#"):
                continue
            vals = line.split()
            # Walk the tokens two at a time: (start, end), (start, end), ...
            for idx in range(0, len(vals) - 1, 2):
                start = int(vals[idx], 16)
                end = int(vals[idx + 1], 16)
                for val in range(start, end + 1):
                    sym = SYMBOLS.get(u8_str(val))
                    if sym is not None:
                        # Single-argument print(...) emits the same text on
                        # Python 2 (print statement) and Python 3 (function).
                        print("%s\t%d" % (sym, width))


if __name__ == "__main__":
    print("WIDTH")
    load_utf8()
    do_width_file(0, "widths-0.txt")
    do_width_file(2, "widths-2.txt")
    print("END WIDTH")