# Tamito KAJIYAMA <12 June 2001>
# $Id: hgen.py,v 1.2 2002/03/04 09:05:54 kajiyama Exp $

import sys
import os

# Name this script was invoked under; main() quotes it in the header comment
# of the generated file.
progname = os.path.basename(sys.argv[0])

# Generator version, printed next to progname in that same header comment.
__version__ = "1.0"

# N is the number of hash buckets: read() files every code under code % N,
# dump() writes code / N into a single output byte, and main() emits the
# value as "#define N" in the generated file.
# N is determined heuristically (must be >= 256)
# NOTE(review): presumably >= 256 so that the quotient of a 16-bit code
# (0xFFFF / 256 == 255) still fits in one unsigned char -- confirm.
N = 523

def read(filename, jis_column, ucs_column, nbuckets=None):
    """Parse a mapping table into two hash-bucketed lookup maps.

    Lines beginning with '#' and blank lines are skipped.  Every other
    line is split on whitespace, and the tokens at *jis_column* and
    *ucs_column* are parsed as hexadecimal integers (a leading "0x" is
    accepted).  The JIS code is OR-ed with 0x8080 before it is stored.

    Args:
        filename: Path of the mapping table to read.
        jis_column: Index of the token that holds the JIS code.
        ucs_column: Index of the token that holds the UCS code.
        nbuckets: Number of buckets in each returned map.  Defaults to
            the module-level N.

    Returns:
        A (jis_map, ucs_map) tuple.  Both are lists of *nbuckets* lists:
        jis_map[jis % nbuckets] collects (jis, ucs) pairs and
        ucs_map[ucs % nbuckets] collects (ucs, jis) pairs, in file order.

    Raises:
        IndexError: A data line has fewer tokens than the columns need.
        ValueError: A selected token is not a hexadecimal number.
    """
    if nbuckets is None:
        nbuckets = N
    jis_map = [[] for _ in range(nbuckets)]
    ucs_map = [[] for _ in range(nbuckets)]
    # The context manager closes the table even if a line fails to parse.
    with open(filename) as table:
        for line in table:
            if line.startswith('#'):
                continue
            tokens = line.split()
            if not tokens:
                # Blank (whitespace-only) line: nothing to parse.
                continue
            jis = int(tokens[jis_column], 16) | 0x8080
            ucs = int(tokens[ucs_column], 16)
            jis_map[jis % nbuckets].append((jis, ucs))
            ucs_map[ucs % nbuckets].append((ucs, jis))
    return jis_map, ucs_map

def _dump_buckets(prefix, kind, buckets, nbuckets, sort_key=None):
    """Print one C byte array per bucket, then the array of pointers to them."""
    for n in range(nbuckets):
        bucket = buckets[n]
        bucket.sort(key=sort_key)
        print("static unsigned char %s_%s_map_%d[] = {" % (prefix, kind, n))
        # First byte of each array is the number of entries that follow.
        print("    0x%02x," % len(bucket))
        for code, value in bucket:
            # Only the quotient is stored; the remainder is the bucket index.
            print("    0x%02x, 0x%02x, 0x%02x," % (
                code // nbuckets, value // 256, value % 256))
        print("};")
    print("static unsigned char *%s_%s_map[] = {" % (prefix, kind))
    for n in range(nbuckets):
        print("    %s_%s_map_%d," % (prefix, kind, n))
    print("};")


def dump(prefix, jis_map, ucs_map, nbuckets=None):
    """Print the bucketed maps to standard output as C source.

    For each bucket n an array "<prefix>_jis_map_<n>" (and later
    "<prefix>_ucs_map_<n>") is printed.  It starts with the entry count
    and continues with three bytes per entry: the key divided by
    *nbuckets*, then the high and low byte of the mapped value.  Each
    group of arrays is followed by a pointer table "<prefix>_jis_map" /
    "<prefix>_ucs_map", and a blank line separates the two groups.

    The buckets are sorted in place: jis buckets by their (jis, ucs)
    tuples, ucs buckets by (jis, ucs) as well, i.e. by the second tuple
    element first.

    The print calls, floor division and key-based sort are spelled so
    that this function produces the same text on Python 2 and Python 3.

    Args:
        prefix: Identifier prefix for every generated C symbol.
        jis_map: List of buckets holding (jis, ucs) pairs.
        ucs_map: List of buckets holding (ucs, jis) pairs.
        nbuckets: Number of buckets to print and the divisor applied to
            the keys.  Defaults to the module-level N.
    """
    if nbuckets is None:
        nbuckets = N
    _dump_buckets(prefix, "jis", jis_map, nbuckets)
    print("")
    _dump_buckets(prefix, "ucs", ucs_map, nbuckets,
                  sort_key=lambda pair: (pair[1], pair[0]))

def _load_code_table(filename, ucs_column):
    """Return a {code: ucs} dict built from token 0 and token *ucs_column*."""
    table = {}
    with open(filename) as source:
        for line in source:
            if line.startswith('#'):
                continue
            tokens = line.split()
            # Blank rows, rows too short to reach the column, and rows whose
            # column already belongs to a trailing '#' comment have no mapping.
            if len(tokens) <= ucs_column or tokens[ucs_column].startswith('#'):
                continue
            # A code listed more than once keeps its last mapping.
            table[int(tokens[0], 16)] = int(tokens[ucs_column], 16)
    return table


def dump_ms932(sjisfile, j0208file):
    """Print the "ms932" tables: entries of *sjisfile* not covered by *j0208file*.

    Both files are read as whitespace-separated tables of hexadecimal
    numbers keyed by their first column; the mapped value is column 1 of
    *sjisfile* and column 2 of *j0208file*.  A code from *sjisfile* is
    kept when *j0208file* has no entry for it or maps it to a different
    value, unless the code maps to itself.  The kept codes are hashed
    into N buckets in ascending order and handed to dump() under the
    prefix "ms932".

    Args:
        sjisfile: Path of the table whose extra mappings are wanted.
        j0208file: Path of the table to compare against.
    """
    ms932_dec = _load_code_table(sjisfile, 1)
    j0208_dec = _load_code_table(j0208file, 2)

    # .get() yields None for codes missing from j0208file, which never
    # equals an integer, so missing and differing codes are both kept.
    diff_dec = sorted(
        mbcs for mbcs, ucs in ms932_dec.items()
        if j0208_dec.get(mbcs) != ucs and mbcs != ucs)

    jis_map = [[] for _ in range(N)]
    ucs_map = [[] for _ in range(N)]
    for sjis in diff_dec:
        ucs = ms932_dec[sjis]
        jis_map[sjis % N].append((sjis, ucs))
        ucs_map[ucs % N].append((ucs, sjis))

    # NOTE(review): a "for gaiji area" placeholder stood here without any
    # code behind it; nothing extra is emitted for that area.
    dump("ms932", jis_map, ucs_map)

def main():
    """Write the complete generated C source to standard output.

    Reads "JIS0208.TXT" (columns 1 and 2) and "JIS0212.TXT" (columns 0
    and 1) from the current directory, prints the header comment and the
    "#define N" line, then the "jisx0208" and "jisx0212" tables followed
    by the "ms932" tables built from "MS932.TXT" and "JIS0208.TXT".

    The print calls take a single pre-formatted string so that the
    statements in this function are valid, and emit the same text, on
    both Python 2 and Python 3.
    """
    table1_jis, table1_ucs = read("JIS0208.TXT", 1, 2)
    table2_jis, table2_ucs = read("JIS0212.TXT", 0, 1)
    print("/* This is an auto-generated file (by %s %s) */" % (
        progname, __version__))
    print("/* Do not edit!! */")
    print("")
    print("#define N %s" % (N,))
    print("")
    dump("jisx0208", table1_jis, table1_ucs)
    dump("jisx0212", table2_jis, table2_ucs)
    dump_ms932("MS932.TXT", "JIS0208.TXT")
    
# Generate the tables only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
