#! ruby -Ks

# make_jukugo_list.rb
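# Decodes the jukugo list (lst/jukugo.lst) found under the given data
# directory, pairs each entry with its record in tbl/juku2oya.tbl, and prints
# one line per entry: entry number, reference number, kanji text and the
# <yomi>-tagged reading.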

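# edit_yomi: rewrites the reading string yomi in place by applying a fixed
# sequence of gsub! substitutions.
# NOTE(review): the Japanese text of this comment and of the patterns below
# appears to have been lost when this Shift_JIS (-Ks) file was decoded;
# restore it from the original source before relying on this function.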
def edit_yomi(yomi)
    # ʂɂG[f[^̒
    yomi.gsub!(/ijij/, "ij")
    yomi.gsub!(/ڂi΂ji܂j/, "ڂi΂jEi܂j")
    yomi.gsub!(/₦ijij/, "₦ijEij")

    # "ǂ1iǂ2jǂ3"  "ǂ1ǂ3iǂ2ǂ3j"
    # : "ij"  "ij"
    yomi.gsub!(/([-]+)i([-]+)j([-]+)/, "\\1\\3i\\2\\3j")

    # "ǂ1 ㉹ij"  "ǂ1 ㉹iǂ1 j"
    # : "i킢j"  "i킢j"
    # @BIɕϊłȂ̂ŁAׂĂ̌񋓂
    yomi.gsub!(/([-]+)͂i͂j/, "\\1͂i\\1͂j")
    yomi.gsub!(/([-]+)i킢j/, "\\1i\\1킢j")
    yomi.gsub!(/([-]+)i킢j/, "\\1i\\1킢j")
    yomi.gsub!(/([-]+)ij/, "\\1i\\1j")
    yomi.gsub!(/([-]+)ij/, "\\1i\\1j")
    yomi.gsub!(/([-]+)䂤ij/, "\\1䂤i\\1j")
    yomi.gsub!(/([-]+)悤ij/, "\\1悤i\\1j")
    yomi.gsub!(/([-]+)ij/, "\\1i\\1j")
    yomi.gsub!(/([-]+)ij/, "\\1i\\1j")
    yomi.gsub!(/([-]+)䂤ij/, "\\1䂤i\\1j")
    yomi.gsub!(/([-]+)䂤ij/, "\\1䂤i\\1j")
    yomi.gsub!(/([-]+)悤ij/, "\\1悤i\\1j")
    yomi.gsub!(/([-]+)悤ij/, "\\1悤i\\1j")
    yomi.gsub!(/([-]+)ij/, "\\1i\\1j")
    yomi.gsub!(/([-]+)ij/, "\\1i\\1j")
    yomi.gsub!(/([-]+)䂤ij/, "\\1䂤i\\1j")
    yomi.gsub!(/([-]+)䂤ij/, "\\1䂤i\\1j")
    yomi.gsub!(/([-]+)悤ij/, "\\1悤i\\1j")
    yomi.gsub!(/([-]+)悤ij/, "\\1悤i\\1j")
    yomi.gsub!(/([-]+)Ƃij/, "\\1Ƃi\\1j")
    yomi.gsub!(/([-]+)ǂij/, "\\1ǂi\\1j")
    yomi.gsub!(/([-]+)䂤ij/, "\\1䂤i\\1j")
    yomi.gsub!(/([-]+)䂤ij/, "\\1䂤i\\1j")
    yomi.gsub!(/([-]+)悤iĂj/, "\\1悤i\\1Ăj")
    yomi.gsub!(/([-]+)悤iłj/, "\\1悤i\\1łj")
    yomi.gsub!(/([-]+)̂iȂj/, "\\1̂i\\1Ȃj")
    yomi.gsub!(/([-]+)ɂ䂤iɂj/, "\\1ɂ䂤i\\1ɂj")
    yomi.gsub!(/([-]+)ɂ悤i˂j/, "\\1ɂ悤i\\1˂j")
    yomi.gsub!(/([-]+)قi͂j/, "\\1قi\\1͂j")
    yomi.gsub!(/([-]+)ڂi΂j/, "\\1ڂi\\1΂j")
    yomi.gsub!(/([-]+)ۂiςj/, "\\1ۂi\\1ςj")
    yomi.gsub!(/([-]+)т䂤iтj/, "\\1т䂤i\\1тj")
    yomi.gsub!(/([-]+)Ђ悤iւj/, "\\1Ђ悤i\\1ւj")
    yomi.gsub!(/([-]+)т悤iׂj/, "\\1т悤i\\1ׂj")
    yomi.gsub!(/([-]+)҂悤i؂j/, "\\1҂悤i\\1؂j")
    yomi.gsub!(/([-]+)i܂j/, "\\1i\\1܂j")
    yomi.gsub!(/([-]+)݂悤i߂j/, "\\1݂悤i\\1߂j")
    yomi.gsub!(/([-]+)悤i₤j/, "\\1悤i\\1₤j")
    yomi.gsub!(/([-]+)悤i₤j/, "\\1悤i\\1₤j")
    yomi.gsub!(/([-]+)悤i₤j/, "\\1悤i\\1₤j")
    yomi.gsub!(/([-]+)悤i₤j/, "\\1悤i\\1₤j")
    yomi.gsub!(/([-]+)悤i₤j/, "\\1悤i\\1₤j")
    yomi.gsub!(/([-]+)悤i₤j/, "\\1悤i\\1₤j")
    yomi.gsub!(/([-]+)悤i₤j/, "\\1悤i\\1₤j")
    yomi.gsub!(/([-]+)Ђ悤iЂ₤j/, "\\1Ђ悤i\\1Ђ₤j")
    yomi.gsub!(/([-]+)т悤iт₤j/, "\\1т悤i\\1т₤j")
    yomi.gsub!(/([-]+)҂悤i҂₤j/, "\\1҂悤i\\1҂₤j")
    yomi.gsub!(/([-]+)݂悤i݂₤j/, "\\1݂悤i\\1݂₤j")
    yomi.gsub!(/([-]+)悤i₤j/, "\\1悤i\\1₤j")
    yomi.gsub!(/([-]+)䂤i䂤j/, "\\1䂤i\\1䂤j")
    yomi.gsub!(/([-]+)낤i炤j/, "\\1낤i\\1炤j")
    yomi.gsub!(/([-]+)䂤i肤j/, "\\1䂤i\\1肤j")
    yomi.gsub!(/([-]+)悤iꂤj/, "\\1悤i\\1ꂤj")
    yomi.gsub!(/([-]+)i키j/, "\\1i\\1키j")
    yomi.gsub!(/([-]+)i키j/, "\\1i\\1키j")
    yomi.gsub!(/([-]+)ij/, "\\1i\\1j")
    yomi.gsub!(/([-]+)ij/, "\\1i\\1j")
    yomi.gsub!(/([-]+)ij/, "\\1i\\1j")
    yomi.gsub!(/([-]+)i킭j/, "\\1i\\1킭j")
    yomi.gsub!(/([-]+)ij/, "\\1i\\1j")
    yomi.gsub!(/([-]+)ij/, "\\1i\\1j")
    yomi.gsub!(/([-]+)񂰁i킳񂰁j/, "\\1񂰁i\\1킳񂰁j")
    yomi.gsub!(/([-]+)񂰁i񂰁j/, "\\1񂰁i\\1񂰁j")
    yomi.gsub!(/([-]+)iÂj/, "\\1i\\1Âj")
    yomi.gsub!(/([-]+)ꂼiÂꂼj/, "\\1ꂼi\\1Âꂼj")
    yomi.gsub!(/([-]+)ij/, "\\1i\\1j")
    yomi.gsub!(/([-]+)ij/, "\\1i\\1j")
    yomi.gsub!(/([-]+)ij/, "\\1i\\1j")
    yomi.gsub!(/([-]+)ij/, "\\1i\\1j")
    yomi.gsub!(/([-]+)ij/, "\\1i\\1j")
    yomi.gsub!(/([-]+)ij/, "\\1i\\1j")
    yomi.gsub!(/([-]+)iÁj/, "\\1i\\1Áj")
    yomi.gsub!(/([-]+)íj/, "\\1i\\1́j")
    yomi.gsub!(/([-]+)iЁj/, "\\1i\\1Ёj")
    yomi.gsub!(/([-]+)iӁj/, "\\1i\\1Ӂj")
    yomi.gsub!(/([-]+)iӁj/, "\\1i\\1Ӂj")
    yomi.gsub!(/([-]+)䂤iӁj/, "\\1䂤i\\1Ӂj")
    yomi.gsub!(/([-]+)悤iӁj/, "\\1悤i\\1Ӂj")
    yomi.gsub!(/([-]+)iӁj/, "\\1i\\1Ӂj")
    yomi.gsub!(/([-]+)iӁj/, "\\1i\\1Ӂj")
    yomi.gsub!(/([-]+)䂤iӁj/, "\\1䂤i\\1Ӂj")
    yomi.gsub!(/([-]+)悤iӁj/, "\\1悤i\\1Ӂj")
    yomi.gsub!(/([-]+)悤iӁj/, "\\1悤i\\1Ӂj")
    yomi.gsub!(/([-]+)iӁj/, "\\1i\\1Ӂj")
    yomi.gsub!(/([-]+)iӁj/, "\\1i\\1Ӂj")
    yomi.gsub!(/([-]+)䂤iӁj/, "\\1䂤i\\1Ӂj")
    yomi.gsub!(/([-]+)䂤iӁj/, "\\1䂤i\\1Ӂj")
    yomi.gsub!(/([-]+)悤iӁj/, "\\1悤i\\1Ӂj")
    yomi.gsub!(/([-]+)ƂiӁj/, "\\1Ƃi\\1Ӂj")
    yomi.gsub!(/([-]+)悤iĂӁj/, "\\1悤i\\1ĂӁj")
    yomi.gsub!(/([-]+)悤iłӁj/, "\\1悤i\\1łӁj")
    yomi.gsub!(/([-]+)̂iȂӁj/, "\\1̂i\\1ȂӁj")
    yomi.gsub!(/([-]+)ɂ䂤iɂӁj/, "\\1ɂ䂤i\\1ɂӁj")
    yomi.gsub!(/([-]+)قi͂Ӂj/, "\\1قi\\1͂Ӂj")
    yomi.gsub!(/([-]+)ڂi΂Ӂj/, "\\1ڂi\\1΂Ӂj")
    yomi.gsub!(/([-]+)ۂiςӁj/, "\\1ۂi\\1ςӁj")
    yomi.gsub!(/([-]+)낤iӁj/, "\\1낤i\\1Ӂj")
    yomi.gsub!(/([-]+)䂤iӁj/, "\\1䂤i\\1Ӂj")
    yomi.gsub!(/([-]+)悤iӁj/, "\\1悤i\\1Ӂj")
    yomi.gsub!(/([-]+)iցj/, "\\1i\\1ցj")
    yomi.gsub!(/([-]+)ij/, "\\1i\\1j")
    yomi.gsub!(/([-]+)ij/, "\\1i\\1j")
    yomi.gsub!(/([-]+)ij/, "\\1i\\1j")
    yomi.gsub!(/([-]+)ij/, "\\1i\\1j")
    yomi.gsub!(/([-]+)ij/, "\\1i\\1j")
    yomi.gsub!(/([-]+)ij/, "\\1i\\1j")
    yomi.gsub!(/([-]+)ij/, "\\1i\\1j")
    yomi.gsub!(/([-]+)ij/, "\\1i\\1j")
    yomi.gsub!(/([-]+)ij/, "\\1i\\1j")
    yomi.gsub!(/([-]+)ij/, "\\1i\\1j")
    yomi.gsub!(/([-]+)ij/, "\\1i\\1j")
end

# add_yomi_tag: wraps every part of a reading in <yomi> ... </yomi> tags.
#
# yomi - String holding one or more readings; it is not modified.
#
# The string is split on the separator pattern. Each piece gets "<yomi>"
# put in front of it and "</yomi>" inserted just before the first match of
# the marker pattern, or at the very end of the piece when the marker does
# not occur. The pieces are then joined again with the separator.
#
# NOTE(review): the separator "E" and the marker "i" look like the ASCII
# remains of two-byte Shift_JIS punctuation damaged by a decoding error --
# confirm against the original source.
#
# Returns the tagged String.
def add_yomi_tag(yomi)
  tagged = yomi.split(/E/).map do |part|
    closed = part.sub(/i|\z/,'</yomi>\&')
    "<yomi>" + closed
  end
  return tagged.join("E")
end

# simplify_font: shortens or strips the <ST,n> ... </ST> markup in kanji.
#
# kanji - String to rewrite; it is modified in place.
#
# The substitutions run in this order:
#   1. every closing </ST> tag is removed
#   2. <ST,12>, <ST,13>, <ST,14> and <ST,51> become <12>, <13>, <14>, <51>
#   3. <ST,24> and <ST,25> become <24> and <25>
#   4. <ST,11>, <ST,33>, <ST,34>, <ST,35> and <ST,41> all become <11>
#   5. every <ST,digits> tag that is still left is removed
#
# Returns the result of the last gsub!: kanji itself when step 5 removed at
# least one tag, nil otherwise.
def simplify_font(kanji)
    [
        [/<\/ST>/, ""],
        [/<ST,(12|13|14|51)>/, "<\\1>"],
        [/<ST,2([45])>/, "<2\\1>"],
        [/<ST,(11|3[345]|41)>/, "<11>"]
    ].each do |pattern, replacement|
        kanji.gsub!(pattern, replacement)
    end
    kanji.gsub!(/<ST,[0-9]+>/, "")
end

# decrypt: decodes one obfuscated lst entry and returns its text.
#
# ent - binary String holding the raw entry bytes (e.g. read from a file
#       opened in "rb" mode). It is modified in place: its leading bytes
#       are overwritten with the decoded bytes.
# len - number of bytes of ent to decode.
#
# Returns the decoded bytes up to, but not including, the first 0x00 or
# 0xff byte, or all decoded bytes when neither value occurs.
#
# Bytes are handled through unpack/pack instead of String#[] with an
# integer index: that call yields an Integer only on Ruby 1.8. On 1.9 and
# later it yields a one-character String, which has no ^ operator and never
# compares equal to 0x00 or 0xff.
def decrypt(ent, len)
    # The leading part whose length is a multiple of 4 is read as big-endian
    # 32-bit words. Each word has all bits inverted and 0x8831b311 added,
    # keeping only the low 32 bits of the sum.
    len4 = len & ~3
    words = ent[0, len4].unpack("N*").map { |w|
        ((w ^ 0xffffffff) + 0x8831b311) & 0xffffffff
    }

    # The remaining 0-3 bytes are simply inverted.
    tail = ent[len4, len - len4].to_s.unpack("C*").map { |b| b ^ 0xff }

    # Write the decoded bytes back over the start of ent.
    bytes = words.pack("N*").unpack("C*") + tail
    ent[0, bytes.length] = bytes.pack("C*")

    # The text ends just before the first 0x00 or 0xff byte.
    stop = 0
    while stop < bytes.length && bytes[stop] != 0x00 && bytes[stop] != 0xff
        stop += 1
    end
    return ent[0, stop]
end

# Main script
#
# Usage: make_jukugo_list.rb data_directory
#
# Reads every entry of the lst file together with the matching record of the
# tbl file and prints one line per entry to standard output.

# Paths of the lst/tbl files, relative to the data directory.
JUKUGO_LST = "lst/jukugo.lst"
JUKU2OYA_TBL = "tbl/juku2oya.tbl"

# Open the lst/tbl files.
if ARGV[0].nil?
    STDERR.print "Usage: make_jukugo_list.rb data_directory\n"
    exit(1)
end
lst_path = ARGV[0] + "/" + JUKUGO_LST
tbl_path = ARGV[0] + "/" + JUKU2OYA_TBL
begin
    lstf = File.open(lst_path, "rb")
    tblf = File.open(tbl_path, "rb")
rescue
    STDERR.print "Can't open lst/tbl files\n"
    exit 1
end

begin
    # Reads one little-endian 32-bit unsigned integer, stopping with a clear
    # message instead of a NoMethodError on nil when the file ends early.
    read_u32 = lambda { |io|
        bytes = io.read(4)
        if bytes.nil? || bytes.length < 4
            STDERR.print "Unexpected end of lst/tbl file\n"
            exit 1
        end
        bytes.unpack("V1")[0]
    }

    # lst header: entry count, size of one entry and size of its string
    # part are stored at offset 12; the entries themselves start at offset 32.
    lstf.seek(12, IO::SEEK_SET)
    ent_num = read_u32.call(lstf)
    lst_ent_len = read_u32.call(lstf)
    lst_str_len = read_u32.call(lstf)
    lstf.seek(32, IO::SEEK_SET)

    # Read the lst and tbl entries in step and print each lst entry together
    # with its entry number and reference number.
    ent_num.times { |i|
        # Read the string part of the lst entry and decode it.
        list = lstf.read(lst_str_len)
        if list.nil? || list.length < lst_str_len
            STDERR.print "Unexpected end of lst/tbl file\n"
            exit 1
        end
        list = decrypt(list, lst_str_len)
        # Skip the rest of the entry.
        lstf.read(lst_ent_len - lst_str_len)

        # Read the tbl record now so both files stay in step even when the
        # lst entry is skipped below.
        tbl_ent_no = read_u32.call(tblf)
        tbl_ref_no = read_u32.call(tblf)

        # Split the entry into the text before <ST,80> and the text inside
        # <ST,80> ... </ST>. Both start as nil for every entry so that an
        # entry without that markup cannot reuse the previous entry's values.
        kanji = nil
        yomi = nil
        list.scan(/^(.*)<ST,80>(.*)<\/ST>$/) { |k, y|
            kanji = k
            yomi = y
        }
        if kanji.nil?
            STDERR.print "Skipping lst entry #{i}: no <ST,80> part found\n"
            next
        end

        # Shorten the markup in the kanji text.
        simplify_font(kanji)

        # Edit the reading only when it contains the marker.
        # NOTE(review): "i" looks like the ASCII remains of a two-byte
        # Shift_JIS character damaged by a decoding error -- confirm against
        # the original source.
        if yomi.index("i")
            edit_yomi(yomi)
        end

        # Print the entry prefixed with its entry and reference numbers.
        yomi = add_yomi_tag(yomi)
        printf "%04d:%04d %s%s\n", tbl_ent_no, tbl_ref_no, kanji, yomi
    }
ensure
    # Clean up, also when an error or exit interrupts the loop.
    lstf.close
    tblf.close
end
