Skip to content

Commit

Permalink
Update dict to 5b6d334 and bump version to 0.2.1
Browse files Browse the repository at this point in the history
  • Loading branch information
ayaka14732 committed Dec 12, 2021
1 parent 662a8c1 commit 74f8e9c
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 11 deletions.
24 changes: 14 additions & 10 deletions preprocess.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from opencc import OpenCC
import os

os.system('wget -nc https://raw.githubusercontent.com/rime/rime-cantonese/bc5aa59/jyut6ping3.dict.yaml')
t2s = OpenCC('t2s').convert

os.system('wget -nc https://raw.githubusercontent.com/rime/rime-cantonese/5b6d334/jyut6ping3.dict.yaml')

def freq_str_to_float(s):
'''Convert frequency data in the dictionary file to float.
Expand All @@ -17,12 +19,11 @@ def freq_str_to_float(s):

DEFAULT_FREQ = 0.07

def build_dict():
def build_dict(d, filepath):
'''Create a dictionary of all the words with jyutping data.
If there are multiple possibilities, the one with higher frequency is used.
'''
d = {}
with open('jyut6ping3.dict.yaml') as f:
with open(filepath) as f:
for line in f:
if line == '...\n':
break
Expand Down Expand Up @@ -53,18 +54,21 @@ def build_dict():
((詞頻 == 元詞頻) and (元粵拼[-1] != '2' and 粵拼[-1] == '2')) # 變2調優先
if should_change:
d[] = (粵拼, 詞頻)
return {k: v[0] for k, v in d.items()}

def write_dict(d):
    '''Write the word-to-jyutping mapping to the simplified dictionary file.

    Each item of `d` is written as one line: the key, a tab character, then
    the value. Lines follow the iteration order of `d`. Both keys and values
    must be `str`. The target directory `src/ToJyutping/` must already exist.
    '''
    # The keys are Chinese text, so pin the encoding to UTF-8 instead of
    # relying on the platform default (which cannot encode them on some
    # systems, e.g. a legacy Windows code page).
    with open('src/ToJyutping/jyut6ping3.simple.dict.yaml', 'w', encoding='utf-8') as f:
        f.writelines(k + '\t' + v + '\n' for k, v in d.items())

converter = OpenCC('t2s')
def main():
d = {}
build_dict(d, 'jyut6ping3.dict.yaml')

d_t = {k: v[0] for k, v in d.items()}
d_cn = {t2s(k): v for k, v in d_t.items()}

d_t = build_dict()
d_cn = {converter.convert(k): v for k, v in d_t.items()}
d = {**d_cn, **d_t}

d = {**d_cn, **d_t}
write_dict(d)

write_dict(d)
main()
2 changes: 1 addition & 1 deletion src/ToJyutping/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.2.0'
__version__ = '0.2.1'

0 comments on commit 74f8e9c

Please sign in to comment.