Skip to content

Commit

Permalink
Simplify conv_cmap.py cli
Browse files — browse the repository at this point in the history
  • Loading branch information
pietermarsman committed Jan 27, 2024
1 parent 9cc4d1d commit 45e8443
Showing 1 changed file with 56 additions and 50 deletions.
106 changes: 56 additions & 50 deletions tools/conv_cmap.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
#!/usr/bin/env python3

import argparse
import codecs
import gzip
import pickle as pickle
import sys
from pathlib import Path
from typing import List, Any


class CMapConverter:
Expand Down Expand Up @@ -149,56 +152,59 @@ def dump_unicodemap(self, fp):
return


# NOTE(review): this getopt-based `main` appears to be the pre-commit (removed)
# side of the diff; a second, argparse-based `def main` follows later in the
# page. Indentation was lost in extraction -- confirm against the repository.
#
# Command-line entry point: parses `argv`, builds a CMapConverter and loads
# every listed cid2code file into it.
def main(argv):
import getopt
import gzip
import os.path

# Print the usage line (program name taken from argv[0]) and return the
# status code 100 that callers hand back as the function result.
def usage():
print(
"usage: %s [-c enc=codec] output_dir regname [cid2code.txt ...]" % argv[0]
)
return 100

# Only the `-c` option (which takes a value) is recognised; any getopt parse
# error falls back to the usage message.
try:
(opts, args) = getopt.getopt(argv[1:], "c:")
except getopt.GetoptError:
return usage()
# Each `-c` value is cut at its first "=": the left part is the encoding name,
# the right part the codec. A value without "=" yields an empty codec.
enc2codec = {}
for (k, v) in opts:
if k == "-c":
(enc, _, codec) = v.partition("=")
enc2codec[enc] = codec
# The first two positional arguments are mandatory: output directory, then
# regname. Whatever remains in `args` is treated as input file paths.
if not args:
return usage()
outdir = args.pop(0)
if not args:
return usage()
regname = args.pop(0)

# Feed every remaining path to the converter, opening each file in text mode
# with the platform default encoding.
converter = CMapConverter(enc2codec)
for path in args:
print("reading: %r..." % path)
fp = open(path)
converter.load(fp)
fp.close()

def _enc_codec_pair(value: str) -> str:
    """Validate one ``--encoding-codec`` value and return it unchanged.

    The value must contain an ``=`` separating the encoding name from the
    codec. Rejecting malformed values here makes argparse report a regular
    usage error instead of letting the value fail later while the mapping is
    built.

    Raises:
        argparse.ArgumentTypeError: if ``value`` contains no ``=``.
    """
    if "=" not in value:
        raise argparse.ArgumentTypeError(
            f"expected a value of the form `enc=codec`, got {value!r}"
        )
    return value


def create_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the cmap conversion script.

    The resulting namespace has these attributes:

    * ``encoding_codec``: list of ``enc=codec`` strings (empty when ``-c`` is
      not given; the option may be repeated).
    * ``output_dir``: ``Path`` of the directory that receives the output.
    * ``regname``: string used in the name of the unicode map output file.
    * ``cid2code``: list of input ``Path`` objects (possibly empty).
    """
    parser = argparse.ArgumentParser(
        description="Convert cid2code files into gzip-compressed cmap files.",
    )
    parser.add_argument(
        "--encoding-codec",
        "-c",
        type=_enc_codec_pair,
        action="append",
        default=[],
        help="Specify the codec of an encoding. Use `enc=codec` as a value.",
    )
    parser.add_argument(
        "output_dir",
        type=Path,
        help="Directory where the compressed cmap's are stored.",
    )
    parser.add_argument(
        "regname",
        type=str,
        help="Name used for the unicode map output file "
        "(to-unicode-<regname>.pickle.gz).",
    )
    parser.add_argument("cid2code", type=Path, nargs="*", help="Input cmaps.")
    return parser


def main(argv: List[Any]) -> None:
    """Convert cid2code files into gzip-compressed cmap files.

    Args:
        argv: Full argument vector including the program name; everything
            after the first element is parsed by ``create_parser()``.

    For every encoding reported by the converter a ``<enc>.pickle.gz`` file is
    written to the output directory, followed by a single
    ``to-unicode-<regname>.pickle.gz`` file.

    Returns:
        None, so ``sys.exit(main(...))`` exits with status 0 on success.
    """
    parsed_args = create_parser().parse_args(argv[1:])

    encoding_codec: List[str] = parsed_args.encoding_codec
    outdir: Path = parsed_args.output_dir
    regname: str = parsed_args.regname
    cid2codes: List[Path] = parsed_args.cid2code

    # Cut each value at its FIRST "=" only. `split("=")` would hand `dict()` a
    # one- or three-element list for values with no or several "=" and crash
    # with a ValueError; `partition` always yields exactly (enc, sep, codec).
    enc2codec = {
        enc: codec
        for enc, _, codec in (item.partition("=") for item in encoding_codec)
    }
    converter = CMapConverter(enc2codec)

    for path in cid2codes:
        print(f"reading: {path}...")
        # Inputs are only read, so no directory is created for them; a missing
        # file simply raises from `open`.
        with path.open() as fp:
            converter.load(fp)

    # Create the output directory including any missing parent directories.
    outdir.mkdir(parents=True, exist_ok=True)
    for enc in converter.get_encs():
        path = outdir / f"{enc}.pickle.gz"
        print(f"writing: {path}...")
        with gzip.open(path, "wb") as fp:
            converter.dump_cmap(fp, enc)

    path = outdir / f"to-unicode-{regname}.pickle.gz"
    print(f"writing: {path}...")
    with gzip.open(path, "wb") as fp:
        converter.dump_unicodemap(fp)


# Script entry point: run `main` with the process arguments and pass its
# return value to `sys.exit` as the exit status.
# NOTE(review): the two `sys.exit` lines look like the removed and added sides
# of the diff (the second drops the mypy ignore comment) -- confirm.
if __name__ == "__main__":
sys.exit(main(sys.argv)) # type: ignore[no-untyped-call]
sys.exit(main(sys.argv))

0 comments on commit 45e8443

Please sign in to comment.