From 3876ce2e42d34d2661261f219379da7562c4a52d Mon Sep 17 00:00:00 2001 From: minhna1112 Date: Mon, 20 May 2024 09:42:50 +0700 Subject: [PATCH] skip parser build (Attempt #1) --- pyproject.toml | 5 +++-- requirements.txt | 1 + src/codetext/utils/utils.py | 22 ++++++++++++++-------- tests/setup.py | 8 ++++++-- tests/test_utils/test_utils.py | 2 -- 5 files changed, 24 insertions(+), 14 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d8bb24d..b6a68f6 100755 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "codetext" -version = "0.0.8" +version = "0.0.9" authors = [ { name="Dung Manh Nguyen", email="dungnm.workspace@gmail.com" }, ] @@ -21,7 +21,8 @@ dependencies = [ "Levenshtein>=0.20", "langdetect>=1.0.0", "bs4>=0.0.1", - "tabulate>=0.9.0" + "tabulate>=0.9.0", + "tree_sitter_languages>=1.10.0" ] [project.urls] diff --git a/requirements.txt b/requirements.txt index d438040..9eb91b2 100755 --- a/requirements.txt +++ b/requirements.txt @@ -4,3 +4,4 @@ tabulate Levenshtein langdetect bs4 +tree-sitter-languages \ No newline at end of file diff --git a/src/codetext/utils/utils.py b/src/codetext/utils/utils.py index d330ecb..16c94f8 100644 --- a/src/codetext/utils/utils.py +++ b/src/codetext/utils/utils.py @@ -92,15 +92,21 @@ def parse_code(raw_code: str, language: str='Auto', tree_sitter_path: str=None) calling_script_path = Path(inspect.getframeinfo(sys._getframe(1)).filename) load_path = str(calling_script_path.parent) - ts_lang_path = os.path.join(load_path, 'tree-sitter', f'{language}.so') - if not os.path.exists(ts_lang_path): - logger.warning(f"Not found `{language}.so` in `{load_path}/tree-sitter/`, attemp to build language") - build_language(language, load_path) - + # Get parser from languages parser = Parser() - language = Language(load_path + f"/tree-sitter/{language}.so", language) - parser.set_language(language) - + try: + from tree_sitter_languages import get_language, get_parser + parser = get_parser(get_language(language)) + except ImportError: + # Work-around when pre-built binaries wheels for tree-sitter-languages are not available + logger.warning(f"Troubled importing 'tree-sitter-languages', attemp to look for pre-built binaries in the workspace") + ts_lang_path = os.path.join(load_path, 'tree-sitter', f'{language}.so') + if not os.path.exists(ts_lang_path): + logger.warning(f"Not found `{language}.so` in `{load_path}/tree-sitter/`, attemp to build language") + build_language(language, load_path) + language = Language(load_path + f"/tree-sitter/{language}.so", language) + parser.set_language(language) + if isinstance(raw_code, str): raw_code = bytes(raw_code, 'utf8') elif isinstance(raw_code, bytes): diff --git a/tests/setup.py b/tests/setup.py index 4a7f6aa..d9516a6 100755 --- a/tests/setup.py +++ b/tests/setup.py @@ -1,8 +1,12 @@ from ..src.codetext.utils import build_language - +from tree_sitter_languages import get_language, get_parser if __name__ == '__main__': lang_list = ['python', 'cpp', 'java', 'c-sharp', 'ruby', 'rust', 'javascript', 'php', 'go'] for lang in lang_list: - build_language(lang) + # build_language(lang) + try: + get_parser(get_language(lang)) + except: + build_language(lang) diff --git a/tests/test_utils/test_utils.py b/tests/test_utils/test_utils.py index d4a4ba4..af7288c 100755 --- a/tests/test_utils/test_utils.py +++ b/tests/test_utils/test_utils.py @@ -14,8 +14,6 @@ def test_parse_code(self): def sum_2_num(a, b): return a + b """ - - build_language(language='python') parse_code(sample, 'python')