Skip to content

Commit

Permalink
skip parser build (Attempt #1)
Browse files: browse the repository at this point in the history
  • Loading branch information
minhna1112 committed May 20, 2024
1 parent 31b5863 commit 3876ce2
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 14 deletions.
5 changes: 3 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "codetext"
version = "0.0.8"
version = "0.0.9"
authors = [
{ name="Dung Manh Nguyen", email="[email protected]" },
]
Expand All @@ -21,7 +21,8 @@ dependencies = [
"Levenshtein>=0.20",
"langdetect>=1.0.0",
"bs4>=0.0.1",
"tabulate>=0.9.0"
"tabulate>=0.9.0",
"tree_sitter_languages>=1.10.0"
]

[project.urls]
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ tabulate
Levenshtein
langdetect
bs4
tree-sitter-languages
22 changes: 14 additions & 8 deletions src/codetext/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,15 +92,21 @@ def parse_code(raw_code: str, language: str='Auto', tree_sitter_path: str=None)
calling_script_path = Path(inspect.getframeinfo(sys._getframe(1)).filename)
load_path = str(calling_script_path.parent)

ts_lang_path = os.path.join(load_path, 'tree-sitter', f'{language}.so')
if not os.path.exists(ts_lang_path):
logger.warning(f"Not found `{language}.so` in `{load_path}/tree-sitter/`, attemp to build language")
build_language(language, load_path)

# Get parser from languages
parser = Parser()
language = Language(load_path + f"/tree-sitter/{language}.so", language)
parser.set_language(language)

try:
from tree_sitter_languages import get_language, get_parser
parser = get_parser(get_language(language))
except ImportError:
# Workaround for when pre-built binary wheels for tree-sitter-languages are not available
logger.warning(f"Troubled importing 'tree-sitter-languages', attemp to look for pre-built binaries in the workspace")
ts_lang_path = os.path.join(load_path, 'tree-sitter', f'{language}.so')
if not os.path.exists(ts_lang_path):
logger.warning(f"Not found `{language}.so` in `{load_path}/tree-sitter/`, attemp to build language")
build_language(language, load_path)
language = Language(load_path + f"/tree-sitter/{language}.so", language)
parser.set_language(language)

if isinstance(raw_code, str):
raw_code = bytes(raw_code, 'utf8')
elif isinstance(raw_code, bytes):
Expand Down
8 changes: 6 additions & 2 deletions tests/setup.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
from ..src.codetext.utils import build_language

from tree_sitter_languages import get_language, get_parser

if __name__ == '__main__':
lang_list = ['python', 'cpp', 'java', 'c-sharp', 'ruby', 'rust', 'javascript', 'php', 'go']

for lang in lang_list:
build_language(lang)
# build_language(lang)
try:
get_parser(get_language(lang))
except:
build_language(lang)
2 changes: 0 additions & 2 deletions tests/test_utils/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@ def test_parse_code(self):
def sum_2_num(a, b):
return a + b
"""

build_language(language='python')
parse_code(sample, 'python')


Expand Down

0 comments on commit 3876ce2

Please sign in to comment.