diff --git a/nlp_link/__init__.py b/nlp_link/__init__.py index e69de29..6b6d488 100644 --- a/nlp_link/__init__.py +++ b/nlp_link/__init__.py @@ -0,0 +1,19 @@ +import yaml +from pathlib import Path +from typing import Optional +import logging + + +def get_yaml_config(file_path: Path) -> Optional[dict]: + """Fetch yaml config and return as dict if it exists.""" + if file_path.exists(): + with open(file_path, "rt") as f: + return yaml.load(f.read(), Loader=yaml.FullLoader) + + +# Define project base directory +PROJECT_DIR = Path(__file__).resolve().parents[1] + +logger = logging.getLogger(__name__) + +soc_mapper_config = get_yaml_config(PROJECT_DIR / "nlp_link/soc_mapper/config.yaml") diff --git a/nlp_link/soc_mapper/soc_map.py b/nlp_link/soc_mapper/soc_map.py index d1b78c1..fb912e0 100644 --- a/nlp_link/soc_mapper/soc_map.py +++ b/nlp_link/soc_mapper/soc_map.py @@ -21,6 +21,8 @@ from tqdm import tqdm import numpy as np +from nlp_link import soc_mapper_config + from nlp_link.soc_mapper.soc_map_utils import ( load_job_title_soc, process_job_title_soc, @@ -31,8 +33,7 @@ from nlp_link.linker_utils import load_bert -from utils.utils import ( - soc_mapper_config, +from nlp_link.utils.utils import ( load_s3_json, load_local_json, save_to_s3, diff --git a/nlp_link/soc_mapper/soc_map_utils.py b/nlp_link/soc_mapper/soc_map_utils.py index facf8d4..a88b7b5 100644 --- a/nlp_link/soc_mapper/soc_map_utils.py +++ b/nlp_link/soc_mapper/soc_map_utils.py @@ -2,7 +2,7 @@ import re -from utils.utils import soc_mapper_config +from nlp_link import soc_mapper_config def load_job_title_soc(soc_mapper_config: dict = soc_mapper_config) -> pd.DataFrame(): diff --git a/utils/utils.py b/nlp_link/utils/utils.py similarity index 83% rename from utils/utils.py rename to nlp_link/utils/utils.py index 3a19f15..5772f0c 100644 --- a/utils/utils.py +++ b/nlp_link/utils/utils.py @@ -1,27 +1,10 @@ -import yaml -from pathlib import Path -from typing import Optional -import logging import boto3 import json from fnmatch import fnmatch from decimal import Decimal import numpy - -def get_yaml_config(file_path: Path) -> Optional[dict]: - """Fetch yaml config and return as dict if it exists.""" - if file_path.exists(): - with open(file_path, "rt") as f: - return yaml.load(f.read(), Loader=yaml.FullLoader) - - -# Define project base directory -PROJECT_DIR = Path(__file__).resolve().parents[1] - -logger = logging.getLogger(__name__) - -soc_mapper_config = get_yaml_config(PROJECT_DIR / "nlp_link/soc_mapper/config.yaml") +from nlp_link import logger def get_s3_resource(): diff --git a/pyproject.toml b/pyproject.toml index b122274..0438fec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "nlp-link" -version = "0.1.2" +version = "0.1.3" description = "A python package to semantically link two lists of texts." authors = ["Nesta "] readme = "README.md" @@ -15,10 +15,11 @@ torch = "^1.13.1" pytest = "^8.3.2" tqdm = "^4.66.4" numpy = "^1.26.4" -s3fs = "^2022.5.0" openpyxl = "^3.1.3" -boto3 = "^1.21.21" wasabi = "^1.1.3" +s3fs = {extras = ["boto3"], version = ">=2023.12.0"} +boto3 = "*" +botocore = "*" [build-system] requires = ["poetry-core"]