From a5794d08ccbe7fe5f06877e95afdfb770587fe34 Mon Sep 17 00:00:00 2001 From: lizgzil Date: Thu, 31 Oct 2024 18:37:06 +0000 Subject: [PATCH 1/4] Move utils to package folder --- nlp_link/__init__.py | 19 +++++++++++++++++++ nlp_link/soc_mapper/soc_map.py | 5 +++-- nlp_link/soc_mapper/soc_map_utils.py | 2 +- {utils => nlp_link/utils}/utils.py | 19 +------------------ 4 files changed, 24 insertions(+), 21 deletions(-) rename {utils => nlp_link/utils}/utils.py (83%) diff --git a/nlp_link/__init__.py b/nlp_link/__init__.py index e69de29..6b6d488 100644 --- a/nlp_link/__init__.py +++ b/nlp_link/__init__.py @@ -0,0 +1,19 @@ +import yaml +from pathlib import Path +from typing import Optional +import logging + + +def get_yaml_config(file_path: Path) -> Optional[dict]: + """Fetch yaml config and return as dict if it exists.""" + if file_path.exists(): + with open(file_path, "rt") as f: + return yaml.load(f.read(), Loader=yaml.FullLoader) + + +# Define project base directory +PROJECT_DIR = Path(__file__).resolve().parents[1] + +logger = logging.getLogger(__name__) + +soc_mapper_config = get_yaml_config(PROJECT_DIR / "nlp_link/soc_mapper/config.yaml") diff --git a/nlp_link/soc_mapper/soc_map.py b/nlp_link/soc_mapper/soc_map.py index d1b78c1..fb912e0 100644 --- a/nlp_link/soc_mapper/soc_map.py +++ b/nlp_link/soc_mapper/soc_map.py @@ -21,6 +21,8 @@ from tqdm import tqdm import numpy as np +from nlp_link import soc_mapper_config + from nlp_link.soc_mapper.soc_map_utils import ( load_job_title_soc, process_job_title_soc, @@ -31,8 +33,7 @@ from nlp_link.linker_utils import load_bert -from utils.utils import ( - soc_mapper_config, +from nlp_link.utils.utils import ( load_s3_json, load_local_json, save_to_s3, diff --git a/nlp_link/soc_mapper/soc_map_utils.py b/nlp_link/soc_mapper/soc_map_utils.py index facf8d4..a88b7b5 100644 --- a/nlp_link/soc_mapper/soc_map_utils.py +++ b/nlp_link/soc_mapper/soc_map_utils.py @@ -2,7 +2,7 @@ import re -from utils.utils import soc_mapper_config +from nlp_link import soc_mapper_config def load_job_title_soc(soc_mapper_config: dict = soc_mapper_config) -> pd.DataFrame(): diff --git a/utils/utils.py b/nlp_link/utils/utils.py similarity index 83% rename from utils/utils.py rename to nlp_link/utils/utils.py index 3a19f15..5772f0c 100644 --- a/utils/utils.py +++ b/nlp_link/utils/utils.py @@ -1,27 +1,10 @@ -import yaml -from pathlib import Path -from typing import Optional -import logging import boto3 import json from fnmatch import fnmatch from decimal import Decimal import numpy - -def get_yaml_config(file_path: Path) -> Optional[dict]: - """Fetch yaml config and return as dict if it exists.""" - if file_path.exists(): - with open(file_path, "rt") as f: - return yaml.load(f.read(), Loader=yaml.FullLoader) - - -# Define project base directory -PROJECT_DIR = Path(__file__).resolve().parents[1] - -logger = logging.getLogger(__name__) - -soc_mapper_config = get_yaml_config(PROJECT_DIR / "nlp_link/soc_mapper/config.yaml") +from nlp_link import logger def get_s3_resource(): From 692276ce8c4c91b1c237bdca00d544382bcf1836 Mon Sep 17 00:00:00 2001 From: lizgzil Date: Thu, 31 Oct 2024 18:37:29 +0000 Subject: [PATCH 2/4] update boto and package version --- pyproject.toml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b122274..489dcae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "nlp-link" -version = "0.1.2" +version = "0.1.3" description = "A python package to semantically link two lists of texts." authors = ["Nesta "] readme = "README.md" @@ -17,7 +17,8 @@ tqdm = "^4.66.4" numpy = "^1.26.4" s3fs = "^2022.5.0" openpyxl = "^3.1.3" -boto3 = "^1.21.21" +boto3 = "^1.34.99" +botocore = "^1.34.99" wasabi = "^1.1.3" [build-system] From 2c3ba02131304aad608b0c87feab764b969f3988 Mon Sep 17 00:00:00 2001 From: lizgzil Date: Thu, 31 Oct 2024 18:43:28 +0000 Subject: [PATCH 3/4] Try to solve boto and s3fs version issue --- pyproject.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 489dcae..520ae63 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,10 +15,10 @@ torch = "^1.13.1" pytest = "^8.3.2" tqdm = "^4.66.4" numpy = "^1.26.4" -s3fs = "^2022.5.0" +s3fs = "^2022.11.0" openpyxl = "^3.1.3" -boto3 = "^1.34.99" -botocore = "^1.34.99" +boto3 = "^1.24.59" +botocore = "^1.27.59" wasabi = "^1.1.3" [build-system] From 25a56f5658db0ad8c61265a201189ecd8268f4d4 Mon Sep 17 00:00:00 2001 From: lizgzil Date: Thu, 31 Oct 2024 18:46:48 +0000 Subject: [PATCH 4/4] Potentially more efficient boto s3fs install --- pyproject.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 520ae63..0438fec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,11 +15,11 @@ torch = "^1.13.1" pytest = "^8.3.2" tqdm = "^4.66.4" numpy = "^1.26.4" -s3fs = "^2022.11.0" openpyxl = "^3.1.3" -boto3 = "^1.24.59" -botocore = "^1.27.59" wasabi = "^1.1.3" +s3fs = {extras = ["boto3"], version = ">=2023.12.0"} +boto3 = "*" +botocore = "*" [build-system] requires = ["poetry-core"]