From ce91e568fe0926593de4b43fde8108f8b7b1da15 Mon Sep 17 00:00:00 2001 From: Lasse Date: Tue, 6 Feb 2024 18:18:21 +0100 Subject: [PATCH 01/10] feat: bump pydantic version to 2.0 --- pyproject.toml | 1 + .../components/quality_data_classes.py | 14 ++++---------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b43a84b3..26fb6774 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,6 +39,7 @@ dependencies = [ "pandas>=1.0.0", "pyphen>=0.11.0,<0.15.0", "ftfy>=6.0.3,<6.1.0", + "pydantic>=2.0", ] requires-python = ">=3.8" diff --git a/src/textdescriptives/components/quality_data_classes.py b/src/textdescriptives/components/quality_data_classes.py index b8251d70..4a5bac79 100644 --- a/src/textdescriptives/components/quality_data_classes.py +++ b/src/textdescriptives/components/quality_data_classes.py @@ -2,7 +2,7 @@ from typing import Any, Dict, Optional, Tuple, Union -from pydantic import BaseModel, Extra, Field +from pydantic import ConfigDict, BaseModel, Field Interval = Tuple[Optional[float], Optional[float]] @@ -20,9 +20,7 @@ class ThresholdsOutput(BaseModel): >>> t_out.passed True """ - - class Config: - extra = Extra.forbid + model_config = ConfigDict(extra="forbid") threshold: Union[Interval, bool, None] value: Union[float, None] @@ -60,9 +58,7 @@ def __eq__(self, other: Any) -> bool: class QualityThresholds(BaseModel): """Thresholds for quality metrics.""" - - class Config: - extra = Extra.forbid + model_config = ConfigDict(extra="forbid") n_stop_words: Interval = Field( (2, None), @@ -163,9 +159,7 @@ class Config: class QualityOutput(BaseModel): """The output of the quality function.""" - - class Config: - extra = Extra.forbid + model_config = ConfigDict(extra="forbid") n_stop_words: ThresholdsOutput = Field( ..., From e236d0da869a71d331ac3ce19bf5c4e0045520a5 Mon Sep 17 00:00:00 2001 From: Lasse Date: Tue, 6 Feb 2024 18:19:21 +0100 Subject: [PATCH 02/10] build: update autodoc_pydantic to support pydantic 2 
--- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 26fb6774..1955a73e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,7 +59,7 @@ docs = [ "sphinxext-opengraph>=0.7.3", "myst-nb>=0.6.0", "sphinx_design>=0.3.0", - "autodoc_pydantic==1.9.0", + "autodoc_pydantic==2.0.1", ] tutorials = [ "jupyter", From 7854c8da6a6f02db4156e61ed3f17977f20dfd20 Mon Sep 17 00:00:00 2001 From: Lasse Date: Tue, 6 Feb 2024 18:25:34 +0100 Subject: [PATCH 03/10] tests: fix type --- tests/test_quality.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_quality.py b/tests/test_quality.py index 6b631822..53d7854a 100644 --- a/tests/test_quality.py +++ b/tests/test_quality.py @@ -4,7 +4,6 @@ import pytest import spacy - import textdescriptives as td from textdescriptives.components.quality import ( alpha_ratio, @@ -208,7 +207,7 @@ def test_quality_component_with_config(nlp: spacy.Language): proportion_bullet_points=(None, 0.8), duplicate_line_chr_fraction=(None, 0.2), duplicate_paragraph_chr_fraction=(None, 0.2), - top_ngram_chr_fraction={2: (None, 0.6), 3: (None, 0.6)}, + top_ngram_chr_fraction={"2": (None, 0.6), "3": (None, 0.6)}, duplicate_ngram_chr_fraction={}, contains={"lorem ipsum": False}, oov_ratio=(None, 0.3), From 3baa06d183733caeb4185f36b80f4438516adb70 Mon Sep 17 00:00:00 2001 From: Lasse Date: Tue, 6 Feb 2024 18:25:49 +0100 Subject: [PATCH 04/10] ci: update semrelease version for pydantic support --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 1871aa71..b5d7d1fa 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -34,7 +34,7 @@ jobs: - name: Python Semantic Release id: release - uses: python-semantic-release/python-semantic-release@v8.0.4 + uses: python-semantic-release/python-semantic-release@v8.7.2 with: github_token: ${{ 
secrets.RELEASE }} root_options: "-vv" From 2ec4aa90e122a65ddc23712afa6cfccd160e4d41 Mon Sep 17 00:00:00 2001 From: Lasse Date: Tue, 6 Feb 2024 18:29:42 +0100 Subject: [PATCH 05/10] lint --- src/textdescriptives/components/quality_data_classes.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/textdescriptives/components/quality_data_classes.py b/src/textdescriptives/components/quality_data_classes.py index 4a5bac79..e730b5be 100644 --- a/src/textdescriptives/components/quality_data_classes.py +++ b/src/textdescriptives/components/quality_data_classes.py @@ -20,6 +20,7 @@ class ThresholdsOutput(BaseModel): >>> t_out.passed True """ + model_config = ConfigDict(extra="forbid") threshold: Union[Interval, bool, None] @@ -58,6 +59,7 @@ def __eq__(self, other: Any) -> bool: class QualityThresholds(BaseModel): """Thresholds for quality metrics.""" + model_config = ConfigDict(extra="forbid") n_stop_words: Interval = Field( @@ -159,6 +161,7 @@ class QualityThresholds(BaseModel): class QualityOutput(BaseModel): """The output of the quality function.""" + model_config = ConfigDict(extra="forbid") n_stop_words: ThresholdsOutput = Field( From 8631fc8fe84b29962bc4ecd378c7c32f108b30c7 Mon Sep 17 00:00:00 2001 From: Lasse Date: Tue, 6 Feb 2024 18:35:52 +0100 Subject: [PATCH 06/10] docs: pin pydantic version in docs building --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 1955a73e..5213bebd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,6 +53,7 @@ documentation = "https://hlasse.github.io/TextDescriptives/" style = ["black==24.1.1", "pre-commit==3.6.0", "ruff==0.1.15", "mypy==1.8.0"] tests = ["pytest>=7.1.3", "pytest-cov>=3.0.0"] docs = [ + "pydantic==2.1", "sphinx>=5.3.0", "furo==2022.12.7", "sphinx-copybutton>=0.5.1", From d7d5a30ea19a399e3800e94e220264bbd2bb45c8 Mon Sep 17 00:00:00 2001 From: Lasse Date: Tue, 9 Apr 2024 14:54:15 +0200 Subject: [PATCH 07/10] deps: update min spacy version to 3.6 --- 
pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 5213bebd..d01194fa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,7 +34,7 @@ keywords = [ ] dependencies = [ - "spacy[lookups]>=3.1.0", + "spacy[lookups]>=3.6.0", "numpy>=1.20.0", "pandas>=1.0.0", "pyphen>=0.11.0,<0.15.0", @@ -60,7 +60,7 @@ docs = [ "sphinxext-opengraph>=0.7.3", "myst-nb>=0.6.0", "sphinx_design>=0.3.0", - "autodoc_pydantic==2.0.1", + "autodoc_pydantic==2.1.0", ] tutorials = [ "jupyter", From 7b24af8fe2bbf549643bedd431f59e340a3e970c Mon Sep 17 00:00:00 2001 From: Lasse Date: Tue, 9 Apr 2024 14:55:11 +0200 Subject: [PATCH 08/10] CI: update sem ver version --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index b5d7d1fa..c87332bc 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -34,7 +34,7 @@ jobs: - name: Python Semantic Release id: release - uses: python-semantic-release/python-semantic-release@v8.7.2 + uses: python-semantic-release/python-semantic-release@v9.4.1 with: github_token: ${{ secrets.RELEASE }} root_options: "-vv" From 4e2029f9d0e0cf12ec36d8c8f0ce67bd5f01f24b Mon Sep 17 00:00:00 2001 From: Lasse Date: Tue, 9 Apr 2024 14:55:37 +0200 Subject: [PATCH 09/10] tests: update spacy model version for tests to 3.6 --- docs/tutorials/requirements.txt | 2 +- tests/requirements.txt | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/tutorials/requirements.txt b/docs/tutorials/requirements.txt index 8a232dc3..acebe206 100644 --- a/docs/tutorials/requirements.txt +++ b/docs/tutorials/requirements.txt @@ -2,4 +2,4 @@ # e.g. 
links to wheels which is not allowed in pyproject.toml on pypi # spacy pipeline -https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.4.0/en_core_web_sm-3.4.0.tar.gz +https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.6.0/en_core_web_sm-3.6.0.tar.gz diff --git a/tests/requirements.txt b/tests/requirements.txt index bd81a13a..cbce7ed7 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -2,5 +2,5 @@ # e.g. links to wheels which is not allowed in pyproject.toml on pypi # spacy pipelines -https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.5.0/en_core_web_sm-3.5.0.tar.gz -https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.5.0/en_core_web_md-3.5.0.tar.gz \ No newline at end of file +https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.6.0/en_core_web_sm-3.6.0.tar.gz +https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.6.0/en_core_web_md-3.6.0.tar.gz \ No newline at end of file From 844c2403f34c7dcea9f83cfaf764176aac7e674b Mon Sep 17 00:00:00 2001 From: Lasse Date: Tue, 9 Apr 2024 15:16:06 +0200 Subject: [PATCH 10/10] fix: misc --- src/textdescriptives/components/quality.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/textdescriptives/components/quality.py b/src/textdescriptives/components/quality.py index 368f347d..adea7113 100644 --- a/src/textdescriptives/components/quality.py +++ b/src/textdescriptives/components/quality.py @@ -514,7 +514,7 @@ def set_quality(self, doc: Doc) -> None: # to allow the variable to json serializable we convert it to json # it is then converted back into a quality output object in the getter - doc._._quality = self.quality_setter(doc).dict() + doc._._quality = self.quality_setter(doc).model_dump() doc._.passed_quality_check = self.passed_quality_thresholds(doc) def passed_quality_thresholds(self, span: Union[Span, Doc]) -> bool: