Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Drop abydos #90

Merged
merged 3 commits into from
Sep 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 20 additions & 20 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,26 +45,26 @@ jobs:
repo-token: ${{ github.token }}
- run: task pytest-pure

# pytest-external:
# runs-on: ubuntu-latest
# strategy:
# fail-fast: false
# matrix:
# python-version:
# - "3.8"
# - "3.9"
# - "3.10"
# - "3.11"
# # - "3.12.0-rc.1"
# steps:
# - uses: actions/checkout@v3
# - uses: actions/setup-python@v4
# with:
# python-version: ${{ matrix.python-version }}
# - uses: arduino/setup-task@v1
# with:
# repo-token: ${{ github.token }}
# - run: task pytest-external
pytest-external:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version:
- "3.8"
- "3.9"
- "3.10"
- "3.11"
# - "3.12.0-rc.1"
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- uses: arduino/setup-task@v1
with:
repo-token: ${{ github.token }}
- run: task pytest-external

markdownlint-cli:
runs-on: ubuntu-latest
Expand Down
4 changes: 0 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,6 @@ hamming('text', 'testit')

Supported libraries:

1. [abydos](https://github.com/chrislit/abydos)
1. [Distance](https://github.com/doukremt/distance)
1. [jellyfish](https://github.com/jamesturk/jellyfish)
1. [py_stringmatching](https://github.com/anhaidgroup/py_stringmatching)
Expand All @@ -245,13 +244,11 @@ Without extras installation:
| DamerauLevenshtein | rapidfuzz | 0.00312 |
| DamerauLevenshtein | jellyfish | 0.00591 |
| DamerauLevenshtein | pyxdameraulevenshtein | 0.03335 |
| DamerauLevenshtein | abydos | 0.63278 |
| DamerauLevenshtein | **textdistance** | 0.83524 |
| Hamming | Levenshtein | 0.00038 |
| Hamming | rapidfuzz | 0.00044 |
| Hamming | jellyfish | 0.00091 |
| Hamming | distance | 0.00812 |
| Hamming | abydos | 0.00902 |
| Hamming | **textdistance** | 0.03531 |
| Jaro | rapidfuzz | 0.00092 |
| Jaro | jellyfish | 0.00191 |
Expand All @@ -265,7 +262,6 @@ Without extras installation:
| Levenshtein | pylev | 0.15688 |
| Levenshtein | distance | 0.28669 |
| Levenshtein | **textdistance** | 0.53902 |
| Levenshtein | abydos | 1.25783 |

Total: 24 libs.

Expand Down
10 changes: 0 additions & 10 deletions constraints.txt

This file was deleted.

3 changes: 0 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
extras = {
# enough for simple usage
'extras': [
'abydos',
'jellyfish', # for DamerauLevenshtein
'numpy', # for SmithWaterman and other
'python-Levenshtein', # for Jaro and Levenshtein
Expand All @@ -18,7 +17,6 @@
# needed for benchmarking, optimization and testing
'benchmark': [
# common
'abydos',
'jellyfish',
'numpy',
'python-Levenshtein',
Expand Down Expand Up @@ -68,7 +66,6 @@
'rapidfuzz>=2.6.0', # only same length, any iterators of hashable elements
'jellyfish', # only strings, any length
'distance', # only same length, any iterators
'abydos', # any iterators
],
'Jaro': [
'rapidfuzz>=2.6.0', # any iterators of hashable elements
Expand Down
28 changes: 17 additions & 11 deletions tests/test_external.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
from __future__ import annotations

# built-in
import string
from math import isclose

# external
import hypothesis
import hypothesis.strategies
import pytest

# project
Expand All @@ -12,15 +16,7 @@

libraries = prototype.clone()

# numpy throws a bunch of warnings about abydos using `np.int` instead of `int`.
ABYDOS_WARNINGS = (
'ignore:`np.int` is a deprecated alias',
'ignore:`np.float` is a deprecated alias',
'ignore:Using or importing the ABCs',
)


@pytest.mark.filterwarnings(*ABYDOS_WARNINGS)
@pytest.mark.external
@pytest.mark.parametrize('alg', libraries.get_algorithms())
@hypothesis.settings(deadline=None)
Expand All @@ -30,6 +26,12 @@
)
def test_compare(left, right, alg):
for lib in libraries.get_libs(alg):

if lib.module_name == 'jellyfish':
ascii = set(string.printable)
if (set(left) | set(right)) - ascii:
continue

conditions = lib.conditions or {}
internal_func = getattr(textdistance, alg)(external=False, **conditions)
external_func = lib.get_function()
Expand All @@ -45,16 +47,21 @@ def test_compare(left, right, alg):
assert isclose(int_result, ext_result), str(lib)


@pytest.mark.filterwarnings(*ABYDOS_WARNINGS)
@pytest.mark.external
@pytest.mark.parametrize('alg', libraries.get_algorithms())
@hypothesis.given(
left=hypothesis.strategies.text(min_size=1),
right=hypothesis.strategies.text(min_size=1),
)
@pytest.mark.parametrize('qval', (None, 1, 2, 3))
def test_qval(left, right, alg, qval):
def test_qval(left: str, right: str, alg: str, qval: int | None) -> None:
for lib in libraries.get_libs(alg):

if lib.module_name == 'jellyfish':
ascii = set(string.printable)
if (set(left) | set(right)) - ascii:
continue

conditions = lib.conditions or {}
internal_func = getattr(textdistance, alg)(external=False, **conditions)
external_func = lib.get_function()
Expand All @@ -80,7 +87,6 @@ def test_qval(left, right, alg, qval):
assert isclose(int_result, ext_result), f'{lib}({repr(s1)}, {repr(s2)})'


@pytest.mark.filterwarnings(*ABYDOS_WARNINGS)
@pytest.mark.external
@pytest.mark.parametrize('alg', libraries.get_algorithms())
@hypothesis.given(
Expand Down
20 changes: 7 additions & 13 deletions textdistance/libraries.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,12 @@ def optimize(self) -> None:
# sort libs by speed
self.libs[alg].sort(key=lambda lib: libs_names.index([lib.module_name, lib.func_name]))

def get_algorithms(self) -> list:
def get_algorithms(self) -> list[str]:
"""Get list of available algorithms.
"""
return list(self.libs.keys())

def get_libs(self, alg) -> list[LibraryBase]:
def get_libs(self, alg: str) -> list[LibraryBase]:
"""Get libs list for algorithm
"""
if alg not in self.libs:
Expand All @@ -69,7 +69,7 @@ def __init__(
*,
presets: dict[str, Any] | None = None,
attr: str | None = None,
conditions: dict[str, Any] | None = None,
conditions: dict[str, bool] | None = None,
) -> None:
self.module_name = module_name
self.func_name = func_name
Expand All @@ -89,7 +89,7 @@ def check_conditions(self, obj: object, *sequences: Sequence) -> bool:

return True

def prepare(self, *sequences) -> tuple:
def prepare(self, *sequences: Sequence) -> tuple:
return sequences

@property
Expand Down Expand Up @@ -128,7 +128,7 @@ def __str__(self) -> str:


class TextLibrary(LibraryBase):
def check_conditions(self, obj, *sequences: Sequence) -> bool:
def check_conditions(self, obj: object, *sequences: Sequence) -> bool:
if not super().check_conditions(obj, *sequences):
return False

Expand All @@ -142,15 +142,15 @@ def check_conditions(self, obj, *sequences: Sequence) -> bool:
return False
return True

def prepare(self, *sequences) -> tuple:
def prepare(self, *sequences: Sequence) -> tuple:
# convert list of letters to string
if isinstance(sequences[0], (tuple, list)):
sequences = tuple(map(lambda x: ''.join(x), sequences))
return sequences


class SameLengthLibrary(LibraryBase):
def check_conditions(self, obj, *sequences: Sequence) -> bool:
def check_conditions(self, obj: object, *sequences: Sequence) -> bool:
if not super().check_conditions(obj, *sequences):
return False
# compare only same length iterators
Expand All @@ -167,17 +167,12 @@ class SameLengthTextLibrary(SameLengthLibrary, TextLibrary):
reg = prototype.register

alg = 'DamerauLevenshtein'
reg(alg, LibraryBase(
'abydos.distance', 'DamerauLevenshtein', presets={}, attr='dist_abs',
conditions=dict(restricted=False),
))
reg(alg, LibraryBase('pyxdameraulevenshtein', 'damerau_levenshtein_distance', conditions=dict(restricted=True)))
reg(alg, TextLibrary('jellyfish', 'damerau_levenshtein_distance', conditions=dict(restricted=False)))
reg(alg, LibraryBase('rapidfuzz.distance.DamerauLevenshtein', 'distance', conditions=dict(restricted=False)))
reg(alg, LibraryBase('rapidfuzz.distance.OSA', 'distance', conditions=dict(restricted=True)))

alg = 'Hamming'
reg(alg, LibraryBase('abydos.distance', 'Hamming', presets={}, attr='dist_abs'))
reg(alg, SameLengthLibrary('distance', 'hamming'))
reg(alg, SameLengthTextLibrary('Levenshtein', 'hamming'))
reg(alg, TextLibrary('jellyfish', 'hamming_distance'))
Expand All @@ -197,7 +192,6 @@ class SameLengthTextLibrary(SameLengthLibrary, TextLibrary):
# reg(alg, TextLibrary('Levenshtein', 'jaro_winkler', conditions=dict(winklerize=True)))

alg = 'Levenshtein'
reg(alg, LibraryBase('abydos.distance', 'Levenshtein', presets={}, attr='dist_abs'))
reg(alg, LibraryBase('distance', 'levenshtein'))
reg(alg, LibraryBase('pylev', 'levenshtein'))
reg(alg, TextLibrary('jellyfish', 'levenshtein_distance'))
Expand Down