From ade4e7cf65e8978161de1a8646cbee5d060b6ae1 Mon Sep 17 00:00:00 2001 From: PrimozGodec Date: Tue, 27 Aug 2024 14:23:22 +0200 Subject: [PATCH 1/5] Revert d2059a5 --- orangecontrib/text/corpus.py | 11 +---------- orangecontrib/text/tests/test_corpus.py | 14 -------------- requirements.txt | 2 +- 3 files changed, 2 insertions(+), 25 deletions(-) diff --git a/orangecontrib/text/corpus.py b/orangecontrib/text/corpus.py index b0660a830..8de6b753e 100644 --- a/orangecontrib/text/corpus.py +++ b/orangecontrib/text/corpus.py @@ -1,6 +1,6 @@ import os from collections import Counter, defaultdict -from copy import copy, deepcopy +from copy import copy from numbers import Integral from itertools import chain from typing import Union, Optional, List, Tuple, Dict @@ -19,15 +19,6 @@ ) from Orange.preprocess.transformation import Identity from Orange.data.util import get_unique_names - -# Gensim is 4.3.2 is incompatible with scipy 1.3, where they removed triu/ -# thus hack what it is missing here it. 
-# Remove this section after we depend on newer gensim -import scipy.linalg -if "triu" not in scipy.linalg.__dict__: - scipy.linalg.triu = np.triu - -from gensim import corpora from orangewidget.utils.signals import summarize, PartialSummary import scipy.sparse as sp diff --git a/orangecontrib/text/tests/test_corpus.py b/orangecontrib/text/tests/test_corpus.py index aa20b5f4f..02c56f2ec 100644 --- a/orangecontrib/text/tests/test_corpus.py +++ b/orangecontrib/text/tests/test_corpus.py @@ -1,7 +1,6 @@ import os import pickle import unittest -from datetime import datetime import numpy as np from numpy.testing import assert_array_equal @@ -16,7 +15,6 @@ from orangewidget.utils.signals import summarize from scipy.sparse import csr_matrix, issparse -import orangecontrib from orangecontrib.text import preprocess from orangecontrib.text.corpus import Corpus from orangecontrib.text.preprocess import ( @@ -27,18 +25,6 @@ from orangecontrib.text.tag import AveragedPerceptronTagger -class ImportHack(unittest.TestCase): - - def test_perhaps_remove_gensim_hack(self): - now = datetime.now() - if (now.year, now.month) >= (2024, 7): - self.fail( - "Check if gensim newer than 4.3.2 is available; if so, add it " - "to requirements, remove the scipy monkey-patch in corpus.py " - "and this test." - ) - - class CorpusTests(unittest.TestCase): def setUp(self): self.pos_tagger = AveragedPerceptronTagger() diff --git a/requirements.txt b/requirements.txt index 36c489f1d..538b9667a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,7 +3,7 @@ beautifulsoup4 biopython # Enables Pubmed widget. 
conllu docx2txt>=0.6 -gensim>=4.3.0,!=4.3.1 # gensim 4.3.1 is build on numpy 1.24, causing error on older numpys +gensim>=4.3.3 httpx!=0.23.1 # temporary fix - semantic search fail (but only in tests) langdetect lemmagen3 From f28ee34e230d8bb03c0311be8971c3888c15e69c Mon Sep 17 00:00:00 2001 From: PrimozGodec Date: Tue, 27 Aug 2024 14:34:17 +0200 Subject: [PATCH 2/5] Load averaged_perceptron_tagger_eng instead of averaged_perceptron_tagger --- orangecontrib/text/misc/nltk_data_download.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/orangecontrib/text/misc/nltk_data_download.py b/orangecontrib/text/misc/nltk_data_download.py index 781c5b8e9..78383402d 100644 --- a/orangecontrib/text/misc/nltk_data_download.py +++ b/orangecontrib/text/misc/nltk_data_download.py @@ -22,7 +22,7 @@ 'punkt', 'opinion_lexicon', 'vader_lexicon', - 'averaged_perceptron_tagger', + 'averaged_perceptron_tagger_eng', 'maxent_treebank_pos_tagger', 'omw-1.4', ] From e4e5e4676101b4a2243ce19e695d5b04e6be6e35 Mon Sep 17 00:00:00 2001 From: PrimozGodec Date: Tue, 27 Aug 2024 14:48:57 +0200 Subject: [PATCH 3/5] Load maxent_treebank_pos_tagger_tab instead of maxent_treebank_pos_tagger --- orangecontrib/text/misc/nltk_data_download.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/orangecontrib/text/misc/nltk_data_download.py b/orangecontrib/text/misc/nltk_data_download.py index 78383402d..6cf49ec24 100644 --- a/orangecontrib/text/misc/nltk_data_download.py +++ b/orangecontrib/text/misc/nltk_data_download.py @@ -23,7 +23,7 @@ 'opinion_lexicon', 'vader_lexicon', 'averaged_perceptron_tagger_eng', - 'maxent_treebank_pos_tagger', + 'maxent_treebank_pos_tagger_tab', 'omw-1.4', ] From b4286b48d9f860b96ed04e6c2d1eb47e60da7179 Mon Sep 17 00:00:00 2001 From: Marko Toplak Date: Thu, 29 Aug 2024 14:12:39 +0200 Subject: [PATCH 4/5] add nltk to the oldest tests --- tox.ini | 1 + 1 file changed, 1 insertion(+) diff --git a/tox.ini b/tox.ini index 1dc69c880..3402f9580 100644 
--- a/tox.ini +++ b/tox.ini @@ -29,6 +29,7 @@ deps = oldest: orange-canvas-core==0.1.30 oldest: orange-widget-base==4.20.0 oldest: pandas==1.4.0 + oldest: nltk==3.8.1 latest: https://github.com/biolab/orange3/archive/refs/heads/master.zip#egg=orange3 latest: https://github.com/biolab/orange3-network/archive/refs/heads/master.zip#egg=orange3-network latest: https://github.com/biolab/orange-canvas-core/archive/refs/heads/master.zip#egg=orange-canvas-core From 3d773ae28609ac7f02c2601ad8e9dd450313068a Mon Sep 17 00:00:00 2001 From: Marko Toplak Date: Thu, 29 Aug 2024 14:19:32 +0200 Subject: [PATCH 5/5] raise nltk requirement --- requirements.txt | 2 +- tox.ini | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 538b9667a..e6fadc0b6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,7 @@ gensim>=4.3.3 httpx!=0.23.1 # temporary fix - semantic search fail (but only in tests) langdetect lemmagen3 -nltk>=3.0.5 # TweetTokenizer introduced in 3.0.5 +nltk>=3.9.1 numpy odfpy>=1.3.5 Orange3 >=3.35.0 diff --git a/tox.ini b/tox.ini index 3402f9580..7758b2842 100644 --- a/tox.ini +++ b/tox.ini @@ -29,7 +29,7 @@ deps = oldest: orange-canvas-core==0.1.30 oldest: orange-widget-base==4.20.0 oldest: pandas==1.4.0 - oldest: nltk==3.8.1 + oldest: nltk==3.9.1 latest: https://github.com/biolab/orange3/archive/refs/heads/master.zip#egg=orange3 latest: https://github.com/biolab/orange3-network/archive/refs/heads/master.zip#egg=orange3-network latest: https://github.com/biolab/orange-canvas-core/archive/refs/heads/master.zip#egg=orange-canvas-core