From ade4e7cf65e8978161de1a8646cbee5d060b6ae1 Mon Sep 17 00:00:00 2001
From: PrimozGodec
Date: Tue, 27 Aug 2024 14:23:22 +0200
Subject: [PATCH 1/5] Revert d2059a5
---
orangecontrib/text/corpus.py | 11 +----------
orangecontrib/text/tests/test_corpus.py | 14 --------------
requirements.txt | 2 +-
3 files changed, 2 insertions(+), 25 deletions(-)
diff --git a/orangecontrib/text/corpus.py b/orangecontrib/text/corpus.py
index b0660a830..8de6b753e 100644
--- a/orangecontrib/text/corpus.py
+++ b/orangecontrib/text/corpus.py
@@ -1,6 +1,6 @@
import os
from collections import Counter, defaultdict
-from copy import copy, deepcopy
+from copy import copy
from numbers import Integral
from itertools import chain
from typing import Union, Optional, List, Tuple, Dict
@@ -19,15 +19,6 @@
)
from Orange.preprocess.transformation import Identity
from Orange.data.util import get_unique_names
-
-# Gensim is 4.3.2 is incompatible with scipy 1.3, where they removed triu/
-# thus hack what it is missing here it.
-# Remove this section after we depend on newer gensim
-import scipy.linalg
-if "triu" not in scipy.linalg.__dict__:
- scipy.linalg.triu = np.triu
-
-from gensim import corpora
from orangewidget.utils.signals import summarize, PartialSummary
import scipy.sparse as sp
diff --git a/orangecontrib/text/tests/test_corpus.py b/orangecontrib/text/tests/test_corpus.py
index aa20b5f4f..02c56f2ec 100644
--- a/orangecontrib/text/tests/test_corpus.py
+++ b/orangecontrib/text/tests/test_corpus.py
@@ -1,7 +1,6 @@
import os
import pickle
import unittest
-from datetime import datetime
import numpy as np
from numpy.testing import assert_array_equal
@@ -16,7 +15,6 @@
from orangewidget.utils.signals import summarize
from scipy.sparse import csr_matrix, issparse
-import orangecontrib
from orangecontrib.text import preprocess
from orangecontrib.text.corpus import Corpus
from orangecontrib.text.preprocess import (
@@ -27,18 +25,6 @@
from orangecontrib.text.tag import AveragedPerceptronTagger
-class ImportHack(unittest.TestCase):
-
- def test_perhaps_remove_gensim_hack(self):
- now = datetime.now()
- if (now.year, now.month) >= (2024, 7):
- self.fail(
- "Check if gensim newer than 4.3.2 is available; if so, add it "
- "to requirements, remove the scipy monkey-patch in corpus.py "
- "and this test."
- )
-
-
class CorpusTests(unittest.TestCase):
def setUp(self):
self.pos_tagger = AveragedPerceptronTagger()
diff --git a/requirements.txt b/requirements.txt
index 36c489f1d..538b9667a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,7 +3,7 @@ beautifulsoup4
biopython # Enables Pubmed widget.
conllu
docx2txt>=0.6
-gensim>=4.3.0,!=4.3.1 # gensim 4.3.1 is build on numpy 1.24, causing error on older numpys
+gensim>=4.3.3
httpx!=0.23.1 # temporary fix - semantic search fail (but only in tests)
langdetect
lemmagen3
From f28ee34e230d8bb03c0311be8971c3888c15e69c Mon Sep 17 00:00:00 2001
From: PrimozGodec
Date: Tue, 27 Aug 2024 14:34:17 +0200
Subject: [PATCH 2/5] Load averaged_perceptron_tagger_eng instead of
averaged_perceptron_tagger
---
orangecontrib/text/misc/nltk_data_download.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/orangecontrib/text/misc/nltk_data_download.py b/orangecontrib/text/misc/nltk_data_download.py
index 781c5b8e9..78383402d 100644
--- a/orangecontrib/text/misc/nltk_data_download.py
+++ b/orangecontrib/text/misc/nltk_data_download.py
@@ -22,7 +22,7 @@
'punkt',
'opinion_lexicon',
'vader_lexicon',
- 'averaged_perceptron_tagger',
+ 'averaged_perceptron_tagger_eng',
'maxent_treebank_pos_tagger',
'omw-1.4',
]
From e4e5e4676101b4a2243ce19e695d5b04e6be6e35 Mon Sep 17 00:00:00 2001
From: PrimozGodec
Date: Tue, 27 Aug 2024 14:48:57 +0200
Subject: [PATCH 3/5] Load maxent_treebank_pos_tagger_tab instead of
maxent_treebank_pos_tagger
---
orangecontrib/text/misc/nltk_data_download.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/orangecontrib/text/misc/nltk_data_download.py b/orangecontrib/text/misc/nltk_data_download.py
index 78383402d..6cf49ec24 100644
--- a/orangecontrib/text/misc/nltk_data_download.py
+++ b/orangecontrib/text/misc/nltk_data_download.py
@@ -23,7 +23,7 @@
'opinion_lexicon',
'vader_lexicon',
'averaged_perceptron_tagger_eng',
- 'maxent_treebank_pos_tagger',
+ 'maxent_treebank_pos_tagger_tab',
'omw-1.4',
]
From b4286b48d9f860b96ed04e6c2d1eb47e60da7179 Mon Sep 17 00:00:00 2001
From: Marko Toplak
Date: Thu, 29 Aug 2024 14:12:39 +0200
Subject: [PATCH 4/5] add nltk to the oldest tests
---
tox.ini | 1 +
1 file changed, 1 insertion(+)
diff --git a/tox.ini b/tox.ini
index 1dc69c880..3402f9580 100644
--- a/tox.ini
+++ b/tox.ini
@@ -29,6 +29,7 @@ deps =
oldest: orange-canvas-core==0.1.30
oldest: orange-widget-base==4.20.0
oldest: pandas==1.4.0
+ oldest: nltk==3.8.1
latest: https://github.com/biolab/orange3/archive/refs/heads/master.zip#egg=orange3
latest: https://github.com/biolab/orange3-network/archive/refs/heads/master.zip#egg=orange3-network
latest: https://github.com/biolab/orange-canvas-core/archive/refs/heads/master.zip#egg=orange-canvas-core
From 3d773ae28609ac7f02c2601ad8e9dd450313068a Mon Sep 17 00:00:00 2001
From: Marko Toplak
Date: Thu, 29 Aug 2024 14:19:32 +0200
Subject: [PATCH 5/5] raise nltk requirement
---
requirements.txt | 2 +-
tox.ini | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/requirements.txt b/requirements.txt
index 538b9667a..e6fadc0b6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,7 +7,7 @@ gensim>=4.3.3
httpx!=0.23.1 # temporary fix - semantic search fail (but only in tests)
langdetect
lemmagen3
-nltk>=3.0.5 # TweetTokenizer introduced in 3.0.5
+nltk>=3.9.1
numpy
odfpy>=1.3.5
Orange3 >=3.35.0
diff --git a/tox.ini b/tox.ini
index 3402f9580..7758b2842 100644
--- a/tox.ini
+++ b/tox.ini
@@ -29,7 +29,7 @@ deps =
oldest: orange-canvas-core==0.1.30
oldest: orange-widget-base==4.20.0
oldest: pandas==1.4.0
- oldest: nltk==3.8.1
+ oldest: nltk==3.9.1
latest: https://github.com/biolab/orange3/archive/refs/heads/master.zip#egg=orange3
latest: https://github.com/biolab/orange3-network/archive/refs/heads/master.zip#egg=orange3-network
latest: https://github.com/biolab/orange-canvas-core/archive/refs/heads/master.zip#egg=orange-canvas-core