diff --git a/orangecontrib/text/tests/data/old-bow-pickle.pkl b/orangecontrib/text/tests/data/old-bow-pickle.pkl new file mode 100644 index 000000000..8304a19fe Binary files /dev/null and b/orangecontrib/text/tests/data/old-bow-pickle.pkl differ diff --git a/orangecontrib/text/tests/test_vectorization_base.py b/orangecontrib/text/tests/test_vectorization_base.py index 60edbe10e..f289fc66e 100644 --- a/orangecontrib/text/tests/test_vectorization_base.py +++ b/orangecontrib/text/tests/test_vectorization_base.py @@ -1,10 +1,14 @@ +import os import unittest import numpy as np import scipy.sparse as sp from orangecontrib.text.corpus import Corpus -from orangecontrib.text.vectorization.base import BaseVectorizer +from orangecontrib.text.vectorization.base import ( + BaseVectorizer, + VectorizationComputeValue, +) class BaseVectorizationTest(unittest.TestCase): @@ -32,3 +36,33 @@ def test_variable_attributes(self): for a in c2.domain.attributes: self.assertIn('foo', a.attributes) + + +class TestVectorizationComputeValue(unittest.TestCase): + def test_unpickling_old_pickle(self): + """ + Before orange3-text version 1.12.0, variable was wrongly set to the current + variable (the variable that has this compute value attached) instead of + the original variable, which caused failures after the latest changes in core + Orange. Since variable from VectorizationComputeValue is never used in + practice, we do not set it anymore (it is always None for + VectorizationComputeValue). + However, it is still set in pickles created before 1.12.0. With this test + we check that old pickles with variables that have VectorizationComputeValue + are unpickled correctly. 
+ """ + path = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "data", "old-bow-pickle.pkl" + ) + data = Corpus.from_file(path) + self.assertEqual(len(data), 3) + self.assertIsInstance(data.domain["!"].compute_value, VectorizationComputeValue) + self.assertIsInstance( + data.domain["aboard"].compute_value, VectorizationComputeValue + ) + self.assertIsNone(data.domain["!"].compute_value.variable) + self.assertIsNone(data.domain["aboard"].compute_value.variable) + + +if __name__ == "__main__": + unittest.main() diff --git a/orangecontrib/text/vectorization/base.py b/orangecontrib/text/vectorization/base.py index 76ba44037..d98c06671 100644 --- a/orangecontrib/text/vectorization/base.py +++ b/orangecontrib/text/vectorization/base.py @@ -76,3 +76,17 @@ def __init__(self, compute_shared, name): def compute(self, _, shared_data): ind = shared_data.feature_name_to_index[self.name] return shared_data.X[:, ind] + + def __setstate__(self, state): + """ + Before orange3-text version 1.12.0, variable was wrongly set to the current + variable (the variable that has this compute value attached) instead of + the original variable, which caused failures after the latest changes in core + Orange. Since variable from VectorizationComputeValue is never used in + practice, we do not set it anymore (it is always None for + VectorizationComputeValue). + However, it is still set in pickles created before 1.12.0, and this method + resets it to None when unpickling old pickles. + """ + state["variable"] = None + self.__dict__.update(state)