Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FIX] VectorizationComputeValue - fix unpickling old pickles #904

Merged
merged 1 commit into from
Sep 16, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added orangecontrib/text/tests/data/old-bow-pickle.pkl
Binary file not shown.
36 changes: 35 additions & 1 deletion orangecontrib/text/tests/test_vectorization_base.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
import os
import unittest

import numpy as np
import scipy.sparse as sp

from orangecontrib.text.corpus import Corpus
from orangecontrib.text.vectorization.base import BaseVectorizer
from orangecontrib.text.vectorization.base import (
BaseVectorizer,
VectorizationComputeValue,
)


class BaseVectorizationTest(unittest.TestCase):
Expand Down Expand Up @@ -32,3 +36,33 @@ def test_variable_attributes(self):

for a in c2.domain.attributes:
self.assertIn('foo', a.attributes)


class TestVectorizationComputeValue(unittest.TestCase):
    def test_unpickling_old_pickle(self):
        """
        Check that a bag-of-words pickle from before orange3-text 1.12.0 loads.

        Those older versions wrongly stored the current variable (the one the
        compute value is attached to) instead of the original variable, which
        started to cause failures after changes in core Orange. The variable
        of a VectorizationComputeValue is never used in practice, so it is no
        longer set and is always None. Pickles created before 1.12.0 still
        contain it, so this test verifies that variables carrying a
        VectorizationComputeValue are un-pickled correctly from such a file.
        """
        tests_dir = os.path.dirname(os.path.abspath(__file__))
        pickle_path = os.path.join(tests_dir, "data", "old-bow-pickle.pkl")
        corpus = Corpus.from_file(pickle_path)
        self.assertEqual(len(corpus), 3)

        checked_features = ("!", "aboard")
        # The compute values must keep their type after un-pickling ...
        for feature in checked_features:
            self.assertIsInstance(
                corpus.domain[feature].compute_value, VectorizationComputeValue
            )
        # ... and the stale variable stored in the old pickle must be gone.
        for feature in checked_features:
            self.assertIsNone(corpus.domain[feature].compute_value.variable)


# Run the tests of this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
14 changes: 14 additions & 0 deletions orangecontrib/text/vectorization/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,17 @@ def __init__(self, compute_shared, name):
def compute(self, _, shared_data):
    """
    Return the column of ``shared_data.X`` that belongs to this feature.

    The column index is looked up under ``self.name`` in
    ``shared_data.feature_name_to_index``. The second positional argument
    is ignored.
    """
    return shared_data.X[:, shared_data.feature_name_to_index[self.name]]

def __setstate__(self, state):
    """
    Restore the instance from ``state``, always resetting ``variable`` to None.

    Before orange3-text version 1.12.0 the variable was wrongly set to the
    current variable (the variable that has this compute value attached)
    instead of the original variable, which caused failures after the latest
    changes in core Orange. Since the variable of VectorizationComputeValue
    is never used in practice, it is not set anymore (it is always None for
    VectorizationComputeValue). It is still present in pickles created
    before 1.12.0, so it is overridden here when unpickling old pickles.

    ``state`` is the attribute mapping to restore; it is left unmodified.
    """
    # Merge into a fresh dict instead of assigning into ``state``: the mapping
    # belongs to the caller and may be shared (an object's default state is
    # its own ``__dict__``), so writing into it could alter another object.
    self.__dict__.update({**state, "variable": None})