Skip to content

Commit

Permalink
Merge pull request #904 from PrimozGodec/fix-vectorization-cv
Browse files Browse the repository at this point in the history
[FIX] VectorizationComputeValue - fix unpickling old pickles
  • Loading branch information
ajdapretnar authored Sep 16, 2022
2 parents f9def97 + 8e2789d commit d450f48
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 1 deletion.
Binary file added orangecontrib/text/tests/data/old-bow-pickle.pkl
Binary file not shown.
36 changes: 35 additions & 1 deletion orangecontrib/text/tests/test_vectorization_base.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
import os
import unittest

import numpy as np
import scipy.sparse as sp

from orangecontrib.text.corpus import Corpus
from orangecontrib.text.vectorization.base import BaseVectorizer
from orangecontrib.text.vectorization.base import (
BaseVectorizer,
VectorizationComputeValue,
)


class BaseVectorizationTest(unittest.TestCase):
Expand Down Expand Up @@ -32,3 +36,33 @@ def test_variable_attributes(self):

for a in c2.domain.attributes:
self.assertIn('foo', a.attributes)


class TestVectorizationComputeValue(unittest.TestCase):
    """Regression tests for un-pickling ``VectorizationComputeValue``."""

    def test_unpickling_old_pickle(self):
        """
        Load a bag-of-words pickle created before orange3-text 1.12.0.

        Before version 1.12.0 the compute value's ``variable`` was wrongly
        set to the current variable (the one the compute value is attached
        to) instead of the original variable, which started to fail after
        changes in core Orange. Because ``variable`` of a
        VectorizationComputeValue is never used in practice, it is no longer
        set and is always None.

        Pickles created before 1.12.0 still carry the old value, so this
        test verifies that such a pickle is un-pickled correctly and that
        ``variable`` ends up as None.
        """
        tests_dir = os.path.dirname(os.path.abspath(__file__))
        pickle_path = os.path.join(tests_dir, "data", "old-bow-pickle.pkl")
        corpus = Corpus.from_file(pickle_path)
        self.assertEqual(len(corpus), 3)

        feature_names = ("!", "aboard")
        # The compute values must survive un-pickling with the right type ...
        for feature in feature_names:
            self.assertIsInstance(
                corpus.domain[feature].compute_value, VectorizationComputeValue
            )
        # ... and the stale ``variable`` stored in the old pickle must be gone.
        for feature in feature_names:
            self.assertIsNone(corpus.domain[feature].compute_value.variable)


# Run this module's test cases when the file is executed directly as a script.
if __name__ == "__main__":
unittest.main()
14 changes: 14 additions & 0 deletions orangecontrib/text/vectorization/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,17 @@ def __init__(self, compute_shared, name):
def compute(self, _, shared_data):
    """
    Return the column of the shared matrix that belongs to this feature.

    The column position is looked up under ``self.name`` in
    ``shared_data.feature_name_to_index`` and used to slice
    ``shared_data.X``. The second positional argument is ignored.
    """
    column = shared_data.feature_name_to_index[self.name]
    return shared_data.X[:, column]

def __setstate__(self, state):
    """
    Restore the instance from ``state`` and always reset ``variable``.

    Before orange3-text version 1.12.0 ``variable`` was wrongly set to the
    current variable (the one this compute value is attached to) instead
    of the original variable, which caused failures after changes in core
    Orange. Since ``variable`` of a VectorizationComputeValue is never
    used in practice, it is no longer set (it is always None).

    Pickles created before 1.12.0 still contain the old value; it is
    dropped here when such pickles are un-pickled.

    Parameters
    ----------
    state : dict
        Attribute dictionary produced when the object was pickled. It is
        left unmodified.
    """
    # Override ``variable`` through the keyword argument instead of writing
    # into ``state``: the dict belongs to the caller, and users of the
    # pickle protocol other than ``pickle`` itself (e.g. ``copy.copy``) may
    # pass a dict that aliases a live object's ``__dict__``.
    self.__dict__.update(state, variable=None)

0 comments on commit d450f48

Please sign in to comment.