Skip to content

Commit

Permalink
[FIX] VectorizationComputeValue - fix unpickling old pickles
Browse files Browse the repository at this point in the history
  • Loading branch information
PrimozGodec committed Sep 16, 2022
1 parent 2bb750f commit 0f29b13
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 1 deletion.
Binary file added orangecontrib/text/tests/data/old-bow-pickle.pkl
Binary file not shown.
36 changes: 35 additions & 1 deletion orangecontrib/text/tests/test_vectorization_base.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
import os
import unittest

import numpy as np
import scipy.sparse as sp

from orangecontrib.text.corpus import Corpus
from orangecontrib.text.vectorization.base import BaseVectorizer
from orangecontrib.text.vectorization.base import (
BaseVectorizer,
VectorizationComputeValue,
)


class BaseVectorizationTest(unittest.TestCase):
Expand Down Expand Up @@ -32,3 +36,33 @@ def test_variable_attributes(self):

for a in c2.domain.attributes:
self.assertIn('foo', a.attributes)


class TestVectorizationComputeValue(unittest.TestCase):
    """Regression tests for unpickling ``VectorizationComputeValue``."""

    def test_unpickling_old_pickle(self):
        """
        Check that corpora pickled before orange3-text 1.12.0 still load.

        Before version 1.12.0 the compute value's ``variable`` was wrongly
        set to the current variable (the one the compute value is attached
        to) instead of the original variable, which caused failures after
        later changes in core Orange. Because ``variable`` of
        VectorizationComputeValue is never used in practice, it is no longer
        set and is always None.
        Pickles created before 1.12.0 still carry the old value, so this
        test loads such a pickle and verifies that the variables with a
        VectorizationComputeValue are un-pickled correctly.
        """
        tests_dir = os.path.dirname(os.path.abspath(__file__))
        pickle_path = os.path.join(tests_dir, "data", "old-bow-pickle.pkl")
        corpus = Corpus.from_file(pickle_path)
        self.assertEqual(len(corpus), 3)

        tokens = ("!", "aboard")
        # The compute values must come back as the expected type ...
        for token in tokens:
            self.assertIsInstance(
                corpus.domain[token].compute_value, VectorizationComputeValue
            )
        # ... and the stale ``variable`` stored in the old pickle must be gone.
        for token in tokens:
            self.assertIsNone(corpus.domain[token].compute_value.variable)


if __name__ == "__main__":
unittest.main()
14 changes: 14 additions & 0 deletions orangecontrib/text/vectorization/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,17 @@ def __init__(self, compute_shared, name):
def compute(self, _, shared_data):
    """
    Return this feature's column from the shared data.

    The column position is looked up by ``self.name`` in
    ``shared_data.feature_name_to_index`` and then sliced out of
    ``shared_data.X``. The second positional argument is ignored.
    """
    column = shared_data.feature_name_to_index[self.name]
    matrix = shared_data.X
    return matrix[:, column]

def __setstate__(self, state):
    """
    Restore the instance from ``state`` and force ``variable`` to None.

    Before orange3-text version 1.12.0 ``variable`` was wrongly set to the
    current variable (the variable that has this compute value attached)
    instead of the original variable, which caused failures after later
    changes in core Orange. Since ``variable`` of VectorizationComputeValue
    is never used in practice, it is no longer set (it is always None for
    VectorizationComputeValue).
    It is still present in pickles created before 1.12.0, so it is dropped
    here when such old pickles are un-pickled.

    Parameters
    ----------
    state : dict
        Attribute mapping to restore. It is read but never modified.
    """
    self.__dict__.update(state)
    # Reset the attribute on the instance rather than inside ``state``: the
    # mapping may be shared with another object (a shallow ``copy.copy``
    # hands over the source instance's own ``__dict__``), so writing into it
    # would silently alter that object as well.
    self.__dict__["variable"] = None

0 comments on commit 0f29b13

Please sign in to comment.