Skip to content

Commit

Permalink
Merge pull request #919 from VesnaT/language_unpickle
Browse files Browse the repository at this point in the history
[FIX] Corpus: Unpickle corpus without language
  • Loading branch information
PrimozGodec authored Nov 11, 2022
2 parents 5c55009 + d650aee commit f969494
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 2 deletions.
7 changes: 6 additions & 1 deletion orangecontrib/text/corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from copy import copy, deepcopy
from numbers import Integral
from itertools import chain
from typing import Union, Optional, List, Tuple
from typing import Union, Optional, List, Tuple, Dict
from warnings import warn

import nltk
Expand Down Expand Up @@ -368,6 +368,11 @@ def titles(self):
def language(self):
    """Return the value stored under the "language" key of the attributes.

    Raises:
        KeyError: if ``self.attributes`` has no "language" entry.
    """
    corpus_attributes = self.attributes
    return corpus_attributes["language"]

def __setstate__(self, state: Dict):
    """Restore pickled state and guarantee a "language" attribute exists.

    The parent class restores ``state`` first. If the restored
    ``self.attributes`` has no "language" key, it is added with the
    value ``None``; an existing value is left untouched.

    Args:
        state: the pickled state, forwarded unchanged to the parent class.
    """
    super().__setstate__(state)
    restored_attributes = self.attributes
    if "language" in restored_attributes:
        return
    # NOTE(review): presumably covers corpora pickled before the
    # "language" attribute was introduced - confirm against history.
    restored_attributes["language"] = None

def documents_from_features(self, feats):
"""
Args:
Expand Down
6 changes: 6 additions & 0 deletions orangecontrib/text/tests/test_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -691,6 +691,12 @@ def test_remove_attributes_copy(self):
"""
self.assertLess(datetime.today(), datetime(2024, 1, 1))

def test_language_unpickle(self):
    """Loading "book-excerpts.pkl" yields a corpus whose language is None.

    The pickle is read from the "data" directory next to this test module.
    """
    data_dir = os.path.join(os.path.dirname(__file__), "data")
    pickle_path = os.path.abspath(
        os.path.join(data_dir, "book-excerpts.pkl")
    )
    unpickled = Corpus.from_file(pickle_path)
    self.assertIsNone(unpickled.attributes["language"])


@skipIf(summarize is None, "summarize is not available for orange3<=3.28")
class TestCorpusSummaries(unittest.TestCase):
Expand Down
12 changes: 11 additions & 1 deletion orangecontrib/text/widgets/tests/test_owcorpus.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import unittest

import numpy as np
Expand Down Expand Up @@ -325,6 +326,15 @@ def test_guess_language(self):
self.assertEqual("English", self.widget.language)
self.assertEqual("en", self.get_output(self.widget.Outputs.corpus).language)

def test_language_unpickle(self):
    """Sending the unpickled corpus sets the widget language to "English".

    The pickle "book-excerpts.pkl" is read from the "tests/data" directory
    two levels above this test module.
    """
    tests_dir = os.path.dirname(__file__)
    relative_pickle = os.path.join(
        tests_dir, "..", "..", "tests", "data", "book-excerpts.pkl"
    )
    unpickled = Corpus.from_file(os.path.abspath(relative_pickle))
    self.send_signal(self.widget.Inputs.data, unpickled)
    # Wait for the widget to finish before inspecting its state.
    self.wait_until_finished()
    self.assertEqual(self.widget.language, "English")


if __name__ == "__main__":
    # Run the module's tests when executed as a script. A single call is
    # enough: unittest.main() exits the interpreter by default, so a second
    # call after it would never be reached.
    unittest.main()

0 comments on commit f969494

Please sign in to comment.