Skip to content

Commit

Permalink
Merge pull request #937 from PrimozGodec/corpus-preserve-name
Browse files Browse the repository at this point in the history
[FIX] Corpus - preserve name in extend_attributes
  • Loading branch information
PrimozGodec authored Feb 7, 2023
2 parents 28b7684 + 5c77b19 commit a846f1c
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 5 deletions.
9 changes: 4 additions & 5 deletions orangecontrib/text/corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def _setup_corpus(self, text_features: List[Variable] = None) -> None:

@property
def used_preprocessor(self):
return self.__used_preprocessor # type: PreprocessorList
return self.__used_preprocessor

@used_preprocessor.setter
def used_preprocessor(self, pp):
Expand Down Expand Up @@ -261,7 +261,7 @@ def _infer_text_features(self):
def extend_attributes(
self, X, feature_names, feature_values=None, compute_values=None,
var_attrs=None, sparse=False, rename_existing=False
):
):
"""
Append features to corpus. If `feature_values` argument is present,
features will be Discrete else Continuous.
Expand Down Expand Up @@ -308,9 +308,7 @@ def _rename_features(additional_names: List) -> Tuple[List, List, List]:
feature_values = [None] * X.shape[1]

# rename existing variables if required
curr_attributes, curr_class_var, curr_metas = _rename_features(
feature_names
)
curr_attributes, curr_class_var, curr_metas = _rename_features(feature_names)
if not rename_existing:
# rename new feature names if required
feature_names = get_unique_names(
Expand Down Expand Up @@ -341,6 +339,7 @@ def _rename_features(additional_names: List) -> Tuple[List, List, List]:
self.W.copy(),
text_features=copy(self.text_features),
)
c.name = self.name # keep corpus's name
Corpus.retain_preprocessing(self, c)
return c

Expand Down
10 changes: 10 additions & 0 deletions orangecontrib/text/tests/test_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,16 @@ def test_extend_attributes_keep_preprocessing(self):
self.assertEqual(new_c.ngram_range, c.ngram_range)
self.assertEqual(new_c.attributes, c.attributes)

def test_extend_attributes_keep_name(self):
    """
    Extending a corpus with extra attributes must not change its name.
    """
    corpus = Corpus.from_file('book-excerpts')
    # the corpus loaded from file is expected to be named after the file
    self.assertEqual("book-excerpts", corpus.name)

    # three random columns, one row per document in the corpus
    extra_columns = np.random.random((len(corpus), 3))
    extended = corpus.extend_attributes(extra_columns, ['1', '2', '3'])

    self.assertEqual("book-excerpts", extended.name)

def test_from_table(self):
t = Table.from_file('brown-selected')
self.assertIsInstance(t, Table)
Expand Down
16 changes: 16 additions & 0 deletions orangecontrib/text/widgets/tests/test_owdocumentembedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,22 @@ def test_sbert(self):
self.assertTupleEqual(self.corpus.domain.metas, result.domain.metas)
self.assertEqual(384, len(result.domain.attributes))

@patch(PATCH_METHOD, make_dummy_post(b'{"embedding": [1.3, 1]}'))
def test_corpus_name_preserved(self):
    """
    The corpus on the widget's output must keep the name of the input
    corpus, both for the initially selected embedder and after clicking
    the first radio button.
    """
    expected_name = "deerwester"

    # first pass -- the original test labels this the fastText embedder
    self.send_signal("Corpus", self.corpus)
    # sanity check: the input corpus already carries a name
    self.assertEqual(expected_name, self.corpus.name)
    embedded = self.get_output(self.widget.Outputs.corpus)
    self.assertIsNotNone(embedded)
    self.assertEqual(expected_name, embedded.name)

    # second pass -- the original test labels this the SBERT embedder;
    # presumably the first radio button selects it (verify against widget)
    radio_buttons = self.widget.findChildren(QRadioButton)
    radio_buttons[0].click()
    embedded = self.get_output(self.widget.Outputs.corpus)
    self.assertIsNotNone(embedded)
    self.assertEqual(expected_name, embedded.name)


if __name__ == "__main__":
unittest.main()

0 comments on commit a846f1c

Please sign in to comment.