From 738e3154c225614b3ebeb562a1a7f2c586b83356 Mon Sep 17 00:00:00 2001 From: Primoz Godec Date: Fri, 17 Jun 2022 10:17:45 +0200 Subject: [PATCH] Score Documents - adapt to the latest changes in document embedding --- .../text/widgets/owscoredocuments.py | 8 ++++++-- .../widgets/tests/test_owscoredocuments.py | 19 +++++-------------- 2 files changed, 11 insertions(+), 16 deletions(-) diff --git a/orangecontrib/text/widgets/owscoredocuments.py b/orangecontrib/text/widgets/owscoredocuments.py index 7efa08491..31393bf79 100644 --- a/orangecontrib/text/widgets/owscoredocuments.py +++ b/orangecontrib/text/widgets/owscoredocuments.py @@ -74,10 +74,14 @@ def _embedding_similarity( emb = DocumentEmbedder(language) cb_part = len(corpus) / (len(corpus) + len(words)) - documet_embeddings, skipped = emb(corpus, wrap_callback(callback, 0, cb_part)) + documet_embeddings, skipped = emb.transform( + corpus, wrap_callback(callback, 0, cb_part) + ) assert skipped is None + + words = [[w] for w in words] word_embeddings = np.array( - emb([[w] for w in words], wrap_callback(callback, cb_part, 1 - cb_part)) + emb.transform(words, wrap_callback(callback, cb_part, 1 - cb_part)) ) return cosine_similarity(documet_embeddings.X, word_embeddings) diff --git a/orangecontrib/text/widgets/tests/test_owscoredocuments.py b/orangecontrib/text/widgets/tests/test_owscoredocuments.py index 5d545ba19..717fd89e9 100644 --- a/orangecontrib/text/widgets/tests/test_owscoredocuments.py +++ b/orangecontrib/text/widgets/tests/test_owscoredocuments.py @@ -13,7 +13,7 @@ from Orange.widgets.tests.utils import simulate from orangecontrib.text import Corpus, preprocess -from orangecontrib.text.vectorization.document_embedder import DocumentEmbedder +from orangecontrib.text.vectorization.document_embedder import _ServerEmbedder from orangecontrib.text.widgets.owscoredocuments import ( OWScoreDocuments, SelectionMethods, @@ -22,17 +22,8 @@ from orangecontrib.text.widgets.utils.words import create_words_table -def embedding_mock(_, corpus, __): - if isinstance(corpus, list): - return np.ones((len(corpus), 10)) - else: # corpus is Corpus - return ( - Corpus.from_numpy( - domain=Domain([ContinuousVariable(str(i)) for i in range(10)]), - X=np.ones((len(corpus), 10)), - ), - None, - ) +def embedding_mock(_, data, callback=None): + return np.ones((len(data), 10)) class TestOWScoreDocuments(WidgetTest): @@ -126,7 +117,7 @@ def test_guess_word_attribute(self): self.send_signal(self.widget.Inputs.words, None) self.assertIsNone(self.widget.words) - @patch.object(DocumentEmbedder, "__call__", new=embedding_mock) + @patch.object(_ServerEmbedder, "embedd_data", new=embedding_mock) def test_change_scorer(self): model = self.widget.model self.send_signal(self.widget.Inputs.corpus, self.corpus) @@ -224,7 +215,7 @@ def test_word_appearance(self): self.wait_until_finished() self.assertListEqual([x[1] for x in self.widget.model], [1, 1, 1]) - @patch.object(DocumentEmbedder, "__call__", new=embedding_mock) + @patch.object(_ServerEmbedder, "embedd_data", new=embedding_mock) def test_embedding_similarity(self): corpus = self.create_corpus( [