From 738e3154c225614b3ebeb562a1a7f2c586b83356 Mon Sep 17 00:00:00 2001
From: Primoz Godec
Date: Fri, 17 Jun 2022 10:17:45 +0200
Subject: [PATCH] Score Documents - adapt to the latest changes in document
embedding
---
.../text/widgets/owscoredocuments.py | 8 ++++++--
.../widgets/tests/test_owscoredocuments.py | 19 +++++--------------
2 files changed, 11 insertions(+), 16 deletions(-)
diff --git a/orangecontrib/text/widgets/owscoredocuments.py b/orangecontrib/text/widgets/owscoredocuments.py
index 7efa08491..31393bf79 100644
--- a/orangecontrib/text/widgets/owscoredocuments.py
+++ b/orangecontrib/text/widgets/owscoredocuments.py
@@ -74,10 +74,14 @@ def _embedding_similarity(
emb = DocumentEmbedder(language)
cb_part = len(corpus) / (len(corpus) + len(words))
- documet_embeddings, skipped = emb(corpus, wrap_callback(callback, 0, cb_part))
+ documet_embeddings, skipped = emb.transform(
+ corpus, wrap_callback(callback, 0, cb_part)
+ )
assert skipped is None
+
+ words = [[w] for w in words]
word_embeddings = np.array(
- emb([[w] for w in words], wrap_callback(callback, cb_part, 1 - cb_part))
+ emb.transform(words, wrap_callback(callback, cb_part, 1 - cb_part))
)
return cosine_similarity(documet_embeddings.X, word_embeddings)
diff --git a/orangecontrib/text/widgets/tests/test_owscoredocuments.py b/orangecontrib/text/widgets/tests/test_owscoredocuments.py
index 5d545ba19..717fd89e9 100644
--- a/orangecontrib/text/widgets/tests/test_owscoredocuments.py
+++ b/orangecontrib/text/widgets/tests/test_owscoredocuments.py
@@ -13,7 +13,7 @@
from Orange.widgets.tests.utils import simulate
from orangecontrib.text import Corpus, preprocess
-from orangecontrib.text.vectorization.document_embedder import DocumentEmbedder
+from orangecontrib.text.vectorization.document_embedder import _ServerEmbedder
from orangecontrib.text.widgets.owscoredocuments import (
OWScoreDocuments,
SelectionMethods,
@@ -22,17 +22,8 @@
from orangecontrib.text.widgets.utils.words import create_words_table
-def embedding_mock(_, corpus, __):
- if isinstance(corpus, list):
- return np.ones((len(corpus), 10))
- else: # corpus is Corpus
- return (
- Corpus.from_numpy(
- domain=Domain([ContinuousVariable(str(i)) for i in range(10)]),
- X=np.ones((len(corpus), 10)),
- ),
- None,
- )
+def embedding_mock(_, data, callback=None):
+ return np.ones((len(data), 10))
class TestOWScoreDocuments(WidgetTest):
@@ -126,7 +117,7 @@ def test_guess_word_attribute(self):
self.send_signal(self.widget.Inputs.words, None)
self.assertIsNone(self.widget.words)
- @patch.object(DocumentEmbedder, "__call__", new=embedding_mock)
+ @patch.object(_ServerEmbedder, "embedd_data", new=embedding_mock)
def test_change_scorer(self):
model = self.widget.model
self.send_signal(self.widget.Inputs.corpus, self.corpus)
@@ -224,7 +215,7 @@ def test_word_appearance(self):
self.wait_until_finished()
self.assertListEqual([x[1] for x in self.widget.model], [1, 1, 1])
- @patch.object(DocumentEmbedder, "__call__", new=embedding_mock)
+ @patch.object(_ServerEmbedder, "embedd_data", new=embedding_mock)
def test_embedding_similarity(self):
corpus = self.create_corpus(
[