Skip to content

Commit

Permalink
Merge pull request #866 from PrimozGodec/score-document-fix-emb
Browse files: browse the repository at this point in the history
[FIX] Score Documents - adapt to the latest changes in document embedding
  • Loading branch information
PrimozGodec authored Jun 17, 2022
2 parents c952337 + 738e315 commit c05b4fb
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 16 deletions.
8 changes: 6 additions & 2 deletions orangecontrib/text/widgets/owscoredocuments.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,14 @@ def _embedding_similarity(
emb = DocumentEmbedder(language)

cb_part = len(corpus) / (len(corpus) + len(words))
documet_embeddings, skipped = emb(corpus, wrap_callback(callback, 0, cb_part))
documet_embeddings, skipped = emb.transform(
corpus, wrap_callback(callback, 0, cb_part)
)
assert skipped is None

words = [[w] for w in words]
word_embeddings = np.array(
emb([[w] for w in words], wrap_callback(callback, cb_part, 1 - cb_part))
emb.transform(words, wrap_callback(callback, cb_part, 1 - cb_part))
)
return cosine_similarity(documet_embeddings.X, word_embeddings)

Expand Down
19 changes: 5 additions & 14 deletions orangecontrib/text/widgets/tests/test_owscoredocuments.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from Orange.widgets.tests.utils import simulate

from orangecontrib.text import Corpus, preprocess
from orangecontrib.text.vectorization.document_embedder import DocumentEmbedder
from orangecontrib.text.vectorization.document_embedder import _ServerEmbedder
from orangecontrib.text.widgets.owscoredocuments import (
OWScoreDocuments,
SelectionMethods,
Expand All @@ -22,17 +22,8 @@
from orangecontrib.text.widgets.utils.words import create_words_table


def embedding_mock(_, corpus, __):
    """Mock replacement for ``DocumentEmbedder.__call__`` used in the tests.

    The first and third positional arguments (the patched object's ``self``
    and the progress callback) are accepted but ignored.

    Parameters
    ----------
    corpus : list or Corpus
        Items to "embed". Only its length is used.

    Returns
    -------
    numpy.ndarray or tuple
        For a ``list`` input, an all-ones array of shape
        ``(len(corpus), 10)``. For any other input (expected to be a
        ``Corpus``), a pair ``(embedded_corpus, None)`` where
        ``embedded_corpus`` is built with ``Corpus.from_numpy`` from ten
        continuous variables named ``"0"`` .. ``"9"`` and an all-ones ``X``.
        The trailing ``None`` fills the second slot of the pair.
    """
    n_dims = 10
    ones = np.ones((len(corpus), n_dims))

    # Words are passed as a plain list: return the raw embedding matrix.
    if isinstance(corpus, list):
        return ones

    # Otherwise corpus is a Corpus: wrap the matrix and report nothing skipped.
    domain = Domain([ContinuousVariable(str(i)) for i in range(n_dims)])
    return Corpus.from_numpy(domain=domain, X=ones), None
def embedding_mock(_, data, callback=None):
    """Mock replacement for ``_ServerEmbedder.embedd_data`` used in the tests.

    Parameters
    ----------
    _ :
        Placeholder for the patched object's ``self``; ignored.
    data :
        Any sized collection of items to "embed". Only ``len(data)`` is used.
    callback : optional
        Accepted for signature compatibility; never called.

    Returns
    -------
    numpy.ndarray
        All-ones array of shape ``(len(data), 10)``, i.e. one constant
        10-dimensional vector per input item.
    """
    return np.ones((len(data), 10))


class TestOWScoreDocuments(WidgetTest):
Expand Down Expand Up @@ -126,7 +117,7 @@ def test_guess_word_attribute(self):
self.send_signal(self.widget.Inputs.words, None)
self.assertIsNone(self.widget.words)

@patch.object(DocumentEmbedder, "__call__", new=embedding_mock)
@patch.object(_ServerEmbedder, "embedd_data", new=embedding_mock)
def test_change_scorer(self):
model = self.widget.model
self.send_signal(self.widget.Inputs.corpus, self.corpus)
Expand Down Expand Up @@ -224,7 +215,7 @@ def test_word_appearance(self):
self.wait_until_finished()
self.assertListEqual([x[1] for x in self.widget.model], [1, 1, 1])

@patch.object(DocumentEmbedder, "__call__", new=embedding_mock)
@patch.object(_ServerEmbedder, "embedd_data", new=embedding_mock)
def test_embedding_similarity(self):
corpus = self.create_corpus(
[
Expand Down

0 comments on commit c05b4fb

Please sign in to comment.