From c378f760c6a54ad7d6ccd1bcc8405415fde17289 Mon Sep 17 00:00:00 2001
From: Primoz Godec
Date: Thu, 26 May 2022 11:34:29 +0200
Subject: [PATCH] Document embedder - use base vectorizer
---
.../text/tests/test_documentembedder.py | 36 +--
.../text/vectorization/document_embedder.py | 46 ++--
.../text/widgets/owdocumentembedding.py | 248 +++++++-----------
.../widgets/tests/test_owdocumentembedding.py | 38 +--
.../text/widgets/utils/owbasevectorizer.py | 4 -
.../utils/tests/test_owbasevectorizer.py | 4 +-
6 files changed, 139 insertions(+), 237 deletions(-)
diff --git a/orangecontrib/text/tests/test_documentembedder.py b/orangecontrib/text/tests/test_documentembedder.py
index 97745a682..f77527368 100644
--- a/orangecontrib/text/tests/test_documentembedder.py
+++ b/orangecontrib/text/tests/test_documentembedder.py
@@ -34,22 +34,22 @@ def tearDown(self):
@patch(PATCH_METHOD)
def test_with_empty_corpus(self, mock):
- self.assertIsNone(self.embedder(self.corpus[:0])[0])
- self.assertIsNone(self.embedder(self.corpus[:0])[1])
+ self.assertIsNone(self.embedder.transform(self.corpus[:0])[0])
+ self.assertIsNone(self.embedder.transform(self.corpus[:0])[1])
mock.request.assert_not_called()
mock.get_response.assert_not_called()
self.assertEqual(self.embedder._embedder._cache._cache_dict, dict())
@patch(PATCH_METHOD, make_dummy_post(b'{"embedding": [0.3, 1]}'))
def test_success_subset(self):
- res, skipped = self.embedder(self.corpus[[0]])
+ res, skipped = self.embedder.transform(self.corpus[[0]])
assert_array_equal(res.X, [[0.3, 1]])
self.assertEqual(len(self.embedder._embedder._cache._cache_dict), 1)
self.assertIsNone(skipped)
@patch(PATCH_METHOD, make_dummy_post(b'{"embedding": [0.3, 1]}'))
def test_success_shapes(self):
- res, skipped = self.embedder(self.corpus)
+ res, skipped = self.embedder.transform(self.corpus)
self.assertEqual(res.X.shape, (len(self.corpus), 2))
self.assertEqual(len(res.domain.variables),
len(self.corpus.domain.variables) + 2)
@@ -58,7 +58,7 @@ def test_success_shapes(self):
@patch(PATCH_METHOD, make_dummy_post(b''))
def test_empty_response(self):
with self.assertWarns(RuntimeWarning):
- res, skipped = self.embedder(self.corpus[[0]])
+ res, skipped = self.embedder.transform(self.corpus[[0]])
self.assertIsNone(res)
self.assertEqual(len(skipped), 1)
self.assertEqual(len(self.embedder._embedder._cache._cache_dict), 0)
@@ -66,7 +66,7 @@ def test_empty_response(self):
@patch(PATCH_METHOD, make_dummy_post(b'str'))
def test_invalid_response(self):
with self.assertWarns(RuntimeWarning):
- res, skipped = self.embedder(self.corpus[[0]])
+ res, skipped = self.embedder.transform(self.corpus[[0]])
self.assertIsNone(res)
self.assertEqual(len(skipped), 1)
self.assertEqual(len(self.embedder._embedder._cache._cache_dict), 0)
@@ -74,7 +74,7 @@ def test_invalid_response(self):
@patch(PATCH_METHOD, make_dummy_post(b'{"embeddings": [0.3, 1]}'))
def test_invalid_json_key(self):
with self.assertWarns(RuntimeWarning):
- res, skipped = self.embedder(self.corpus[[0]])
+ res, skipped = self.embedder.transform(self.corpus[[0]])
self.assertIsNone(res)
self.assertEqual(len(skipped), 1)
self.assertEqual(len(self.embedder._embedder._cache._cache_dict), 0)
@@ -82,7 +82,7 @@ def test_invalid_json_key(self):
@patch(PATCH_METHOD, make_dummy_post(b'{"embedding": [0.3, 1]}'))
def test_persistent_caching(self):
self.assertEqual(len(self.embedder._embedder._cache._cache_dict), 0)
- self.embedder(self.corpus[[0]])
+ self.embedder.transform(self.corpus[[0]])
self.assertEqual(len(self.embedder._embedder._cache._cache_dict), 1)
self.embedder._embedder._cache.persist_cache()
@@ -98,7 +98,7 @@ def test_cache_for_different_languages(self):
embedder = DocumentEmbedder(language='sl')
embedder.clear_cache()
self.assertEqual(len(embedder._embedder._cache._cache_dict), 0)
- embedder(self.corpus[[0]])
+ embedder.transform(self.corpus[[0]])
self.assertEqual(len(embedder._embedder._cache._cache_dict), 1)
embedder._embedder._cache.persist_cache()
@@ -116,33 +116,27 @@ def test_cache_for_different_aggregators(self):
embedder = DocumentEmbedder(aggregator='max')
embedder.clear_cache()
self.assertEqual(len(embedder._embedder._cache._cache_dict), 0)
- embedder(self.corpus[[0]])
+ embedder.transform(self.corpus[[0]])
self.assertEqual(len(embedder._embedder._cache._cache_dict), 1)
embedder._embedder._cache.persist_cache()
embedder = DocumentEmbedder(aggregator='min')
self.assertEqual(len(embedder._embedder._cache._cache_dict), 1)
- embedder(self.corpus[[0]])
+ embedder.transform(self.corpus[[0]])
self.assertEqual(len(embedder._embedder._cache._cache_dict), 2)
- @patch(PATCH_METHOD, make_dummy_post(b'{"embedding": [0.3, 1]}'))
- def test_with_statement(self):
- with self.embedder as embedder:
- res, skipped = embedder(self.corpus[[0]])
- assert_array_equal(res.X, [[0.3, 1]])
-
@patch(PATCH_METHOD, make_dummy_post(b'{"embedding": [0.3, 1]}'))
def test_cancel(self):
self.assertFalse(self.embedder._embedder._cancelled)
self.embedder._embedder._cancelled = True
with self.assertRaises(Exception):
- self.embedder(self.corpus[[0]])
+ self.embedder.transform(self.corpus[[0]])
@patch(PATCH_METHOD, side_effect=OSError)
def test_connection_error(self, _):
embedder = DocumentEmbedder()
with self.assertRaises(ConnectionError):
- embedder(self.corpus[[0]])
+ embedder.transform(self.corpus[[0]])
def test_invalid_parameters(self):
with self.assertRaises(ValueError):
@@ -150,10 +144,6 @@ def test_invalid_parameters(self):
with self.assertRaises(ValueError):
self.embedder = DocumentEmbedder(aggregator='average')
- def test_invalid_corpus_type(self):
- with self.assertRaises(ValueError):
- self.embedder(self.corpus[0])
-
if __name__ == "__main__":
unittest.main()
diff --git a/orangecontrib/text/vectorization/document_embedder.py b/orangecontrib/text/vectorization/document_embedder.py
index ad4d476a7..5f82f99be 100644
--- a/orangecontrib/text/vectorization/document_embedder.py
+++ b/orangecontrib/text/vectorization/document_embedder.py
@@ -6,13 +6,14 @@
import sys
import warnings
import zlib
-from typing import Any, List, Optional, Tuple, Union
+from typing import Any, Optional, Tuple
import numpy as np
from Orange.misc.server_embedder import ServerEmbedderCommunicator
from Orange.util import dummy_callback
from orangecontrib.text import Corpus
+from orangecontrib.text.vectorization.base import BaseVectorizer
AGGREGATORS = ['Mean', 'Sum', 'Max', 'Min']
AGGREGATORS_L = ['mean', 'sum', 'max', 'min']
@@ -52,7 +53,7 @@
LANGUAGES = list(LANGS_TO_ISO.values())
-class DocumentEmbedder:
+class DocumentEmbedder(BaseVectorizer):
"""This class is used for obtaining dense embeddings of documents in
corpus using fastText pretrained models from:
E. Grave, P. Bojanowski, P. Gupta, A. Joulin, T. Mikolov,
@@ -93,9 +94,9 @@ def __init__(self, language: str = 'en',
server_url='https://apiv2.garaza.io',
embedder_type='text')
- def __call__(
- self, corpus: Union[Corpus, List[List[str]]], callback=dummy_callback
- ) -> Union[Tuple[Corpus, Corpus], List[Optional[List[float]]]]:
+ def _transform(
+ self, corpus: Corpus, _, callback=dummy_callback
+ ) -> Tuple[Corpus, Corpus]:
"""Adds matrix of document embeddings to a corpus.
Parameters
@@ -109,14 +110,7 @@ def __call__(
Corpus (original or a copy) with new features added.
Skipped documents
Corpus of documents that were not embedded
-
- Raises
- ------
- ValueError
- If corpus is not instance of Corpus.
"""
- if not isinstance(corpus, (Corpus, list)):
- raise ValueError("Input should be instance of Corpus or list.")
embs = self._embedder.embedd_data(
list(corpus.ngrams) if isinstance(corpus, Corpus) else corpus,
callback=callback,
@@ -135,12 +129,6 @@ def __call__(
skipped_documents = [emb is None for emb in embs]
embedded_documents = np.logical_not(skipped_documents)
- variable_attrs = {
- 'hidden': True,
- 'skip-normalization': True,
- 'embedding-feature': True
- }
-
new_corpus = None
if np.any(embedded_documents):
# if at least one embedding is not None, extend attributes
@@ -150,18 +138,22 @@ def __call__(
[e for e, ns in zip(embs, embedded_documents) if ns],
dtype=float,
),
- ['Dim{}'.format(i + 1) for i in range(dim)],
- var_attrs=variable_attrs
+ ["Dim{}".format(i + 1) for i in range(dim)],
+ var_attrs={
+ "embedding-feature": True,
+ "hidden": True,
+ },
)
skipped_corpus = None
if np.any(skipped_documents):
skipped_corpus = corpus[skipped_documents].copy()
skipped_corpus.name = "Skipped documents"
- warnings.warn(("Some documents were not embedded for " +
- "unknown reason. Those documents " +
- "are skipped."),
- RuntimeWarning)
+ warnings.warn(
+ "Some documents were not embedded for unknown reason. Those "
+ "documents are skipped.",
+ RuntimeWarning,
+ )
return new_corpus, skipped_corpus
@@ -181,12 +173,6 @@ def clear_cache(self):
if self._embedder:
self._embedder.clear_cache()
- def __enter__(self):
- return self
-
- def __exit__(self, _, __, ___):
- pass
-
class _ServerEmbedder(ServerEmbedderCommunicator):
def __init__(self, aggregator: str, *args, **kwargs) -> None:
diff --git a/orangecontrib/text/widgets/owdocumentembedding.py b/orangecontrib/text/widgets/owdocumentembedding.py
index 7ac6c6a84..8bae94e30 100644
--- a/orangecontrib/text/widgets/owdocumentembedding.py
+++ b/orangecontrib/text/widgets/owdocumentembedding.py
@@ -1,180 +1,115 @@
-from typing import Any, Tuple
-
-from AnyQt.QtWidgets import QPushButton, QStyle, QLayout
-from AnyQt.QtCore import Qt, QSize
-
-from Orange.widgets.gui import widgetBox, comboBox, auto_commit, hBox
-from Orange.widgets.settings import Setting
-from Orange.widgets.widget import OWWidget, Msg, Input, Output
-from Orange.widgets.utils.concurrent import ConcurrentWidgetMixin, TaskState
+from typing import Dict, Optional, Any
+from AnyQt.QtCore import Qt
+from AnyQt.QtWidgets import QGridLayout, QLabel, QPushButton, QStyle
from Orange.misc.utils.embedder_utils import EmbeddingConnectionError
+from Orange.widgets import gui
+from Orange.widgets.settings import Setting
+from Orange.widgets.widget import Msg, Output, OWWidget
-from orangecontrib.text.vectorization.document_embedder import DocumentEmbedder
-from orangecontrib.text.vectorization.document_embedder import LANGS_TO_ISO, AGGREGATORS
from orangecontrib.text.corpus import Corpus
-
+from orangecontrib.text.vectorization.document_embedder import (
+ AGGREGATORS,
+ LANGS_TO_ISO,
+ DocumentEmbedder,
+)
+from orangecontrib.text.widgets.utils import widgets
+from orangecontrib.text.widgets.utils.owbasevectorizer import (
+ OWBaseVectorizer,
+ Vectorizer,
+)
LANGUAGES = sorted(list(LANGS_TO_ISO.keys()))
-def run_pretrained_embedder(corpus: Corpus,
- language: str,
- aggregator: str,
- state: TaskState) -> Tuple[Corpus, Corpus]:
- """Runs DocumentEmbedder.
-
- Parameters
- ----------
- corpus : Corpus
- Corpus on which transform is performed.
- language : str
- ISO 639-1 (two-letter) code of desired language.
- aggregator : str
- Aggregator which creates document embedding (single
- vector) from word embeddings (multiple vectors).
- Allowed values are mean, sum, max, min.
- state : TaskState
- State object.
-
- Returns
- -------
- Corpus
- New corpus with additional features.
- """
- embedder = DocumentEmbedder(language=language, aggregator=aggregator)
-
- def callback(progress):
- if state.is_interruption_requested():
- raise Exception
- state.set_progress_value(progress * 100)
-
- new_corpus, skipped_corpus = embedder(corpus, callback=callback)
- return new_corpus, skipped_corpus
-
-
-class OWDocumentEmbedding(OWWidget, ConcurrentWidgetMixin):
+class EmbeddingVectorizer(Vectorizer):
+ skipped_documents = None
+
+ def _transform(self, callback):
+ embeddings, skipped = self.method.transform(self.corpus, callback=callback)
+ self.new_corpus = embeddings
+ self.skipped_documents = skipped
+
+
+class OWDocumentEmbedding(OWBaseVectorizer):
name = "Document Embedding"
description = "Document embedding using pretrained models."
- keywords = ['embedding', 'document embedding', 'text']
- icon = 'icons/TextEmbedding.svg'
+ keywords = ["embedding", "document embedding", "text"]
+ icon = "icons/TextEmbedding.svg"
priority = 300
- want_main_area = False
- _auto_apply = Setting(default=True)
+ buttons_area_orientation = Qt.Vertical
+ settings_version = 2
- class Inputs:
- corpus = Input('Corpus', Corpus)
+ Method = DocumentEmbedder
- class Outputs:
- new_corpus = Output('Embeddings', Corpus, default=True)
- skipped = Output('Skipped documents', Corpus)
+ class Outputs(OWBaseVectorizer.Outputs):
+ skipped = Output("Skipped documents", Corpus)
class Error(OWWidget.Error):
- no_connection = Msg("No internet connection. " +
- "Please establish a connection or " +
- "use another vectorizer.")
- unexpected_error = Msg('Embedding error: {}')
+ no_connection = Msg(
+ "No internet connection. Please establish a connection or use "
+ "another vectorizer."
+ )
+ unexpected_error = Msg("Embedding error: {}")
class Warning(OWWidget.Warning):
- unsuccessful_embeddings = Msg('Some embeddings were unsuccessful.')
+ unsuccessful_embeddings = Msg("Some embeddings were unsuccessful.")
- language = Setting(default=LANGUAGES.index("English"))
- aggregator = Setting(default=0)
+ language = Setting(default="English")
+ aggregator = Setting(default="Mean")
def __init__(self):
- OWWidget.__init__(self)
- ConcurrentWidgetMixin.__init__(self)
-
- self.aggregators = AGGREGATORS
- self.corpus = None
- self.new_corpus = None
- self._setup_layout()
-
- @staticmethod
- def sizeHint():
- return QSize(300, 300)
-
- def _setup_layout(self):
- self.controlArea.setMinimumWidth(self.sizeHint().width())
- self.layout().setSizeConstraint(QLayout.SetFixedSize)
-
- widget_box = widgetBox(self.controlArea, 'Settings')
-
- self.language_cb = comboBox(
- widget=widget_box,
- master=self,
- value='language',
- label='Language: ',
- orientation=Qt.Horizontal,
- items=LANGUAGES,
- callback=self._option_changed,
- searchable=True
- )
-
- self.aggregator_cb = comboBox(widget=widget_box,
- master=self,
- value='aggregator',
- label='Aggregator: ',
- orientation=Qt.Horizontal,
- items=self.aggregators,
- callback=self._option_changed)
-
- self.auto_commit_widget = auto_commit(widget=self.controlArea,
- master=self,
- value='_auto_apply',
- label='Apply',
- commit=self.commit,
- box=False)
-
+ super().__init__()
self.cancel_button = QPushButton(
- 'Cancel',
- icon=self.style()
- .standardIcon(QStyle.SP_DialogCancelButton))
-
+ "Cancel", icon=self.style().standardIcon(QStyle.SP_DialogCancelButton)
+ )
self.cancel_button.clicked.connect(self.cancel)
-
- hbox = hBox(self.controlArea)
- hbox.layout().addWidget(self.cancel_button)
+ self.buttonsArea.layout().addWidget(self.cancel_button)
self.cancel_button.setDisabled(True)
- @Inputs.corpus
- def set_data(self, data):
- self.Warning.clear()
- self.cancel()
+ def create_configuration_layout(self):
+ layout = QGridLayout()
+ layout.setSpacing(10)
- if not data:
- self.corpus = None
- self.clear_outputs()
- return
+ combo = widgets.ComboBox(
+ self,
+ "language",
+ items=LANGUAGES,
+ )
+ combo.currentIndexChanged.connect(self.on_change)
+ layout.addWidget(QLabel("Language:"))
+ layout.addWidget(combo, 0, 1)
- self.corpus = data
- self.unconditional_commit()
+ combo = widgets.ComboBox(self, "aggregator", items=AGGREGATORS)
+ combo.currentIndexChanged.connect(self.on_change)
+ layout.addWidget(QLabel("Aggregator:"))
+ layout.addWidget(combo, 1, 1)
- def _option_changed(self):
- self.commit()
+ return layout
- def commit(self):
- if self.corpus is None:
- self.clear_outputs()
- return
+ def update_method(self):
+ self.vectorizer = EmbeddingVectorizer(self.init_method(), self.corpus)
- self.cancel_button.setDisabled(False)
-
- self.start(run_pretrained_embedder,
- self.corpus,
- LANGS_TO_ISO[LANGUAGES[self.language]],
- self.aggregators[self.aggregator])
+ def init_method(self):
+ return self.Method(
+ language=LANGS_TO_ISO[self.language], aggregator=self.aggregator
+ )
+ @gui.deferred
+ def commit(self):
self.Error.clear()
+ self.Warning.clear()
+ self.cancel_button.setDisabled(False)
+ super().commit()
- def on_done(self, embeddings: Tuple[Corpus, Corpus]) -> None:
+ def on_done(self, _):
self.cancel_button.setDisabled(True)
- self._send_output_signals(embeddings[0], embeddings[1])
-
- def on_partial_result(self, result: Any):
- self.cancel()
- self.Error.no_connection()
+ skipped = self.vectorizer.skipped_documents
+ self.Outputs.skipped.send(skipped)
+ if skipped is not None and len(skipped) > 0:
+ self.Warning.unsuccessful_embeddings()
+ super().on_done(_)
def on_exception(self, ex: Exception):
self.cancel_button.setDisabled(True)
@@ -183,27 +118,22 @@ def on_exception(self, ex: Exception):
else:
self.Error.unexpected_error(type(ex).__name__)
self.cancel()
- self.clear_outputs()
def cancel(self):
+ self.Outputs.skipped.send(None)
self.cancel_button.setDisabled(True)
super().cancel()
- def _send_output_signals(self, embeddings, skipped):
- self.Outputs.new_corpus.send(embeddings)
- self.Outputs.skipped.send(skipped)
- unsuccessful = len(skipped) if skipped else 0
- if unsuccessful > 0:
- self.Warning.unsuccessful_embeddings()
+ @classmethod
+ def migrate_settings(cls, settings: Dict[str, Any], version: Optional[int]):
+ if version is None or version < 2:
+ # Before version 2 the settings were indexes; now they are strings
+ # holding the language name and the selected aggregator name.
+ settings["language"] = LANGUAGES[settings["language"]]
+ settings["aggregator"] = AGGREGATORS[settings["aggregator"]]
- def clear_outputs(self):
- self._send_output_signals(None, None)
- def onDeleteWidget(self):
- self.cancel()
- super().onDeleteWidget()
-
-
-if __name__ == '__main__':
+if __name__ == "__main__":
from orangewidget.utils.widgetpreview import WidgetPreview
- WidgetPreview(OWDocumentEmbedding).run(Corpus.from_file('book-excerpts'))
+
+ WidgetPreview(OWDocumentEmbedding).run(Corpus.from_file("book-excerpts"))
diff --git a/orangecontrib/text/widgets/tests/test_owdocumentembedding.py b/orangecontrib/text/widgets/tests/test_owdocumentembedding.py
index 9ade7ca5a..7a0210a1c 100644
--- a/orangecontrib/text/widgets/tests/test_owdocumentembedding.py
+++ b/orangecontrib/text/widgets/tests/test_owdocumentembedding.py
@@ -1,6 +1,7 @@
import unittest
from unittest.mock import Mock, patch
+from AnyQt.QtWidgets import QComboBox
from Orange.widgets.tests.base import WidgetTest
from Orange.widgets.tests.utils import simulate
from Orange.misc.utils.embedder_utils import EmbeddingConnectionError
@@ -15,7 +16,6 @@ async def none_method(_, __):
class TestOWDocumentEmbedding(WidgetTest):
-
def setUp(self):
self.widget = self.create_widget(OWDocumentEmbedding)
self.corpus = Corpus.from_file('deerwester')
@@ -33,21 +33,23 @@ def test_input(self):
@patch(PATCH_METHOD, make_dummy_post(b'{"embedding": [1.3, 1]}'))
def test_output(self):
self.send_signal("Corpus", None)
- self.assertIsNone(self.get_output(self.widget.Outputs.new_corpus))
+ self.assertIsNone(self.get_output(self.widget.Outputs.corpus))
self.send_signal("Corpus", self.corpus)
self.wait_until_finished()
- result = self.get_output(self.widget.Outputs.new_corpus)
+ result = self.get_output(self.widget.Outputs.corpus)
self.assertIsNotNone(result)
self.assertIsInstance(result, Corpus)
self.assertEqual(len(self.corpus), len(result))
@patch(PATCH_METHOD, make_dummy_post(b''))
def test_some_failed(self):
- simulate.combobox_activate_index(self.widget.controls.aggregator, 1)
+ simulate.combobox_activate_index(
+ self.widget.controlArea.findChildren(QComboBox)[1], 1
+ )
self.send_signal("Corpus", self.corpus)
self.wait_until_finished()
- result = self.get_output(self.widget.Outputs.new_corpus)
+ result = self.get_output(self.widget.Outputs.corpus)
skipped = self.get_output(self.widget.Outputs.skipped)
self.assertIsNone(result)
self.assertEqual(len(skipped), len(self.corpus))
@@ -58,7 +60,7 @@ def test_cancel_embedding(self):
self.send_signal("Corpus", self.larger_corpus)
self.widget.cancel_button.click()
self.wait_until_finished()
- self.assertIsNone(self.get_output(self.widget.Outputs.new_corpus))
+ self.assertIsNone(self.get_output(self.widget.Outputs.corpus))
@patch('orangecontrib.text.vectorization.document_embedder' +
'._ServerEmbedder.embedd_data',
@@ -66,42 +68,40 @@ def test_cancel_embedding(self):
def test_connection_error(self, _):
self.send_signal("Corpus", self.corpus)
self.wait_until_finished()
- self.assertIsNone(self.get_output(self.widget.Outputs.new_corpus))
+ self.assertIsNone(self.get_output(self.widget.Outputs.corpus))
self.assertTrue(self.widget.Error.no_connection.is_shown())
- @patch('orangecontrib.text.vectorization.document_embedder' +
- '.DocumentEmbedder.__call__',
- side_effect=OSError)
+ @patch(
+ "orangecontrib.text.vectorization.document_embedder"
+ + ".DocumentEmbedder.transform",
+ side_effect=OSError,
+ )
def test_unexpected_error(self, _):
self.send_signal("Corpus", self.corpus)
self.wait_until_finished()
- self.assertIsNone(self.get_output(self.widget.Outputs.new_corpus))
+ self.assertIsNone(self.get_output(self.widget.Outputs.corpus))
self.assertTrue(self.widget.Error.unexpected_error.is_shown())
@patch(PATCH_METHOD, make_dummy_post(b'{"embedding": [1.3, 1]}'))
def test_rerun_on_new_data(self):
""" Check if embedding is automatically re-run on new data """
self.widget._auto_apply = False
- self.assertIsNone(self.get_output(self.widget.Outputs.new_corpus))
+ self.assertIsNone(self.get_output(self.widget.Outputs.corpus))
self.send_signal(self.widget.Inputs.corpus, self.corpus[:3])
self.wait_until_finished()
- self.assertEqual(
- 3, len(self.get_output(self.widget.Outputs.new_corpus))
- )
+ self.assertEqual(3, len(self.get_output(self.widget.Outputs.corpus)))
self.send_signal(self.widget.Inputs.corpus, self.corpus[:1])
self.wait_until_finished()
- self.assertEqual(
- 1, len(self.get_output(self.widget.Outputs.new_corpus))
- )
+ self.assertEqual(1, len(self.get_output(self.widget.Outputs.corpus)))
@patch('orangecontrib.text.vectorization.document_embedder' +
'._ServerEmbedder._encode_data_instance', none_method)
def test_skipped_documents(self):
self.send_signal("Corpus", self.corpus)
self.wait_until_finished()
- self.assertIsNone(self.get_output(self.widget.Outputs.new_corpus))
+ self.assertIsNone(self.get_output(self.widget.Outputs.corpus))
self.assertEqual(len(self.get_output(self.widget.Outputs.skipped)), len(self.corpus))
self.assertTrue(self.widget.Warning.unsuccessful_embeddings.is_shown())
diff --git a/orangecontrib/text/widgets/utils/owbasevectorizer.py b/orangecontrib/text/widgets/utils/owbasevectorizer.py
index 8ae67699d..41b029775 100644
--- a/orangecontrib/text/widgets/utils/owbasevectorizer.py
+++ b/orangecontrib/text/widgets/utils/owbasevectorizer.py
@@ -125,7 +125,3 @@ def create_configuration_layout(self):
def cancel(self):
self.Outputs.corpus.send(None)
super().cancel()
-
- def onDeleteWidget(self):
- self.cancel()
- super().onDeleteWidget()
diff --git a/orangecontrib/text/widgets/utils/tests/test_owbasevectorizer.py b/orangecontrib/text/widgets/utils/tests/test_owbasevectorizer.py
index 4b1ed35e7..7f494a179 100644
--- a/orangecontrib/text/widgets/utils/tests/test_owbasevectorizer.py
+++ b/orangecontrib/text/widgets/utils/tests/test_owbasevectorizer.py
@@ -15,8 +15,8 @@ class TestableBaseVectWidget(OWBaseVectorizer):
def create_configuration_layout(self):
return QVBoxLayout()
- def update_method(self):
- self.method = self.Method()
+ def init_method(self):
+ return self.Method()
class TestOWBaseVectorizer(WidgetTest):