From 3859ad12bfb0583cb06ce07734f3710381d2f883 Mon Sep 17 00:00:00 2001 From: PrimozGodec Date: Thu, 26 Jan 2023 11:23:51 +0100 Subject: [PATCH] Keywords - replace embedding with MBERT --- orangecontrib/text/widgets/owkeywords.py | 11 +--------- .../text/widgets/tests/test_owkeywords.py | 22 +++++++++---------- 2 files changed, 11 insertions(+), 22 deletions(-) diff --git a/orangecontrib/text/widgets/owkeywords.py b/orangecontrib/text/widgets/owkeywords.py index 973810f61..eb4d761a4 100644 --- a/orangecontrib/text/widgets/owkeywords.py +++ b/orangecontrib/text/widgets/owkeywords.py @@ -324,12 +324,6 @@ def __on_rake_lang_changed(self): del self.__cached_keywords[ScoringMethods.RAKE] self.update_scores() - def __on_emb_lang_changed(self): - if ScoringMethods.EMBEDDING in self.selected_scoring_methods: - if ScoringMethods.EMBEDDING in self.__cached_keywords: - del self.__cached_keywords[ScoringMethods.EMBEDDING] - self.update_scores() - def __on_filter_changed(self): model = self.view.model() model.setFilterFixedString(self.__filter_line_edit.text().strip()) @@ -389,10 +383,7 @@ def update_scores(self): }, ScoringMethods.RAKE: { "language": RAKE_LANGUAGES[self.rake_lang_index], - "max_len": self.corpus.ngram_range[1] if self.corpus else 1 - }, - ScoringMethods.EMBEDDING: { - "language": EMBEDDING_LANGUAGES[self.embedding_lang_index], + "max_len": self.corpus.ngram_range[1] if self.corpus else 1, }, } self.start(run, self.corpus, self.words, self.__cached_keywords, diff --git a/orangecontrib/text/widgets/tests/test_owkeywords.py b/orangecontrib/text/widgets/tests/test_owkeywords.py index 171db94b8..c75734f1f 100644 --- a/orangecontrib/text/widgets/tests/test_owkeywords.py +++ b/orangecontrib/text/widgets/tests/test_owkeywords.py @@ -1,5 +1,4 @@ # pylint: disable=missing-docstring -from typing import List import unittest from unittest.mock import Mock, patch @@ -175,15 +174,17 @@ def test_sort_nans_asc(self): def test_scoring_methods(self): # speed-up the test execution - def dummy_embedding(tokens, language, progress_callback=None): - return tfidf_keywords(tokens, progress_callback) - - methods = [("TF-IDF", Mock(wraps=tfidf_keywords)), - ("YAKE!", Mock(wraps=yake_keywords)), - ("Rake", Mock(wraps=rake_keywords)), - ("Embedding", Mock(side_effect=dummy_embedding))] + def dummy_mbert(tokens, progress_callback=None): + return [[("kw1", 0.2), ("kw2", 0.3)] * len(tokens)] + + methods = [ + ("TF-IDF", Mock(wraps=tfidf_keywords)), + ("YAKE!", Mock(wraps=yake_keywords)), + ("Rake", Mock(wraps=rake_keywords)), + ("MBERT", Mock(side_effect=dummy_mbert)), + ] with patch.object(ScoringMethods, "ITEMS", methods) as m: - scores = {"TF-IDF", "YAKE!", "Rake", "Embedding"} + scores = {"TF-IDF", "YAKE!", "Rake", "MBERT"} settings = {"selected_scoring_methods": scores} widget = self.create_widget(OWKeywords, stored_settings=settings) @@ -191,8 +192,6 @@ def dummy_embedding(tokens, language, progress_callback=None): simulate.combobox_activate_item(cb, "Arabic") cb = widget.controls.rake_lang_index simulate.combobox_activate_item(cb, "Finnish") - cb = widget.controls.embedding_lang_index - simulate.combobox_activate_item(cb, "Kazakh") self.send_signal(widget.Inputs.corpus, self.corpus, widget=widget) self.wait_until_finished(widget=widget, timeout=10000) @@ -205,7 +204,6 @@ def dummy_embedding(tokens, language, progress_callback=None): m[3][1].assert_called_once() self.assertEqual(m[1][1].call_args[1]["language"], "Arabic") self.assertEqual(m[2][1].call_args[1]["language"], "Finnish") - self.assertEqual(m[3][1].call_args[1]["language"], "Kazakh") def test_method_change(self): """Test method change by clicking"""