Skip to content

Commit

Permalink
Keywords - replace embedding with MBERT
Browse files: browse the repository at this point in the history
  • Loading branch information
PrimozGodec committed Feb 28, 2023
1 parent b34576b commit 839444a
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 22 deletions.
11 changes: 1 addition & 10 deletions orangecontrib/text/widgets/owkeywords.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,12 +324,6 @@ def __on_rake_lang_changed(self):
del self.__cached_keywords[ScoringMethods.RAKE]
self.update_scores()

def __on_emb_lang_changed(self):
if ScoringMethods.EMBEDDING in self.selected_scoring_methods:
if ScoringMethods.EMBEDDING in self.__cached_keywords:
del self.__cached_keywords[ScoringMethods.EMBEDDING]
self.update_scores()

def __on_filter_changed(self):
model = self.view.model()
model.setFilterFixedString(self.__filter_line_edit.text().strip())
Expand Down Expand Up @@ -389,10 +383,7 @@ def update_scores(self):
},
ScoringMethods.RAKE: {
"language": RAKE_LANGUAGES[self.rake_lang_index],
"max_len": self.corpus.ngram_range[1] if self.corpus else 1
},
ScoringMethods.EMBEDDING: {
"language": EMBEDDING_LANGUAGES[self.embedding_lang_index],
"max_len": self.corpus.ngram_range[1] if self.corpus else 1,
},
}
self.start(run, self.corpus, self.words, self.__cached_keywords,
Expand Down
22 changes: 10 additions & 12 deletions orangecontrib/text/widgets/tests/test_owkeywords.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
# pylint: disable=missing-docstring
from typing import List
import unittest
from unittest.mock import Mock, patch

Expand Down Expand Up @@ -175,24 +174,24 @@ def test_sort_nans_asc(self):

def test_scoring_methods(self):
# speed-up the test execution
def dummy_embedding(tokens, language, progress_callback=None):
return tfidf_keywords(tokens, progress_callback)

methods = [("TF-IDF", Mock(wraps=tfidf_keywords)),
("YAKE!", Mock(wraps=yake_keywords)),
("Rake", Mock(wraps=rake_keywords)),
("Embedding", Mock(side_effect=dummy_embedding))]
def dummy_mbert(tokens, progress_callback=None):
return [[("kw1", 0.2), ("kw2", 0.3)] * len(tokens)]

methods = [
("TF-IDF", Mock(wraps=tfidf_keywords)),
("YAKE!", Mock(wraps=yake_keywords)),
("Rake", Mock(wraps=rake_keywords)),
("MBERT", Mock(side_effect=dummy_mbert)),
]
with patch.object(ScoringMethods, "ITEMS", methods) as m:
scores = {"TF-IDF", "YAKE!", "Rake", "Embedding"}
scores = {"TF-IDF", "YAKE!", "Rake", "MBERT"}
settings = {"selected_scoring_methods": scores}
widget = self.create_widget(OWKeywords, stored_settings=settings)

cb = widget.controls.yake_lang_index
simulate.combobox_activate_item(cb, "Arabic")
cb = widget.controls.rake_lang_index
simulate.combobox_activate_item(cb, "Finnish")
cb = widget.controls.embedding_lang_index
simulate.combobox_activate_item(cb, "Kazakh")

self.send_signal(widget.Inputs.corpus, self.corpus, widget=widget)
self.wait_until_finished(widget=widget, timeout=10000)
Expand All @@ -205,7 +204,6 @@ def dummy_embedding(tokens, language, progress_callback=None):
m[3][1].assert_called_once()
self.assertEqual(m[1][1].call_args[1]["language"], "Arabic")
self.assertEqual(m[2][1].call_args[1]["language"], "Finnish")
self.assertEqual(m[3][1].call_args[1]["language"], "Kazakh")

def test_method_change(self):
"""Test method change by clicking"""
Expand Down

0 comments on commit 839444a

Please sign in to comment.