diff --git a/orangecontrib/text/widgets/owsentimentanalysis.py b/orangecontrib/text/widgets/owsentimentanalysis.py index f9bf216cd..f81c1634f 100644 --- a/orangecontrib/text/widgets/owsentimentanalysis.py +++ b/orangecontrib/text/widgets/owsentimentanalysis.py @@ -1,21 +1,30 @@ -from typing import List +from typing import List, Type, Tuple from AnyQt.QtCore import Qt -from AnyQt.QtWidgets import QApplication, QGridLayout, QLabel +from AnyQt.QtWidgets import QGridLayout, QLabel from Orange.widgets import gui, settings +from Orange.widgets.utils.concurrent import ConcurrentWidgetMixin, TaskState from Orange.widgets.utils.signals import Input, Output from Orange.widgets.widget import OWWidget, Msg from orangecontrib.text import Corpus, preprocess -from orangecontrib.text.sentiment import VaderSentiment, LiuHuSentiment, \ - MultiSentiment, CustomDictionaries, SentiArt, MultisentimentDictionaries, \ - SentiArtDictionaries, LilahSentiment, LilahDictionaries +from orangecontrib.text.language import ISO2LANG, LANG2ISO +from orangecontrib.text.sentiment import ( + VaderSentiment, + LiuHuSentiment, + MultiSentiment, + CustomDictionaries, + SentiArt, + LilahSentiment, + DictionaryNotFound, + Sentiment, +) from orangecontrib.text.widgets.owpreprocess import FileLoader, _to_abspath from orangecontrib.text.preprocess import PreprocessorList from orangewidget.utils.filedialogs import RecentPath -class OWSentimentAnalysis(OWWidget): +class OWSentimentAnalysis(OWWidget, ConcurrentWidgetMixin): name = "Sentiment Analysis" description = "Compute sentiment from text." icon = "icons/SentimentAnalysis.svg" @@ -29,16 +38,24 @@ class Outputs: corpus = Output("Corpus", Corpus) settings_version = 1 - - method_idx = settings.Setting(1) - autocommit = settings.Setting(True) - liu_language = settings.Setting('English') - multi_language = settings.Setting('English') - senti_language = settings.Setting('English') - lilah_language = settings.Setting('Slovenian') want_main_area = False resizing_enabled = False + method_idx: int = settings.Setting(1) + autocommit: bool = settings.Setting(True) + liu_language: str = settings.Setting( + ISO2LANG[LiuHuSentiment.DEFAULT_LANG], schema_only=True + ) + multi_language: str = settings.Setting( + ISO2LANG[MultiSentiment.DEFAULT_LANG], schema_only=True + ) + senti_language: str = settings.Setting( + ISO2LANG[SentiArt.DEFAULT_LANG], schema_only=True + ) + lilah_language: str = settings.Setting( + ISO2LANG[LilahSentiment.DEFAULT_LANG], schema_only=True + ) + METHODS = [ LiuHuSentiment, VaderSentiment, @@ -47,61 +64,84 @@ class Outputs: LilahSentiment, CustomDictionaries ] - LANG = ['English', 'Slovenian'] - MULTI_LANG = MultiSentiment.LANGS.keys() - SENTI_LANG = SentiArt.LANGS.keys() - LILAH_LANG = LilahSentiment.LANGS.keys() - DEFAULT_NONE = None class Warning(OWWidget.Warning): - senti_offline = Msg('No internet connection! Sentiment now only works ' - 'with local models.') - senti_offline_no_lang = Msg('No internet connection and no local ' - 'language resources are available.') - one_dict_only = Msg(f'Only one dictionary loaded.') - no_dicts_loaded = Msg('No dictionaries loaded.') + one_dict_only = Msg(f"Only one dictionary loaded.") + no_dicts_loaded = Msg("No dictionaries loaded.") + + class Error(OWWidget.Error): + offline = Msg( + "Sentiment cannot be computed since you are offline and the " + "required dictionary is unavailable locally." + ) def __init__(self): super().__init__() + ConcurrentWidgetMixin.__init__(self) self.corpus = None self.pp_corpus = None self.pos_file = None self.neg_file = None self.senti_dict = None + # languages from workflow should be retained when data on input + self.__pend_liu_lang = self.liu_language + self.__pend_multi_lang = self.multi_language + self.__pend_senti_lang = self.senti_language + self.__pend_lilah_lang = self.lilah_language + self.form = QGridLayout() self.method_box = box = gui.radioButtonsInBox( self.controlArea, self, "method_idx", [], box="Method", orientation=self.form, callback=self._method_changed) self.liu_hu = gui.appendRadioButton(box, "Liu Hu", addToLayout=False) - self.liu_lang = gui.comboBox(None, self, 'liu_language', - sendSelectedValue=True, - contentsLength=10, - items=self.LANG, - callback=self._method_changed) + self.liu_lang = gui.comboBox( + None, + self, + "liu_language", + sendSelectedValue=True, + contentsLength=10, + items=[ISO2LANG[lg] for lg in LiuHuSentiment.LANGUAGES], + callback=self._method_changed, + ) self.vader = gui.appendRadioButton(box, "Vader", addToLayout=False) - self.multi_sent = gui.appendRadioButton(box, "Multilingual " - "sentiment", - addToLayout=False) - self.multi_box = gui.comboBox(None, self, 'multi_language', - sendSelectedValue=True, - contentsLength=10, items=[''], - callback=self._method_changed) - self.senti_art = gui.appendRadioButton(box, "SentiArt", - addToLayout=False) - self.senti_box = gui.comboBox(None, self, 'senti_language', - sendSelectedValue=True, - contentsLength=10, items=[''], - callback=self._method_changed) - self.lilah_sent = gui.appendRadioButton(box, "Lilah sentiment", - addToLayout=False) - self.lilah_box = gui.comboBox(None, self, 'lilah_language', - sendSelectedValue=True, - contentsLength=10, items=[''], - callback=self._method_changed) - self.custom_list = gui.appendRadioButton(box, "Custom dictionary", - addToLayout=False) - + self.multi_sent = gui.appendRadioButton( + box, "Multilingual " "sentiment", addToLayout=False + ) + self.multi_box = gui.comboBox( + None, + self, + "multi_language", + sendSelectedValue=True, + contentsLength=10, + items=[ISO2LANG[lg] for lg in MultiSentiment.LANGUAGES], + callback=self._method_changed, + ) + self.senti_art = gui.appendRadioButton(box, "SentiArt", addToLayout=False) + self.senti_box = gui.comboBox( + None, + self, + "senti_language", + sendSelectedValue=True, + contentsLength=10, + items=[ISO2LANG[lg] for lg in SentiArt.LANGUAGES], + callback=self._method_changed, + ) + self.lilah_sent = gui.appendRadioButton( + box, "Lilah sentiment", addToLayout=False + ) + self.lilah_box = gui.comboBox( + None, + self, + "lilah_language", + sendSelectedValue=True, + contentsLength=10, + items=[ISO2LANG[lg] for lg in LilahSentiment.LANGUAGES], + callback=self._method_changed, + ) + self.custom_list = gui.appendRadioButton( + box, "Custom dictionary", addToLayout=False + ) self.__posfile_loader = FileLoader() self.__posfile_loader.set_file_list() self.__posfile_loader.activated.connect(self.__pos_loader_activated) @@ -116,6 +156,8 @@ def __init__(self): self.form.addWidget(QLabel("Language:"), 0, 1, Qt.AlignRight) self.form.addWidget(self.liu_lang, 0, 2, Qt.AlignRight) self.form.addWidget(self.vader, 1, 0, Qt.AlignLeft) + self.form.addWidget(QLabel("Language:"), 1, 1, Qt.AlignRight) + self.form.addWidget(QLabel(" English"), 1, 2, Qt.AlignLeft) self.form.addWidget(self.multi_sent, 2, 0, Qt.AlignLeft) self.form.addWidget(QLabel("Language:"), 2, 1, Qt.AlignRight) self.form.addWidget(self.multi_box, 2, 2, Qt.AlignRight) @@ -137,28 +179,17 @@ def __init__(self): self.filegrid.addWidget(self.__negfile_loader.browse_btn, 1, 2) self.filegrid.addWidget(self.__negfile_loader.load_btn, 1, 3) - self.multi_dict = MultisentimentDictionaries() - self.senti_dict = SentiArtDictionaries() - self.lilah_dict = LilahDictionaries() - self.update_box(self.multi_box, self.multi_dict, MultiSentiment) - self.update_box(self.senti_box, self.senti_dict, SentiArt) - self.update_box(self.lilah_box, self.lilah_dict, LilahSentiment) - self.online = self.multi_dict.online - self.check_sentiment_online() - - ac = gui.auto_commit(self.controlArea, self, 'autocommit', 'Commit', - 'Autocommit is on') - ac.layout().insertSpacing(1, 8) + gui.auto_apply(self.buttonsArea, self, "autocommit") def __pos_loader_activated(self): cf = self.__posfile_loader.get_current_file() self.pos_file = cf.abspath if cf else None - self._method_changed() + self.self.commit.deferred() def __neg_loader_activated(self): cf = self.__negfile_loader.get_current_file() self.neg_file = cf.abspath if cf else None - self._method_changed() + self.self.commit.deferred() def __set_pos_path(self, path: RecentPath, paths: List[RecentPath] = []): self._posfile_loader.recent_paths = paths @@ -172,102 +203,101 @@ def __set_lx_path(self, path: RecentPath, paths: List[RecentPath] = []): self.__negfile_loader.set_current_file(_to_abspath(path)) self.neg_file = self.__negfile_loader.get_current_file() - def update_box(self, box, dictionary, method): - box.clear() - supported_languages = dictionary.supported_languages() - if supported_languages: - items = sorted([key for (key, value) in method.LANGS.items() - if value in supported_languages]) - box.addItems(items) - if "English" in items: - box.setCurrentIndex(items.index("English")) - else: - box.setCurrentIndex(items.index(items[0])) - - def check_sentiment_online(self): - current_state = self.multi_dict.online - if self.online != current_state: - self.update_box(self.multi_box, self.multi_dict, MultiSentiment) - self.update_box(self.senti_box, self.senti_dict, SentiArt) - self.update_box(self.lilah_box, self.lilah_dict, SentiArt) - self.online = current_state - - self.Warning.senti_offline.clear() - self.Warning.senti_offline_no_lang.clear() - if not current_state and self.method_idx == 2: - if self.multi_dict.supported_languages(): - self.Warning.senti_offline() - else: - self.Warning.senti_offline_no_lang() - if not current_state and self.method_idx == 3: - if self.senti_dict_dict.supported_languages(): - self.Warning.senti_offline() - else: - self.Warning.senti_offline_no_lang() - if not current_state and self.method_idx == 4: - if self.lilah_dict_dict.supported_languages(): - self.Warning.senti_offline() - else: - self.Warning.senti_offline_no_lang() - @Inputs.corpus - def set_corpus(self, data=None): - self.corpus = data + def set_corpus(self, corpus): self.pp_corpus = None - if self.corpus is not None: - if not self.corpus.has_tokens(): + if corpus is not None: + if not corpus.has_tokens(): # create preprocessed corpus upon setting data to avoid # preprocessing at each method run pp_list = [preprocess.LowercaseTransformer(), preprocess.WordPunctTokenizer()] - self.pp_corpus = PreprocessorList(pp_list)(self.corpus) + self.pp_corpus = PreprocessorList(pp_list)(corpus) else: - self.pp_corpus = self.corpus + self.pp_corpus = corpus + self.__set_language_settings() self.commit.now() + def __set_language_settings(self): + settings_ = ( + (self.__pend_liu_lang, "liu_language", LiuHuSentiment), + (self.__pend_multi_lang, "multi_language", MultiSentiment), + (self.__pend_senti_lang, "senti_language", SentiArt), + (self.__pend_lilah_lang, "lilah_language", LilahSentiment), + ) + + for l_pending, l_setting, model in settings_: + if self.pp_corpus and self.pp_corpus.language in model.LANGUAGES: + setattr(self, l_setting, ISO2LANG[self.pp_corpus.language]) + else: + # if Corpus's language not supported use default language + setattr(self, l_setting, ISO2LANG[model.DEFAULT_LANG]) + + # when workflow loaded use language saved in workflow + if l_pending is not None: + setattr(self, l_setting, l_pending) + + self.__pend_liu_lang = None + self.__pend_multi_lang = None + self.__pend_senti_lang = None + self.__pend_lilah_lang = None + def _method_changed(self): self.commit.deferred() + def _compute_sentiment(self): + method = self.METHODS[self.method_idx] + kwargs = {} + if method.name == "Liu Hu": + kwargs = dict(language=LANG2ISO[self.liu_language]) + elif method.name == "Multilingual Sentiment": + kwargs = dict(language=LANG2ISO[self.multi_language]) + elif method.name == "SentiArt": + kwargs = dict(language=LANG2ISO[self.senti_language]) + elif method.name == "LiLaH Sentiment": + kwargs = dict(language=LANG2ISO[self.lilah_language]) + elif method.name == "Custom Dictionaries": + kwargs = dict(pos=self.pos_file, neg=self.neg_file) + if bool(self.pos_file) != bool(self.neg_file): # xor: one of them None + self.Warning.one_dict_only() + elif not self.pos_file and not self.neg_file: + self.Warning.no_dicts_loaded() + + self.start(self._run, self.pp_corpus, method, kwargs) + + @staticmethod + def _run( + corpus: Corpus, + method: Type[Sentiment], + kwargs: Tuple[str], + task_state: TaskState, + ) -> Corpus: + def callback(i: float): + task_state.set_progress_value(i * 100) + if task_state.is_interruption_requested(): + raise Exception + + method = method(**kwargs) + return method.transform(corpus, callback=callback) + + def on_done(self, corpus: Corpus): + self.Outputs.corpus.send(corpus) + + def on_exception(self, ex): + if isinstance(ex, DictionaryNotFound): + self.Error.offline() + else: + raise ex + self.Outputs.corpus.send(None) + @gui.deferred def commit(self): - if self.corpus is not None: - self.Warning.senti_offline.clear() - self.Warning.one_dict_only.clear() - self.Warning.no_dicts_loaded.clear() - method = self.METHODS[self.method_idx] - corpus = self.pp_corpus - if method.name == 'Liu Hu': - out = method(language=self.liu_language).transform(corpus) - elif method.name == 'Multilingual Sentiment': - if not self.senti_dict.online: - self.Warning.senti_offline() - self.update_box(self.multi_box, self.multi_dict, MultiSentiment) - return - else: - out = method(language=self.multi_language).transform(corpus) - elif method.name == 'SentiArt': - if not self.senti_dict.online: - self.Warning.senti_offline() - self.update_box(self.senti_box, self.senti_dict, SentiArt) - return - out = method(language=self.senti_language).transform(corpus) - elif method.name == 'Lilah Sentiment': - if not self.lilah_dict.online: - self.Warning.senti_offline() - self.update_box(self.lilah_box, self.lilah_dict, - LilahSentiment) - return - out = method(language=self.lilah_language).transform(corpus) - elif method.name == 'Custom Dictionaries': - out = method(self.pos_file, self.neg_file).transform(corpus) - if (self.pos_file and not self.neg_file) or \ - (self.neg_file and not self.pos_file): - self.Warning.one_dict_only() - if not self.pos_file and not self.neg_file: - self.Warning.no_dicts_loaded() - else: - out = method().transform(corpus) - self.Outputs.corpus.send(out) + self.Error.offline.clear() + self.Warning.one_dict_only.clear() + self.Warning.no_dicts_loaded.clear() + + if self.pp_corpus is not None: + self._compute_sentiment() else: self.Outputs.corpus.send(None) @@ -285,15 +315,7 @@ def migrate_settings(cls, settings, version): settings["metric_idx"] = 5 -def main(): - app = QApplication([]) - widget = OWSentimentAnalysis() - corpus = Corpus.from_file('book-excerpts') - corpus = corpus[:3] - widget.set_corpus(corpus) - widget.show() - app.exec() - - if __name__ == '__main__': - main() + from orangewidget.utils.widgetpreview import WidgetPreview + + WidgetPreview(OWSentimentAnalysis).run(Corpus.from_file("book-excerpts")[:3]) diff --git a/orangecontrib/text/widgets/tests/test_owsentimentanalysis.py b/orangecontrib/text/widgets/tests/test_owsentimentanalysis.py index 8c5e9d218..1ff2dddc0 100644 --- a/orangecontrib/text/widgets/tests/test_owsentimentanalysis.py +++ b/orangecontrib/text/widgets/tests/test_owsentimentanalysis.py @@ -1,15 +1,18 @@ import os import unittest -from unittest import mock from unittest.mock import patch import numpy as np +from AnyQt.QtWidgets import QRadioButton from numpy import array_equal + from Orange.widgets.tests.base import WidgetTest from Orange.widgets.tests.utils import simulate from orangecontrib.text import preprocess from orangecontrib.text.corpus import Corpus +from orangecontrib.text.language import ISO2LANG +from orangecontrib.text.sentiment import DictionaryNotFound from orangecontrib.text.widgets.owsentimentanalysis import OWSentimentAnalysis MS_FILES = [ @@ -33,6 +36,7 @@ } MOCK_FUN = "orangecontrib.text.sentiment.serverfiles.ServerFiles.listfiles" + def dummy_listfiles(sf): return LISTFILES[sf.server] @@ -118,7 +122,7 @@ def test_output(self): def test_language_changed(self): """Test if output changes on language change""" self.send_signal(self.widget.Inputs.corpus, self.corpus) - self.assertEqual(self.widget.multi_box.count(), 5) + self.assertEqual(self.widget.multi_box.count(), 45) # default for Liu Hu should be English self.widget.liu_hu.click() @@ -129,18 +133,6 @@ def test_language_changed(self): output_slo = self.get_output(self.widget.Outputs.corpus) self.assertFalse(array_equal(output_eng.X, output_slo.X)) - def test_sentiment_offline(self): - """Test if sentiment works with offline lexicons""" - with patch( - "orangecontrib.text.sentiment.SentimentDictionaries.online", - new_callable=mock.PropertyMock, - return_value=False, - ): - widget = self.create_widget(OWSentimentAnalysis) - self.send_signal(widget.Inputs.corpus, self.corpus) - widget.multi_sent.click() - self.assertTrue(widget.Warning.senti_offline.is_shown()) - def test_no_file_warnings(self): widget = self.create_widget(OWSentimentAnalysis) self.send_signal(widget.Inputs.corpus, self.corpus) @@ -186,6 +178,70 @@ def test_preprocessed(self): self.send_signal(widget.Inputs.corpus, None) self.assertIsNone(widget.pp_corpus) + def test_language_from_corpus(self): + # models order: ["Liu Hu", "Vader", "Multilingual", "SentiArt", "Lilah"] + # supported and not supported languages for each model + w = self.widget + settings = [ + w.liu_language, + "English", + w.multi_language, + w.senti_language, + w.lilah_language, + ] + supported = ["en", "en", "en", "en", "sl"] + unsupported = ["am", "sl", "am", "sl", "am"] + + for i, (sett, s, ns) in enumerate(zip(settings, supported, unsupported)): + # try with supported language + self.corpus.attributes["language"] = s + self.send_signal(self.widget.Inputs.corpus, self.corpus) + self.widget.findChildren(QRadioButton)[i].click() + self.assertIsNotNone(self.get_output(self.widget.Outputs.corpus)) + self.assertEqual(ISO2LANG[s], sett) + + # try with unsupported language - use default language istead + self.corpus.attributes["language"] = ns + self.send_signal(self.widget.Inputs.corpus, self.corpus) + self.assertIsNotNone(self.get_output(self.widget.Outputs.corpus)) + self.assertEqual(s, "en" if i != 4 else "sl") + + def test_language_from_settings(self): + self.send_signal(self.widget.Inputs.corpus, self.corpus) + simulate.combobox_activate_item(self.widget.liu_lang, "Slovenian") + simulate.combobox_activate_item(self.widget.multi_box, "Spanish") + simulate.combobox_activate_item(self.widget.senti_box, "German") + simulate.combobox_activate_item(self.widget.lilah_box, "Croatian") + + self.assertEqual("Slovenian", self.widget.liu_language) + self.assertEqual("Spanish", self.widget.multi_language) + self.assertEqual("German", self.widget.senti_language) + self.assertEqual("Croatian", self.widget.lilah_language) + settings = self.widget.settingsHandler.pack_data(self.widget) + + widget = self.create_widget(OWSentimentAnalysis, stored_settings=settings) + self.send_signal(widget.Inputs.corpus, self.corpus, widget=widget) + self.assertEqual("Slovenian", widget.liu_language) + self.assertEqual("Spanish", widget.multi_language) + self.assertEqual("German", widget.senti_language) + self.assertEqual("Croatian", widget.lilah_language) + + def test_dictionary_offline(self): + """Test case when offline and dictionary not found locally""" + with patch( + "orangecontrib.text.sentiment.MultisentimentDictionaries.__getitem__", + side_effect=DictionaryNotFound, + ): + self.send_signal(self.widget.Inputs.corpus, self.corpus) + self.widget.multi_sent.click() + simulate.combobox_activate_item(self.widget.multi_box, "Afrikaans") + self.assertIsNone(self.get_output(self.widget.Outputs.corpus)) + self.assertTrue(self.widget.Error.offline.is_shown()) + + simulate.combobox_activate_item(self.widget.multi_box, "English") + self.assertIsNotNone(self.get_output(self.widget.Outputs.corpus)) + self.assertFalse(self.widget.Error.offline.is_shown()) + if __name__ == "__main__": unittest.main()