From 1560eb7ce578ac2f57cad993e2867d2d743885d0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Primo=C5=BE=20Godec?=
Date: Mon, 30 Dec 2019 14:29:47 +0100
Subject: [PATCH] Tests for Word Enrichment

Extend the Word Enrichment widget tests: shrink the fixture corpus,
update the expected p-value/FDR filter results, and add tests for the
widget's error states (empty selection, no bag-of-words features, all
documents selected, no word overlap), for the input/output summaries
and for the filter controls.

Tests that call WidgetTest.wait_until_finished are skipped on Orange
older than 3.24. The guard compares the numeric (major, minor) version
components: a plain string comparison of Orange.__version__ against
"24.0" is lexicographic and is never true for any "3.x" version, so the
skip would never trigger.

---
 .../widgets/tests/test_owwordenrichment.py    | 173 ++++++++++++++++--
 1 file changed, 158 insertions(+), 15 deletions(-)

diff --git a/orangecontrib/text/widgets/tests/test_owwordenrichment.py b/orangecontrib/text/widgets/tests/test_owwordenrichment.py
index d55e925cf..c469bb5da 100644
--- a/orangecontrib/text/widgets/tests/test_owwordenrichment.py
+++ b/orangecontrib/text/widgets/tests/test_owwordenrichment.py
@@ -1,5 +1,8 @@
 import unittest
+from unittest.mock import Mock
 
+import Orange
+from Orange.data import Table, Domain
 from Orange.widgets.tests.base import WidgetTest
 
 from orangecontrib.text.corpus import Corpus
@@ -9,49 +12,50 @@
 
 class TestWordEnrichment(WidgetTest):
     def setUp(self):
-        # type: OWWordEnrichment
         self.widget = self.create_widget(OWWordEnrichment)
-        self.corpus = Corpus.from_file('book-excerpts')
+        corpus = Corpus.from_file('book-excerpts')[::3]
         vect = BowVectorizer()
-        self.corpus_vect = vect.transform(self.corpus)
+        self.corpus_vect = vect.transform(corpus)
+        self.subset_corpus = self.corpus_vect[:5]
 
+    @unittest.skipIf(tuple(map(int, Orange.__version__.split(".")[:2]))
+                     < (3, 24), "wait_until_finished not supported")
     def test_filter_fdr(self):
         widget = self.widget
-        subset_corpus = self.corpus_vect[:10]
+
         self.send_signal(widget.Inputs.data, self.corpus_vect)
-        self.send_signal(widget.Inputs.selected_data, subset_corpus)
+        self.send_signal(widget.Inputs.selected_data, self.subset_corpus)
+        self.wait_until_finished(timeout=10000)
 
         # test p-value filter
         widget.filter_by_p = True
-        widget.filter_p_value = 1e-9
+        widget.filter_p_value = 1e-3
         widget.filter_by_fdr = False
         widget.filter_fdr_value = 0.01
 
         widget.filter_and_display()
         self.assertEqual(widget.sig_words.topLevelItemCount(), 3)
         self.assertEqual({widget.sig_words.topLevelItem(i).text(0)
-                          for i in (0, 1, 2)}, {'livesey', 'doctor', 'rum'})
+                          for i in (0, 1, 2)}, {'livesey', 'jim', 'doctor'})
 
         # test fdr filter
         widget.filter_by_p = True
-        widget.filter_p_value = 1e-4
+        widget.filter_p_value = 1e-1
         widget.filter_by_fdr = True
-        widget.filter_fdr_value = 1e-4
+        widget.filter_fdr_value = 0.9
 
         widget.filter_and_display()
-        self.assertEqual(widget.sig_words.topLevelItemCount(), 5)
-        self.assertEqual({widget.sig_words.topLevelItem(i).text(0)
-                          for i in (0, 1, 2, 3, 4)},
-                         {'livesey', 'doctor', 'rum', 'admiral', 'inn'})
+        self.assertEqual(widget.sig_words.topLevelItemCount(), 1)
+        self.assertEqual(widget.sig_words.topLevelItem(0).text(0), "doctor")
 
         # test if different when fdr false
         widget.filter_by_p = True
-        widget.filter_p_value = 1e-4
+        widget.filter_p_value = 1e-1
         widget.filter_by_fdr = False
         widget.filter_fdr_value = 1e-4
 
         widget.filter_and_display()
-        self.assertEqual(widget.sig_words.topLevelItemCount(), 16)
+        self.assertEqual(widget.sig_words.topLevelItemCount(), 108)
 
         # test no results
         widget.filter_by_p = True
@@ -62,6 +66,145 @@ def test_filter_fdr(self):
         widget.filter_and_display()
         self.assertEqual(widget.sig_words.topLevelItemCount(), 0)
 
+        self.send_signal(widget.Inputs.data, None)
+        widget.filter_and_display()
+        self.assertEqual(widget.sig_words.topLevelItemCount(), 0)
+
+    def test_empty_selection(self):
+        w = self.widget
+
+        # empty selection
+        self.send_signal(w.Inputs.data, self.corpus_vect)
+        self.send_signal(w.Inputs.selected_data, self.subset_corpus[:0])
+        self.assertTrue(self.widget.Error.empty_selection.is_shown())
+
+        # selection not empty
+        self.send_signal(w.Inputs.selected_data, self.subset_corpus)
+        self.assertFalse(self.widget.Error.empty_selection.is_shown())
+
+    def test_no_bow_features(self):
+        w = self.widget
+
+        iris = Table("iris")
+        self.send_signal(w.Inputs.data, iris)
+        self.send_signal(w.Inputs.selected_data, iris[:10])
+        self.assertTrue(self.widget.Error.no_bow_features.is_shown())
+
+        self.send_signal(w.Inputs.data, None)
+        self.send_signal(w.Inputs.selected_data, None)
+        self.assertFalse(self.widget.Error.no_bow_features.is_shown())
+
+    def test_all_selected(self):
+        w = self.widget
+
+        self.send_signal(w.Inputs.data, self.corpus_vect)
+        self.send_signal(w.Inputs.selected_data, self.corpus_vect)
+        self.assertTrue(self.widget.Error.all_selected.is_shown())
+
+        self.send_signal(w.Inputs.data, None)
+        self.send_signal(w.Inputs.selected_data, None)
+        self.assertFalse(self.widget.Error.all_selected.is_shown())
+
+    def test_no_overlaping(self):
+        w = self.widget
+
+        # with one column bow it is easier
+        corpus_vect = Corpus.from_table(Domain(
+            self.corpus_vect.domain.attributes[:1],
+            self.corpus_vect.domain.class_var,
+            self.corpus_vect.domain.metas
+        ), self.corpus_vect)
+
+        self.send_signal(w.Inputs.data, corpus_vect[10:15])
+        self.send_signal(w.Inputs.selected_data, corpus_vect[4:5])
+        self.assertTrue(self.widget.Error.no_words_overlap.is_shown())
+
+        self.send_signal(w.Inputs.selected_data, self.subset_corpus)
+        self.send_signal(w.Inputs.data, self.corpus_vect)
+        self.assertFalse(self.widget.Error.no_words_overlap.is_shown())
+
+    def test_input_info(self):
+        w = self.widget
+        input_sum = w.info.set_input_summary = Mock()
+
+        self.send_signal(w.Inputs.selected_data, self.subset_corpus)
+        self.send_signal(w.Inputs.data, self.corpus_vect)
+
+        input_sum.assert_called_with(
+            "5923|1204", "Cluster words: 5923\nSelected words: 1204")
+
+        self.send_signal(w.Inputs.selected_data, None)
+        self.send_signal(w.Inputs.data, None)
+        input_sum.assert_called_with(w.info.NoInput)
+
+    @unittest.skipIf(tuple(map(int, Orange.__version__.split(".")[:2]))
+                     < (3, 24), "wait_until_finished not supported")
+    def test_output_info(self):
+        w = self.widget
+        output_sum = w.info.set_output_summary = Mock()
+        w.filter_p_value = 1e-3
+        w.filter_by_p = True
+        w.filter_by_fdr = False
+
+        self.send_signal(w.Inputs.selected_data, self.subset_corpus)
+        self.send_signal(w.Inputs.data, self.corpus_vect)
+        self.wait_until_finished(timeout=10000)
+
+        output_sum.assert_called_with("3", "Words after filtering: 3")
+
+        # test fdr filter
+        w.filter_by_p = True
+        w.filter_p_value = 1e-4
+        w.filter_by_fdr = True
+        w.filter_fdr_value = 1e-4
+        w.filter_and_display()
+        output_sum.assert_called_with("0", "Words after filtering: 0")
+
+        self.send_signal(w.Inputs.selected_data, None)
+        self.send_signal(w.Inputs.data, None)
+        output_sum.assert_called_with(w.info.NoOutput)
+
+    @unittest.skipIf(tuple(map(int, Orange.__version__.split(".")[:2]))
+                     < (3, 24), "wait_until_finished not supported")
+    def test_filter_changed(self):
+        """
+        This case tests whether functions are correctly triggered when
+        values in the filter fields change
+        """
+        w = self.widget
+
+        self.send_signal(w.Inputs.data, self.corpus_vect)
+        self.send_signal(w.Inputs.selected_data, self.subset_corpus)
+        self.wait_until_finished(timeout=10000)
+
+        # test p-value filter
+        w.controls.filter_by_p.click()  # set to true
+        w.controls.filter_p_value.valueChanged.emit(1e-3)
+        w.controls.filter_by_fdr.click()  # set to false
+        w.controls.filter_fdr_value.valueChanged.emit(0.1)
+
+        self.assertEqual(w.sig_words.topLevelItemCount(), 3)
+        self.assertEqual({w.sig_words.topLevelItem(i).text(0)
+                          for i in (0, 1, 2)}, {'livesey', 'jim', 'doctor'})
+
+        # test fdr filter
+        w.controls.filter_p_value.valueChanged.emit(1e-1)
+        w.controls.filter_by_fdr.click()  # set to True
+        w.controls.filter_fdr_value.valueChanged.emit(0.9)
+
+        self.assertEqual(w.sig_words.topLevelItemCount(), 1)
+        self.assertEqual(w.sig_words.topLevelItem(0).text(0), "doctor")
+
+        # test if different when fdr false
+        w.controls.filter_by_fdr.click()  # set to False
+
+        self.assertEqual(w.sig_words.topLevelItemCount(), 108)
+
+        # test no results
+        w.controls.filter_p_value.valueChanged.emit(1e-11)
+
+        self.assertEqual(w.sig_words.topLevelItemCount(), 0)
+
 
 if __name__ == "__main__":
     unittest.main()