From 27cd41633f8b5b3f57adde79f50ca79f20af01d6 Mon Sep 17 00:00:00 2001
From: Ajda Pretnar
Date: Tue, 19 Mar 2019 14:03:11 +0100
Subject: [PATCH] Add test for FDR and p-value filters

---
Notes (ignored by git am, not part of the commit message):
  * test_stats.py: removes test_false_discovery_rate and the
    false_discovery_rate import it needed.
  * test_owwordenrichment.py: new widget test that drives the p-value and
    FDR filter settings of OWWordEnrichment and checks the listed words.
  * Line breaks and indentation in this file were restored from a
    whitespace-collapsed copy. Hunk line counts match the hunk headers, but
    the exact indentation of continuation lines (especially in the removed
    hunk) is a best guess; if the patch does not apply cleanly, retry with
    whitespace-tolerant matching (e.g. `git apply --ignore-whitespace`).

 orangecontrib/text/tests/test_stats.py        | 32 +--------
 .../widgets/tests/test_owwordenrichment.py    | 67 +++++++++++++++++++
 2 files changed, 68 insertions(+), 31 deletions(-)

diff --git a/orangecontrib/text/tests/test_stats.py b/orangecontrib/text/tests/test_stats.py
index ada06fe07..edad2df07 100644
--- a/orangecontrib/text/tests/test_stats.py
+++ b/orangecontrib/text/tests/test_stats.py
@@ -2,7 +2,7 @@
 import numpy as np
 import scipy.sparse as sp
 
-from orangecontrib.text.stats import hypergeom_p_values, false_discovery_rate, is_sorted
+from orangecontrib.text.stats import hypergeom_p_values, is_sorted
 
 class StatsTests(unittest.TestCase):
     x = np.array([[0, 0, 9, 0, 1],
@@ -29,36 +29,6 @@ def test_hypergeom_p_values(self):
         with self.assertRaises(ValueError):
             hypergeom_p_values(self.x, self.x[-2:, :-1])
 
-
-    def test_false_discovery_rate(self):
-        p_values = np.array(
-            [0.727, 0.281, 0.791, 0.034, 0.628, 0.743, 0.958, 0.552, 0.867, 0.606,
-             0.611, 0.594, 0.071, 0.517, 0.526, 0.526, 0.635, 0.932, 0.210, 0.636])
-        # calculated with http://www.sdmproject.com/utilities/?show=FDR
-        fdr_fixed = np.array(
-            [0.92875, 0.9085714, 0.9305882, 0.68, 0.9085714, 0.92875, 0.958, 0.9085714,
-             0.958, 0.9085714, 0.9085714, 0.9085714, 0.71, 0.9085714, 0.9085714, 0.9085714,
-             0.9085714, 0.958, 0.9085714, 0.9085714]
-        )
-        corrected = false_discovery_rate(p_values)
-        np.testing.assert_allclose(corrected, fdr_fixed)
-
-        corrected = false_discovery_rate(p_values, m=len(p_values))
-        np.testing.assert_allclose(corrected, fdr_fixed)
-
-        corrected = false_discovery_rate(sorted(p_values), ordered=True)
-        np.testing.assert_allclose(sorted(corrected), sorted(fdr_fixed))
-
-        np.testing.assert_equal(false_discovery_rate([]), [])
-        np.testing.assert_equal(false_discovery_rate(p_values, m=-1), [])
-
-        dependant = [3.3414007065721947, 3.2688034599191167, 3.3480141985890031, 2.446462966857704,
-                     3.2688034599191167, 3.3414007065721947, 3.4466345915436469, 3.2688034599191167,
-                     3.4466345915436469, 3.2688034599191167, 3.2688034599191167, 3.2688034599191167,
-                     2.554395156572014, 3.2688034599191167, 3.2688034599191167, 3.2688034599191167,
-                     3.2688034599191167, 3.4466345915436469, 3.2688034599191167, 3.2688034599191167]
-        np.testing.assert_equal(false_discovery_rate(p_values, dependent=True), dependant)
-
     def test_is_sorted(self):
         self.assertTrue(is_sorted(range(10)))
         self.assertFalse(is_sorted(range(10)[::-1]))
diff --git a/orangecontrib/text/widgets/tests/test_owwordenrichment.py b/orangecontrib/text/widgets/tests/test_owwordenrichment.py
index e69de29bb..d55e925cf 100644
--- a/orangecontrib/text/widgets/tests/test_owwordenrichment.py
+++ b/orangecontrib/text/widgets/tests/test_owwordenrichment.py
@@ -0,0 +1,67 @@
+import unittest
+
+from Orange.widgets.tests.base import WidgetTest
+
+from orangecontrib.text.corpus import Corpus
+from orangecontrib.text.vectorization import BowVectorizer
+from orangecontrib.text.widgets.owwordenrichment import OWWordEnrichment
+
+
+class TestWordEnrichment(WidgetTest):
+    def setUp(self):
+        # type: OWWordEnrichment
+        self.widget = self.create_widget(OWWordEnrichment)
+        self.corpus = Corpus.from_file('book-excerpts')
+        vect = BowVectorizer()
+        self.corpus_vect = vect.transform(self.corpus)
+
+    def test_filter_fdr(self):
+        widget = self.widget
+        subset_corpus = self.corpus_vect[:10]
+        self.send_signal(widget.Inputs.data, self.corpus_vect)
+        self.send_signal(widget.Inputs.selected_data, subset_corpus)
+
+        # test p-value filter
+        widget.filter_by_p = True
+        widget.filter_p_value = 1e-9
+        widget.filter_by_fdr = False
+        widget.filter_fdr_value = 0.01
+
+        widget.filter_and_display()
+        self.assertEqual(widget.sig_words.topLevelItemCount(), 3)
+        self.assertEqual({widget.sig_words.topLevelItem(i).text(0)
+                          for i in (0, 1, 2)}, {'livesey', 'doctor', 'rum'})
+
+        # test fdr filter
+        widget.filter_by_p = True
+        widget.filter_p_value = 1e-4
+        widget.filter_by_fdr = True
+        widget.filter_fdr_value = 1e-4
+
+        widget.filter_and_display()
+        self.assertEqual(widget.sig_words.topLevelItemCount(), 5)
+        self.assertEqual({widget.sig_words.topLevelItem(i).text(0)
+                          for i in (0, 1, 2, 3, 4)},
+                         {'livesey', 'doctor', 'rum', 'admiral', 'inn'})
+
+        # test if different when fdr false
+        widget.filter_by_p = True
+        widget.filter_p_value = 1e-4
+        widget.filter_by_fdr = False
+        widget.filter_fdr_value = 1e-4
+
+        widget.filter_and_display()
+        self.assertEqual(widget.sig_words.topLevelItemCount(), 16)
+
+        # test no results
+        widget.filter_by_p = True
+        widget.filter_p_value = 1e-11
+        widget.filter_by_fdr = False
+        widget.filter_fdr_value = 1e-5
+
+        widget.filter_and_display()
+        self.assertEqual(widget.sig_words.topLevelItemCount(), 0)
+
+
+if __name__ == "__main__":
+    unittest.main()