Skip to content

Commit

Permalink
Add test for FDR and p-value filters
Browse files Browse the repository at this point in the history
  • Loading branch information
ajdapretnar committed May 27, 2019
1 parent d1f63b6 commit 27cd416
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 31 deletions.
32 changes: 1 addition & 31 deletions orangecontrib/text/tests/test_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import numpy as np
import scipy.sparse as sp

from orangecontrib.text.stats import hypergeom_p_values, false_discovery_rate, is_sorted
from orangecontrib.text.stats import hypergeom_p_values, is_sorted

class StatsTests(unittest.TestCase):
x = np.array([[0, 0, 9, 0, 1],
Expand All @@ -29,36 +29,6 @@ def test_hypergeom_p_values(self):
with self.assertRaises(ValueError):
hypergeom_p_values(self.x, self.x[-2:, :-1])


def test_false_discovery_rate(self):
    """Compare false_discovery_rate output with precomputed references.

    Exercises the default call, an explicit ``m``, pre-sorted input with
    ``ordered=True``, the calls expected to give an empty result, and
    the ``dependent=True`` variant.
    """
    p_values = np.array(
        [0.727, 0.281, 0.791, 0.034, 0.628, 0.743, 0.958, 0.552, 0.867, 0.606,
         0.611, 0.594, 0.071, 0.517, 0.526, 0.526, 0.635, 0.932, 0.210, 0.636])
    # calculated with http://www.sdmproject.com/utilities/?show=FDR
    fdr_fixed = np.array(
        [0.92875, 0.9085714, 0.9305882, 0.68, 0.9085714, 0.92875, 0.958, 0.9085714,
         0.958, 0.9085714, 0.9085714, 0.9085714, 0.71, 0.9085714, 0.9085714, 0.9085714,
         0.9085714, 0.958, 0.9085714, 0.9085714]
    )

    corrected = false_discovery_rate(p_values)
    np.testing.assert_allclose(corrected, fdr_fixed)

    # Passing the number of tests explicitly must not change the result.
    corrected = false_discovery_rate(p_values, m=len(p_values))
    np.testing.assert_allclose(corrected, fdr_fixed)

    # With pre-sorted input only the multiset of corrected values is
    # compared, hence both sides are sorted before the check.
    corrected = false_discovery_rate(sorted(p_values), ordered=True)
    np.testing.assert_allclose(sorted(corrected), sorted(fdr_fixed))

    # Empty input and m=-1 are both expected to give an empty result.
    np.testing.assert_equal(false_discovery_rate([]), [])
    np.testing.assert_equal(false_discovery_rate(p_values, m=-1), [])

    dependent_fixed = [
        3.3414007065721947, 3.2688034599191167, 3.3480141985890031, 2.446462966857704,
        3.2688034599191167, 3.3414007065721947, 3.4466345915436469, 3.2688034599191167,
        3.4466345915436469, 3.2688034599191167, 3.2688034599191167, 3.2688034599191167,
        2.554395156572014, 3.2688034599191167, 3.2688034599191167, 3.2688034599191167,
        3.2688034599191167, 3.4466345915436469, 3.2688034599191167, 3.2688034599191167]
    # Computed floats are compared with a tolerance rather than bit-for-bit,
    # so last-digit rounding differences between platforms or numpy
    # versions do not fail the test.
    np.testing.assert_allclose(
        false_discovery_rate(p_values, dependent=True), dependent_fixed)

def test_is_sorted(self):
self.assertTrue(is_sorted(range(10)))
self.assertFalse(is_sorted(range(10)[::-1]))
67 changes: 67 additions & 0 deletions orangecontrib/text/widgets/tests/test_owwordenrichment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import unittest

from Orange.widgets.tests.base import WidgetTest

from orangecontrib.text.corpus import Corpus
from orangecontrib.text.vectorization import BowVectorizer
from orangecontrib.text.widgets.owwordenrichment import OWWordEnrichment


class TestWordEnrichment(WidgetTest):
    """Widget test for the p-value and FDR filters of OWWordEnrichment."""

    def setUp(self):
        """Create the widget and a bag-of-words version of 'book-excerpts'."""
        self.widget = self.create_widget(OWWordEnrichment)  # type: OWWordEnrichment
        self.corpus = Corpus.from_file('book-excerpts')
        self.corpus_vect = BowVectorizer().transform(self.corpus)

    def test_filter_fdr(self):
        """Apply several p-value/FDR settings and check the listed words."""
        w = self.widget
        # The first ten entries of the vectorized corpus act as the selection.
        selection = self.corpus_vect[:10]
        self.send_signal(w.Inputs.data, self.corpus_vect)
        self.send_signal(w.Inputs.selected_data, selection)

        def refilter(p_value, by_fdr, fdr_value):
            # filter_by_p stays enabled in every scenario of this test.
            w.filter_by_p = True
            w.filter_p_value = p_value
            w.filter_by_fdr = by_fdr
            w.filter_fdr_value = fdr_value
            w.filter_and_display()

        def listed_words(count):
            # Column-0 text of the first `count` top-level items.
            return {w.sig_words.topLevelItem(row).text(0)
                    for row in range(count)}

        # test p-value filter
        refilter(p_value=1e-9, by_fdr=False, fdr_value=0.01)
        self.assertEqual(w.sig_words.topLevelItemCount(), 3)
        self.assertEqual(listed_words(3), {'livesey', 'doctor', 'rum'})

        # test fdr filter
        refilter(p_value=1e-4, by_fdr=True, fdr_value=1e-4)
        self.assertEqual(w.sig_words.topLevelItemCount(), 5)
        self.assertEqual(listed_words(5),
                         {'livesey', 'doctor', 'rum', 'admiral', 'inn'})

        # test if different when fdr false
        refilter(p_value=1e-4, by_fdr=False, fdr_value=1e-4)
        self.assertEqual(w.sig_words.topLevelItemCount(), 16)

        # test no results
        refilter(p_value=1e-11, by_fdr=False, fdr_value=1e-5)
        self.assertEqual(w.sig_words.topLevelItemCount(), 0)


# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()

0 comments on commit 27cd416

Please sign in to comment.