Skip to content

Commit

Permalink
FDR: Calculate FDR using numpy
Browse files Browse the repository at this point in the history
  • Loading branch information
VesnaT committed Feb 25, 2019
1 parent abdb14b commit 7a3e0e9
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 45 deletions.
62 changes: 19 additions & 43 deletions Orange/statistics/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
It also patches bottleneck to contain these functions.
"""
import warnings
import math
from typing import Iterable

import numpy as np
import bottleneck as bn
Expand Down Expand Up @@ -607,59 +607,35 @@ def std(x, axis=None, ddof=0):
return np.sqrt(var(x, axis=axis, ddof=ddof))


# to speed-up FDR, calculate ahead sum([1/i for i in range(1, m+1)]),
# for m in [1,100000].
# For higher values of m use an approximation, with error less or equal to
# 4.99999157277e-006. (sum([1/i for i in range(1, m+1)]) ~ log(m) + 0.5772...,
# 0.5572 is an Euler-Mascheroni constant)
c = [1.0]
for m in range(2, 100000):
c.append(c[-1] + 1.0/m)


def FDR(p_values, dependent=False, m=None, ordered=False):
def FDR(p_values: Iterable, dependent=False, m=None, ordered=False) -> Iterable:
""" `False Discovery Rate <http://en.wikipedia.org/wiki/False_discovery_rate>`_
correction on a list of p-values.
:param p_values: a list of p-values.
:param p_values: list or np.ndarray of p-values.
:param dependent: use correction for dependent hypotheses (default False).
:param m: number of hypotheses tested (default ``len(p_values)``).
:param ordered: prevent sorting of p-values if they are already sorted
(default False).
:return: list or a np.ndarray - depends on the input
"""
def is_sorted(l):
return all(l[i] <= l[i + 1] for i in range(len(l) - 1))

if not ordered:
ordered = is_sorted(p_values)
if p_values is None or len(p_values) == 0 or \
(m is not None and m <= 0):
return None

is_list = isinstance(p_values, list)
p_values = np.array(p_values) if is_list else p_values
m = len(p_values) if m is None else m
if not ordered:
joined = [(v, i) for i, v in enumerate(p_values)]
joined.sort()
p_values = [p[0] for p in joined]
indices = [p[1] for p in joined]

if not m:
m = len(p_values)
if m <= 0 or not p_values:
return []
ordered = (np.diff(p_values) >= 0).all()
if not ordered:
indices = np.argsort(p_values)
p_values = np.sort(p_values)

if dependent: # correct q for dependent tests
k = c[m-1] if m <= len(c) else math.log(m) + 0.57721566490153286060651209008240243104215933593992
m = m * k

tmp_fdrs = [p*m/(i+1.0) for (i, p) in enumerate(p_values)]
fdrs = []
cmin = tmp_fdrs[-1]
for f in reversed(tmp_fdrs):
cmin = min(f, cmin)
fdrs.append( cmin)
fdrs.reverse()
m *= sum(1 / np.arange(1, m + 1))

fdrs = (p_values * m / np.arange(1, len(p_values) + 1))[::-1]
fdrs = np.array(np.minimum.accumulate(fdrs)[::-1])
if not ordered:
new = [None] * len(fdrs)
for v, i in zip(fdrs, indices):
new[i] = v
fdrs = new

return fdrs
fdrs[indices] = fdrs.copy()
return fdrs if not is_list else list(fdrs)
29 changes: 27 additions & 2 deletions Orange/tests/test_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,11 +264,36 @@ def test_nanstd_with_ddof(self):
)

def test_FDR(self):
p_values = [0.00001, 0.0001, 0.0002, 0.0003, 0.0004]
p_values = np.array([0.0002, 0.0004, 0.00001, 0.0003, 0.0001])
np.testing.assert_almost_equal(
np.array([0.00005, 0.00025, 0.00033, 0.00038, 0.0004]),
np.array([0.00033, 0.0004, 0.00005, 0.00038, 0.00025]),
FDR(p_values), decimal=5)

def test_FDR_dependent(self):
p_values = np.array([0.0002, 0.0004, 0.00001, 0.0003, 0.0001])
np.testing.assert_almost_equal(
np.array([0.00076, 0.00091, 0.00011, 0.00086, 0.00057]),
FDR(p_values, dependent=True), decimal=5)

def test_FDR_m(self):
p_values = np.array([0.0002, 0.0004, 0.00001, 0.0003, 0.0001])
np.testing.assert_almost_equal(
np.array([0.0002, 0.00024, 0.00003, 0.000225, 0.00015]),
FDR(p_values, m=3), decimal=5)

def test_FDR_no_values(self):
self.assertIsNone(FDR(None))
self.assertIsNone(FDR([]))
self.assertIsNone(FDR([0.0002, 0.0004], m=0))

def test_FDR_list(self):
p_values = [0.0002, 0.0004, 0.00001, 0.0003, 0.0001]
result = FDR(p_values)
self.assertIsInstance(result, list)
np.testing.assert_almost_equal(
np.array([0.00033, 0.0004, 0.00005, 0.00038, 0.00025]),
result, decimal=5)


class TestNanmean(unittest.TestCase):
def setUp(self):
Expand Down

0 comments on commit 7a3e0e9

Please sign in to comment.