From 0c3ebbfc485c4f09a7ba224270926ffe70f4ff2b Mon Sep 17 00:00:00 2001 From: Mark Keller <7525285+keller-mark@users.noreply.github.com> Date: Thu, 11 Jun 2020 21:49:22 -0400 Subject: [PATCH] bbi.fetch with summary="sum" (#12) * Sum aggregation mode * Missed a line * Test * Use higher zoom level in test * Actually do sum in test * Improve test * Whitespace --- bbi/__init__.py | 2 +- bbi/cbbi.pxd | 1 + bbi/cbbi.pyx | 4 ++++ include/bbiFile.h | 1 + src/bbiRead.c | 7 +++++++ tests/test_bbi.py | 4 ++++ 6 files changed, 18 insertions(+), 1 deletion(-) diff --git a/bbi/__init__.py b/bbi/__init__.py index 69dece9..2e9b1a9 100644 --- a/bbi/__init__.py +++ b/bbi/__init__.py @@ -15,4 +15,4 @@ __all__ = ['is_bbi', 'is_bigwig', 'is_bigbed', 'info', 'zooms', 'chromsizes', 'fetch', 'stackup', 'fetch_intervals'] -__version__ = '0.2.2' +__version__ = '0.2.3' diff --git a/bbi/cbbi.pxd b/bbi/cbbi.pxd index f5fcf02..94122f3 100644 --- a/bbi/cbbi.pxd +++ b/bbi/cbbi.pxd @@ -122,6 +122,7 @@ cdef extern from "bbiFile.h": bbiSumMin = 2 bbiSumCoverage = 3 bbiSumStandardDeviation = 4 + bbiSumSum = 5 cdef struct bbiSummary: bbiSummary *next diff --git a/bbi/cbbi.pyx b/bbi/cbbi.pyx index aeaddea..7856618 100644 --- a/bbi/cbbi.pyx +++ b/bbi/cbbi.pyx @@ -78,6 +78,7 @@ cpdef dict BBI_SUMMARY_TYPES = { 'min': bbiSumMin, 'cov': bbiSumCoverage, 'std': bbiSumStandardDeviation, + 'sum': bbiSumSum, } @@ -254,6 +255,7 @@ def info(str inFile): 'std': sqrt(var_from_sums(summ.sumData, summ.sumSquares, summ.validCount)), + 'sum': summ.sumData, } } bbiFileClose(&bbi) @@ -731,6 +733,8 @@ cdef inline void array_query_summarized( val = sqrt(var_from_sums(el.sumData, el.sumSquares, el.validCount)) + elif summaryType == bbiSumSum: + val = el.sumData else: raise RuntimeError out[i] = val diff --git a/include/bbiFile.h b/include/bbiFile.h index 0746afa..0dd3b66 100644 --- a/include/bbiFile.h +++ b/include/bbiFile.h @@ -200,6 +200,7 @@ enum bbiSummaryType bbiSumMin = 2, /* Minimum value */ bbiSumCoverage = 3, /* Bases in region containing actual data. */ bbiSumStandardDeviation = 4, /* Standard deviation in window. */ + bbiSumSum = 5, /* Sum value */ }; enum bbiSummaryType bbiSummaryTypeFromString(char *string); diff --git a/src/bbiRead.c b/src/bbiRead.c index caf2d30..9f1d153 100644 --- a/src/bbiRead.c +++ b/src/bbiRead.c @@ -280,6 +280,8 @@ else if (sameWord(string, "coverage") || sameWord(string, "dataCoverage")) return bbiSumCoverage; else if (sameWord(string, "std")) return bbiSumStandardDeviation; +else if (sameWord(string, "sum")) + return bbiSumSum; else { errAbort("Unknown bbiSummaryType %s", string); @@ -302,6 +304,8 @@ switch (type) return "coverage"; case bbiSumStandardDeviation: return "std"; + case bbiSumSum: + return "sum"; default: errAbort("Unknown bbiSummaryType %d", (int)type); return NULL; @@ -691,6 +695,9 @@ if (ret) case bbiSumStandardDeviation: val = calcStdFromSums(el->sumData, el->sumSquares, el->validCount); break; + case bbiSumSum: + val = el->sumData; + break; default: internalErr(); val = 0.0; diff --git a/tests/test_bbi.py b/tests/test_bbi.py index bad7424..8c7f24f 100644 --- a/tests/test_bbi.py +++ b/tests/test_bbi.py @@ -120,6 +120,10 @@ def test_fetch_summary_stats(path): path, 'chr21', 20000000, 20001000, bins=10, summary='max' ).max() assert np.isclose(vmax, np.max(values)) + vsum = bbi.fetch(path, 'chr21', 20000000, 20001000, bins=100, summary='sum') + values_sum_every_ten = np.reshape(values, (-1, 10)).sum(axis=-1) + assert len(vsum) == len(values_sum_every_ten) + assert np.allclose(vsum, values_sum_every_ten) with pytest.raises(ValueError): bbi.fetch(path, 'chr21', 20000000, 20001000, bins=10, summary='foo')