From ac50b179d0345c405712f174f465854c1a4dc030 Mon Sep 17 00:00:00 2001 From: Nezar Date: Mon, 16 Sep 2019 20:24:02 -0400 Subject: [PATCH 01/10] Set bin1 to be tile i-th coord (vertical) and bin2 to be tile j-th (horizontal) --- clodius/tiles/cooler.py | 74 ++++++++++++++++++----------------------- 1 file changed, 33 insertions(+), 41 deletions(-) diff --git a/clodius/tiles/cooler.py b/clodius/tiles/cooler.py index e4c21a73..0bc4e477 100644 --- a/clodius/tiles/cooler.py +++ b/clodius/tiles/cooler.py @@ -62,7 +62,8 @@ def get_chromosome_names_cumul_lengths(c): return chrom_names, chrom_sizes, chrom_cum_lengths -def get_data(f, start_pos_1, end_pos_1, start_pos_2, end_pos_2, transform='default', resolution=None): +def get_data(f, start_pos_1, end_pos_1, start_pos_2, end_pos_2, + transform='default', resolution=None): """Get balanced pixel data. Args: @@ -180,7 +181,7 @@ def _get_info_multi_v1(file_path): c = cooler.Cooler(f["0"]) - (chroms, chrom_sizes, chrom_cum_lengths) = get_chromosome_names_cumul_lengths(c) + chroms, chrom_sizes, chrom_cum_lengths = get_chromosome_names_cumul_lengths(c) total_length = int(chrom_cum_lengths[-1]) max_zoom = f.attrs['max-zoom'] @@ -228,7 +229,8 @@ def get_quadtree_depth(chromsizes, binsize): def get_zoom_resolutions(chromsizes, base_res): - return [base_res * 2**x for x in range(get_quadtree_depth(chromsizes, base_res) + 1)] + return [base_res * 2**x + for x in range(get_quadtree_depth(chromsizes, base_res) + 1)] def print_zoom_resolutions(chromsizes_file, base_res): @@ -242,7 +244,8 @@ def print_zoom_resolutions(chromsizes_file, base_res): print(','.join(str(res) for res in resolutions)) -def make_tiles(hdf_for_resolution, resolution, x_pos, y_pos, transform_type='default', x_width=1, y_width=1): +def make_tiles(hdf_for_resolution, resolution, x_pos, y_pos, + transform_type='default', x_width=1, y_width=1): ''' Generate tiles for a given location. 
This function retrieves tiles for a rectangular region of width x_width and height y_width @@ -271,19 +274,13 @@ def make_tiles(hdf_for_resolution, resolution, x_pos, y_pos, transform_type='def tile_size = resolution * BINS_PER_TILE - start1 = x_pos * tile_size - end1 = (x_pos + x_width) * tile_size - start2 = y_pos * tile_size - end2 = (y_pos + y_width) * tile_size - - # print("resolution:", resolution) - # print("tile_size:", tile_size) - # print("transform_type:", transform_type); - # print('start1:', start1, end1) - # print('start2:', start2, end2) + start2 = x_pos * tile_size + end2 = (x_pos + x_width) * tile_size + start1 = y_pos * tile_size + end1 = (y_pos + y_width) * tile_size c = cooler.Cooler(hdf_for_resolution) - (chroms, chrom_sizes, chrom_cum_lengths) = get_chromosome_names_cumul_lengths(c) + chroms, chrom_sizes, chrom_cum_lengths = get_chromosome_names_cumul_lengths(c) total_length = sum(chrom_sizes.values()) @@ -292,28 +289,16 @@ def make_tiles(hdf_for_resolution, resolution, x_pos, y_pos, transform_type='def transform_type, resolution=resolution ) - # print('start1', start1, 'end1', end1, 'weight', len(weight1), 'end1 - start1 / tile_size', (end1 - start1) / resolution) - - # print("data:", data) - - # print("x_width:", x_width) - # print("y_width:", y_width) # split out the individual tiles data_by_tilepos = {} for x_offset in range(0, x_width): for y_offset in range(0, y_width): - start1 = (x_pos + x_offset) * tile_size - end1 = (x_pos + x_offset + 1) * tile_size - start2 = (y_pos + y_offset) * tile_size - end2 = (y_pos + y_offset + 1) * tile_size - - # print("resolution:", resolution) - # print("tile_size", tile_size) - # print("x_pos:", x_pos, "x_offset", x_offset) - # print("start1", start1, 'end1', end1) - # print("start2", start2, 'end2', end2) + start2 = (x_pos + x_offset) * tile_size + end2 = (x_pos + x_offset + 1) * tile_size + start1 = (y_pos + y_offset) * tile_size + end1 = (y_pos + y_offset + 1) * tile_size df = 
data[data['genome_start1'] >= start1] df = df[df['genome_start1'] < end1] @@ -323,8 +308,8 @@ def make_tiles(hdf_for_resolution, resolution, x_pos, y_pos, transform_type='def binsize = resolution - j = ((df['genome_start1'].values - start1) // binsize).astype(int) - i = ((df['genome_start2'].values - start2) // binsize).astype(int) + i = ((df['genome_start1'].values - start1) // binsize).astype(int) + j = ((df['genome_start2'].values - start2) // binsize).astype(int) if 'balanced' in df: v = np.nan_to_num(df['balanced'].values) @@ -359,11 +344,11 @@ def make_tiles(hdf_for_resolution, resolution, x_pos, y_pos, transform_type='def bend1 = bend1[bend1 >= 0] bend2 = bend2[bend2 >= 0] - out[:, bi] = np.nan - out[bj, :] = np.nan + out[:, bj] = np.nan + out[bi, :] = np.nan - out[:, bend1] = np.nan - out[bend2, :] = np.nan + out[:, bend2] = np.nan + out[bend1, :] = np.nan # print('sum(isnan1)', isnan1-1) # print('out.ravel()', sum(np.isnan(out.ravel())), len(out.ravel())) @@ -499,7 +484,8 @@ def make_mats(filepath): c = cooler.Cooler(f['0']) - info['chromsizes'] = [[x[0], int(x[1])] for x in c.chromsizes.iteritems()] + info['chromsizes'] = [[x[0], int(x[1])] + for x in c.chromsizes.iteritems()] info["min_pos"] = [int(m) for m in info["min_pos"]] info["max_pos"] = [int(m) for m in info["max_pos"]] info["max_zoom"] = int(info["max_zoom"]) @@ -516,6 +502,8 @@ def make_mats(filepath): mats[filepath] = [f, info] + info['mirror_tiles'] = 'false' + return f, info @@ -609,9 +597,13 @@ def generate_tiles(filepath, tile_ids): tileset_file_and_info = mats[filepath] tile_ids_by_zoom_and_transform = bin_tiles_by_zoom_level_and_transform( - tile_ids).values() - partitioned_tile_ids = list(it.chain(*[hgut.partition_by_adjacent_tiles(t) - for t in tile_ids_by_zoom_and_transform])) + tile_ids + ).values() + + partitioned_tile_ids = list( + it.chain(*[hgut.partition_by_adjacent_tiles(t) + for t in tile_ids_by_zoom_and_transform]) + ) generated_tiles = [] From 
05117add3324662ba5cff6d2896409b902a4a8db Mon Sep 17 00:00:00 2001 From: Nezar Date: Tue, 17 Sep 2019 09:13:03 -0400 Subject: [PATCH 02/10] Remove tile de-mirroring for debugging --- clodius/tiles/cooler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clodius/tiles/cooler.py b/clodius/tiles/cooler.py index 0bc4e477..ce29d936 100644 --- a/clodius/tiles/cooler.py +++ b/clodius/tiles/cooler.py @@ -502,7 +502,7 @@ def make_mats(filepath): mats[filepath] = [f, info] - info['mirror_tiles'] = 'false' + # info['mirror_tiles'] = 'false' return f, info From b05b796c91e124c2631dea6a38e9bd0eac2d18f8 Mon Sep 17 00:00:00 2001 From: Nezar Date: Tue, 17 Sep 2019 09:31:19 -0400 Subject: [PATCH 03/10] Unpin numpy in travis config --- .travis.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 820ed4a6..fe9f8ea2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,8 +17,7 @@ before_install: # Prefer stability to having the latest # - conda update --yes conda install: - - conda install --yes python=$TRAVIS_PYTHON_VERSION numpy=1.13.1 - - pip install -r requirements.txt + - conda install --yes python=$TRAVIS_PYTHON_VERSION numpy - pip install -r requirements-dev.txt script: - ./travis_test.sh From d765652a08bba50dd5b7abbccb681c62025057c4 Mon Sep 17 00:00:00 2001 From: Nezar Date: Tue, 17 Sep 2019 10:58:24 -0400 Subject: [PATCH 04/10] Unpin miniconda installer version in travis to 'latest' --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index fe9f8ea2..1f7853af 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,7 +11,7 @@ addons: - liblapack-dev - gfortran before_install: - - wget http://repo.continuum.io/miniconda/Miniconda3-4.3.21-Linux-x86_64.sh -O miniconda.sh + - wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh - bash miniconda.sh -b -p $HOME/miniconda - export PATH="$HOME/miniconda/bin:$PATH" # Prefer stability to 
having the latest From 549c491496738ccd4a6e1134043f4b1361952772 Mon Sep 17 00:00:00 2001 From: Nezar Date: Tue, 17 Sep 2019 12:15:41 -0400 Subject: [PATCH 05/10] Suppress h5py deprecation warnings --- test/cli_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/cli_test.py b/test/cli_test.py index cad8edaa..49f983be 100644 --- a/test/cli_test.py +++ b/test/cli_test.py @@ -110,7 +110,7 @@ def test_clodius_aggregate_bedgraph(): ''' assert(result.exit_code == 0) - f = h5py.File(output_file) + f = h5py.File(output_file, 'r') # print("tile_0_0", d) # print("tile:", cht.get_data(f, 22, 0)) @@ -213,7 +213,7 @@ def test_clodius_aggregate_bedgraph1(): # print("result.output", result.output) - f = h5py.File('/tmp/dm3_values.hitile') + f = h5py.File('/tmp/dm3_values.hitile', 'r') # max_zoom = f['meta'].attrs['max-zoom'] # TODO: Make assertions about result values = f['values_0'] From fce963640d046d506a16b69eb147b84e43328998 Mon Sep 17 00:00:00 2001 From: Nezar Date: Tue, 17 Sep 2019 12:16:24 -0400 Subject: [PATCH 06/10] Fix error of applying dask twice --- test/tiles/hitile_test.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/tiles/hitile_test.py b/test/tiles/hitile_test.py index 2eea8d41..abb5175c 100644 --- a/test/tiles/hitile_test.py +++ b/test/tiles/hitile_test.py @@ -10,11 +10,13 @@ def test_hitile(): array_size = int(1e6) chunk_size = 2**19 - data = da.from_array(np.random.random((array_size,)), chunks=(chunk_size,)) + data = np.random.random((array_size,)) with tempfile.TemporaryDirectory() as td: output_file = op.join(td, 'blah.hitile') - hghi.array_to_hitile(data, output_file, zoom_step=6) + hghi.array_to_hitile( + data, output_file, zoom_step=6, chunks=(chunk_size,) + ) with h5py.File(output_file, 'r') as f: (means, mins, maxs) = hghi.get_data(f, 0, 0) From dcda011bc8dc4388601abe7f7d8034fcca69b39e Mon Sep 17 00:00:00 2001 From: Nezar Date: Tue, 17 Sep 2019 12:26:39 -0400 Subject: [PATCH 07/10] Drop weird 
test --- test/bedfile_test.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/test/bedfile_test.py b/test/bedfile_test.py index b71125dc..e38131cd 100644 --- a/test/bedfile_test.py +++ b/test/bedfile_test.py @@ -21,12 +21,6 @@ def test_nonstandard_chrom(): assert ret is None - ret = cca._bedfile(filename, f.name, - 'dfsdfs', None, False, - None, 100, 1024, None, None, 0) - - assert ret is None - def test_get_tileset_info(): filename = 'test/sample_data/gene_annotations.short.db' From 1332a75e2351cb5618152e5a460896ac9d17dec2 Mon Sep 17 00:00:00 2001 From: Nezar Date: Tue, 17 Sep 2019 13:11:04 -0400 Subject: [PATCH 08/10] Maybe fix array_to_hitile --- clodius/tiles/hitile.py | 1 + test/tiles/hitile_test.py | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/clodius/tiles/hitile.py b/clodius/tiles/hitile.py index e4f49518..27d5e504 100644 --- a/clodius/tiles/hitile.py +++ b/clodius/tiles/hitile.py @@ -45,6 +45,7 @@ def array_to_hitile(old_data, filename, zoom_step=8, chunks=(1e6,), agg_function min_data = da.from_array(old_data, chunks) max_data = da.from_array(old_data, chunks) + old_data = da.from_array(old_data, chunks) for z in range(0, max_zoom, zoom_step): values_dset = f_new.require_dataset('values_' + str(z), (len(old_data),), diff --git a/test/tiles/hitile_test.py b/test/tiles/hitile_test.py index abb5175c..1dbd35d1 100644 --- a/test/tiles/hitile_test.py +++ b/test/tiles/hitile_test.py @@ -1,4 +1,3 @@ -import dask.array as da import h5py import clodius.tiles.hitile as hghi import numpy as np From 1b2a41a7fe49b180dd1cc7755286c0c11961a100 Mon Sep 17 00:00:00 2001 From: Nezar Date: Mon, 30 Sep 2019 00:51:40 -0400 Subject: [PATCH 09/10] Add origin identifier to tileset_info --- clodius/tiles/cooler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clodius/tiles/cooler.py b/clodius/tiles/cooler.py index ce29d936..e2c8c6a8 100644 --- a/clodius/tiles/cooler.py +++ b/clodius/tiles/cooler.py @@ -502,7 +502,7 @@ def 
make_mats(filepath): mats[filepath] = [f, info] - # info['mirror_tiles'] = 'false' + info['origin'] = 'upper-left' return f, info From b48119be55ba21c88b2543737b2bc27702087578 Mon Sep 17 00:00:00 2001 From: Nezar Date: Mon, 30 Sep 2019 00:52:41 -0400 Subject: [PATCH 10/10] Change type of mirror_tiles option to bool --- clodius/tiles/cooler.py | 6 +++--- clodius/tiles/density.py | 2 +- clodius/tiles/mrmatrix.py | 2 +- clodius/tiles/npmatrix.py | 2 +- test/mrmatrix_test.py | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/clodius/tiles/cooler.py b/clodius/tiles/cooler.py index e2c8c6a8..328cc349 100644 --- a/clodius/tiles/cooler.py +++ b/clodius/tiles/cooler.py @@ -478,7 +478,7 @@ def make_mats(filepath): info['chromsizes'] = [[x[0], int(x[1])] for x in c.chromsizes.iteritems()] if 'storage-mode' in c.info and c.info['storage-mode'] == 'square': - info['mirror_tiles'] = 'false' + info['mirror_tiles'] = False else: info = _get_info_multi_v1(filepath) @@ -496,9 +496,9 @@ def make_mats(filepath): # legacy metadata for non-symmetric matrices if 'symmetric' in c.info and not c.info['symmetric']: - info['mirror_tiles'] = 'false' + info['mirror_tiles'] = False if 'storage-mode' in c.info and c.info['storage-mode'] == 'square': - info['mirror_tiles'] = 'false' + info['mirror_tiles'] = False mats[filepath] = [f, info] diff --git a/clodius/tiles/density.py b/clodius/tiles/density.py index 8b438f46..f39e0f62 100644 --- a/clodius/tiles/density.py +++ b/clodius/tiles/density.py @@ -55,7 +55,7 @@ def tileset_info(points_file): 'max_pos': [float(attrs['max_x']), float(attrs['max_y'])], 'max_width': float(attrs['max_width']), 'max_zoom': int(attrs['max_zoom']), - 'mirror_tiles': 'false' + 'mirror_tiles': False } diff --git a/clodius/tiles/mrmatrix.py b/clodius/tiles/mrmatrix.py index b3d034c7..7be55408 100644 --- a/clodius/tiles/mrmatrix.py +++ b/clodius/tiles/mrmatrix.py @@ -16,7 +16,7 @@ def tileset_info(f, bounds=None): 'min_pos': min_pos, 'max_pos': max_pos, 
'resolutions': [int(r) for r in f['resolutions']], - 'mirror_tiles': 'false', + 'mirror_tiles': False, 'bins_per_dimension': 256, } diff --git a/clodius/tiles/npmatrix.py b/clodius/tiles/npmatrix.py index 63f00c6c..84796cc6 100644 --- a/clodius/tiles/npmatrix.py +++ b/clodius/tiles/npmatrix.py @@ -60,7 +60,7 @@ def tileset_info(grid, bounds=None): "min_pos": min_pos, "max_pos": max_pos, "max_zoom": max_zoom, - "mirror_tiles": "false", + "mirror_tiles": False, "bins_per_dimension": bin_size } diff --git a/test/mrmatrix_test.py b/test/mrmatrix_test.py index 71cd43fc..f598e524 100644 --- a/test/mrmatrix_test.py +++ b/test/mrmatrix_test.py @@ -35,7 +35,7 @@ def setUp(self): 'bins_per_dimension': 256, 'max_pos': (2, 2), 'min_pos': [0, 0], - 'mirror_tiles': 'false', + 'mirror_tiles': False, 'resolutions': [1] }