From 55c1c6c29cd9f5c2ade0ca786067c41c64baeea4 Mon Sep 17 00:00:00 2001 From: Todd Morse Date: Fri, 9 Oct 2020 20:25:27 -0400 Subject: [PATCH] idseq dedup cluster header bugfix (#52) --- .../idseq-dag/idseq_dag/util/idseq_dedup_clusters.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/short-read-mngs/idseq-dag/idseq_dag/util/idseq_dedup_clusters.py b/short-read-mngs/idseq-dag/idseq_dag/util/idseq_dedup_clusters.py index cf4d8e26..161b8759 100644 --- a/short-read-mngs/idseq-dag/idseq_dag/util/idseq_dedup_clusters.py +++ b/short-read-mngs/idseq-dag/idseq_dag/util/idseq_dedup_clusters.py @@ -3,7 +3,7 @@ The first column contains the representative read id of a cluster, and the second column contains the read id. """ -from csv import reader +from csv import DictReader from typing import Dict, Optional, Tuple @@ -12,7 +12,8 @@ def parse_clusters_file( ) -> Dict[str, Optional[Tuple]]: clusters_dict = {} with open(idseq_dedup_clusters_path) as f: - for r_read_id, read_id in reader(f): + for row in DictReader(f): + r_read_id, read_id = row["representative read id"], row["read id"] if r_read_id not in clusters_dict: clusters_dict[r_read_id] = (1,) else: