From d16edc08a4fee1810142a3ee9d5a9ef3979219e4 Mon Sep 17 00:00:00 2001
From: Jennifer Chang
Date: Fri, 5 Apr 2024 11:21:54 -0700
Subject: [PATCH] fixup: move hard-coded columns to a shared workflow variable or config params so they don't get out of sync between rules

---
Reviewer notes (free-form area; not part of the commit message):

* `input_nextclade_fields` lists the Nextclade TSV columns selected by
  tsv-select. `output_nextclade_fields` supplies the header names written
  after `{id_field},{nextclade_field}`, so its entries correspond
  positionally to the input columns that follow `seqName,clade`
  (e.g. `coverage` is emitted under the header `genome_coverage`).
  Keep the two lists the same length and in the same order.
* ingest/defaults/config.yaml now ends with a trailing newline; the
  abbreviated post-image blob id on its `index` line was not recomputed.
* NOTE(review): leading whitespace inside the hunks was restored by
  convention; if context does not match, apply with
  `git apply --ignore-whitespace`.

 ingest/defaults/config.yaml | 4 +++-
 ingest/rules/nextclade.smk  | 9 ++++++---
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/ingest/defaults/config.yaml b/ingest/defaults/config.yaml
index d65f8d5a..4afafebc 100644
--- a/ingest/defaults/config.yaml
+++ b/ingest/defaults/config.yaml
@@ -110,4 +110,6 @@ nextclade:
   min_length: 1000 # E gene length is approximately 1400nt
   min_seed_cover: 0.1
   nextclade_field: 'nextclade_subtype'
-  gene: ['E','NS1']
\ No newline at end of file
+  gene: ['E','NS1']
+  input_nextclade_fields: "seqName,clade,alignmentStart,alignmentEnd,coverage,failedCdses"
+  output_nextclade_fields: "alignmentStart,alignmentEnd,genome_coverage,failedCdses"
diff --git a/ingest/rules/nextclade.smk b/ingest/rules/nextclade.smk
index e153c418..32a60a7d 100644
--- a/ingest/rules/nextclade.smk
+++ b/ingest/rules/nextclade.smk
@@ -63,13 +63,15 @@ rule concat_nextclade_subtype_results:
     params:
         id_field=config["curate"]["id_field"],
         nextclade_field=config["nextclade"]["nextclade_field"],
+        input_nextclade_fields=config["nextclade"]["input_nextclade_fields"],
+        output_nextclade_fields=config["nextclade"]["output_nextclade_fields"],
     shell:
         """
-        echo "{params.id_field},{params.nextclade_field},alignmentStart,alignmentEnd,genome_coverage,failedCdses" \
+        echo "{params.id_field},{params.nextclade_field},{params.output_nextclade_fields}" \
         | tr ',' '\t' \
         > {output.nextclade_subtypes}
 
-        tsv-select -H -f "seqName,clade,alignmentStart,alignmentEnd,coverage,failedCdses" {input.nextclade_results_files} \
+        tsv-select -H -f "{params.input_nextclade_fields}" {input.nextclade_results_files} \
         | awk 'NR>1 {{print}}' \
         >> {output.nextclade_subtypes}
         """
@@ -86,12 +88,13 @@ rule append_nextclade_columns:
     params:
         id_field=config["curate"]["id_field"],
         nextclade_field=config["nextclade"]["nextclade_field"],
+        output_nextclade_fields=config["nextclade"]["output_nextclade_fields"],
     shell:
         """
         tsv-join -H \
             --filter-file {input.nextclade_subtypes} \
             --key-fields {params.id_field} \
-            --append-fields {params.nextclade_field},alignmentStart,alignmentEnd,genome_coverage,failedCdses \
+            --append-fields {params.nextclade_field},{params.output_nextclade_fields} \
             --write-all ? \
             {input.metadata} \
         > {output.metadata_all}