Skip to content

Commit

Permalink
fixup: split nextclade
Browse files Browse the repository at this point in the history
  • Loading branch information
j23414 committed Dec 6, 2023
1 parent 26e00f3 commit 87d8c5d
Showing 1 changed file with 17 additions and 60 deletions.
77 changes: 17 additions & 60 deletions ingest/workflow/snakemake_rules/nextclade.smk
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
rule nextclade_all:
"""
Classify sequences into DENV1, DENV2, DENV3, and DENV4
"""
input:
sequences="results/sequences.fasta",
dataset="../nextclade_data/all",
Expand All @@ -18,6 +21,9 @@ rule nextclade_all:

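# TODO: the per-serotype `augur filter` calls in this rule look independent of each other; consider one rule per serotype (or a single wildcard rule) so Snakemake can schedule them in parallel.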
rule split_dengue_sequences:
"""
Split sequences into DENV1, DENV2, DENV3, and DENV4
"""
input:
sequences="results/sequences.fasta",
metadata="results/metadata.tsv",
Expand All @@ -31,7 +37,7 @@ rule split_dengue_sequences:
shell:
"""
cp {input.sequences} {output.sequences_all}
augur filter \
--sequences results/sequences.fasta \
--metadata {input.nextclade_all_results} \
Expand Down Expand Up @@ -61,67 +67,15 @@ rule split_dengue_sequences:
--output-sequences {output.sequences_denv4}
"""

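# TODO: nextclade_denv1 through nextclade_denv4 differ only in the serotype number; collapse them into a single rule that takes the serotype as a wildcard.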
rule nextclade_denv1:
    """
    Run nextclade on the DENV1 sequences.

    Reads results/sequences_denv1.fasta, uses the dataset directory
    ../nextclade_data/denv1, and writes the nextclade TSV report to
    results/nextclade_results/nextclade_denv1.tsv.
    """
    threads: 4
    input:
        dataset="../nextclade_data/denv1",
        sequences="results/sequences_denv1.fasta",
    output:
        "results/nextclade_results/nextclade_denv1.tsv",
    shell:
        # -j is fed from the rule's `threads` directive, so the job count
        # follows whatever Snakemake allots to this rule.
        """
        nextclade run \
            --input-dataset {input.dataset} \
            -j {threads} \
            --output-tsv {output} \
            --min-match-rate 0.01 \
            --silent \
            {input.sequences}
        """

rule nextclade_denv2:
    """
    Run nextclade on the DENV2 sequences.

    Reads results/sequences_denv2.fasta, uses the dataset directory
    ../nextclade_data/denv2, and writes the nextclade TSV report to
    results/nextclade_results/nextclade_denv2.tsv.
    """
    threads: 4
    input:
        dataset="../nextclade_data/denv2",
        sequences="results/sequences_denv2.fasta",
    output:
        "results/nextclade_results/nextclade_denv2.tsv",
    shell:
        # -j is fed from the rule's `threads` directive, so the job count
        # follows whatever Snakemake allots to this rule.
        """
        nextclade run \
            --input-dataset {input.dataset} \
            -j {threads} \
            --output-tsv {output} \
            --min-match-rate 0.01 \
            --silent \
            {input.sequences}
        """

rule nextclade_denv3:
    """
    Run nextclade on the DENV3 sequences.

    Reads results/sequences_denv3.fasta, uses the dataset directory
    ../nextclade_data/denv3, and writes the nextclade TSV report to
    results/nextclade_results/nextclade_denv3.tsv.
    """
    threads: 4
    input:
        dataset="../nextclade_data/denv3",
        sequences="results/sequences_denv3.fasta",
    output:
        "results/nextclade_results/nextclade_denv3.tsv",
    shell:
        # -j is fed from the rule's `threads` directive, so the job count
        # follows whatever Snakemake allots to this rule.
        """
        nextclade run \
            --input-dataset {input.dataset} \
            -j {threads} \
            --output-tsv {output} \
            --min-match-rate 0.01 \
            --silent \
            {input.sequences}
        """

rule nextclade_denv4:
rule nextclade_denvX:
"""
For each type, classify into the appropriate subtype
"""
input:
sequences="results/sequences_denv4.fasta",
dataset="../nextclade_data/denv4",
sequences="results/sequences_denv{x}.fasta",
dataset="../nextclade_data/denv{x}",
output:
"results/nextclade_results/nextclade_denv4.tsv",
"results/nextclade_results/nextclade_denv{x}.tsv",
threads: 4
shell:
"""
Expand All @@ -135,6 +89,9 @@ rule nextclade_denv4:
"""

rule join_nextclade_clades:
"""
Merge all the nextclade results into metadata and split metadata
"""
input:
metadata="results/metadata.tsv",
nextclade_denv1="results/nextclade_results/nextclade_denv1.tsv",
Expand Down

0 comments on commit 87d8c5d

Please sign in to comment.