-
Notifications
You must be signed in to change notification settings - Fork 2
/
Snakefile
109 lines (89 loc) · 4.89 KB
/
Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import pandas as pd
import os
configfile: "config.json"
localrules: all, mkdir
df = pd.read_csv(config["meta_file"], sep='\t', header=0, index_col=0)
sample_ids = list(df.index)
df.index = sample_ids
def get_pair_gz(sample_id):
dir = config["raw_fastq_gz_dir"]
return tuple(os.path.join(dir, df.loc[str(sample_id), x]) for x in ('ForwardFastqGZ', 'ReverseFastqGZ'))
def get_forward_primer(sample_id):
return df.loc[sample_id]["Adapter_1"]
def get_reverse_primer(sample_id):
return df.loc[sample_id]["Adapter_2"]
def get_gene_value(sample_id):
return df.loc[sample_id]["gene"]
def get_template_fasta(sample_id):
return df.loc[sample_id]["path_to_treat_template"]
def get_offset_value(sample_id):
return df.loc[sample_id]["offset"]
def get_replicate_value(sample_id):
return df.loc[sample_id]["replicate"]
def get_knockdown_gene(sample_id):
return df.loc[sample_id]["knock_down"]
def get_tet_flag(sample_id):
return df.loc[sample_id]["tetracycline"]
rule all:
input:expand("{dir}/{sample_id}.fa", dir=config["dir_names"]["collapse_dir"],sample_id=sample_ids)
run:
for sample in sample_ids:
gene = get_gene_value(sample)
offset = get_offset_value(sample)
template = get_template_fasta(sample)
rep = get_replicate_value(sample)
knock = get_knockdown_gene(sample)
version = config["tool_version"]["treat"]
if (get_tet_flag(sample)):
#os.system("./tools/treat/" + str(version) + "/treat load --sample " + sample + " --fasta outputs/collapse/" + sample + ".fa --gene " + gene +" --offset " + str(offset) + " --template "+template+" --replicate "+ str(rep) + " --knock-down " +knock+" --tet")
os.system("./tools/treat/" + str(version) + "/treat load --sample " + sample + " --fasta " + config["dir_names"]["collapse_dir"]+"/" + sample + ".fa --gene " + gene +" --offset " + str(offset) + " --template "+template+" --replicate "+ str(rep) + " --exclude-snps --knock-down " +knock+" --tet")
else:
#os.system("./tools/treat/" + str(version) + "/treat load --sample " + sample + " --fasta outputs/collapse/" + sample + ".fa --gene " + gene +" --offset " + str(offset) + " --template "+template+" --replicate "+ str(rep) + " --knock-down " + knock)
os.system("./tools/treat/" + str(version) + "/treat load --sample " + sample + " --fasta " + config["dir_names"]["collapse_dir"]+"/" + sample + ".fa --gene " + gene +" --offset " + str(offset) + " --template "+template+" --replicate "+ str(rep) + " --exclude-snps --knock-down " + knock)
rule mkdir:
output: touch(config["file_names"]["mkdir_done"])
params: dirs = list(config["dir_names"].values())
shell: "mkdir -p {params.dirs}"
rule unzip:
input:
lambda wildcards: get_pair_gz(wildcards.sample_id),
rules.mkdir.output
output:
config["dir_names"]["unzip_dir"] + "/{sample_id}_R1.fq",
config["dir_names"]["unzip_dir"] + "/{sample_id}_R2.fq"
shell:
"gunzip -c {input[0]} > {output[0]} && gunzip -c {input[1]} > {output[1]}"
rule join:
input: rules.unzip.output
output: config["dir_names"]["join_dir"] + "/{sample_id}.assembled.fastq"
version: config["tool_version"]["pear"]
params:
outputDir = config["dir_names"]["join_dir"]
#shell: "tools/pear/{version}/pear -f {input[0]} -r {input[1]} -o outputs/join/{wildcards.sample_id}"
shell: "tools/pear/{version}/pear -f {input[0]} -r {input[1]} -o {params.outputDir}/{wildcards.sample_id}" #outputs/join/{wildcards.sample_id}"
rule filter:
input: rules.join.output
output: config["dir_names"]["filter_dir"] + "/{sample_id}.fq"
version: config["tool_version"]["fastx"]
params:
percentage = config["parameters"]["quality_filtering"]["percentage"],
qscore = config["parameters"]["quality_filtering"]["qscore"]
shell: "tools/fastx/{version}/fastq_quality_filter -i {input} -o {output} -q {params.qscore} -p {params.percentage} -Q33 -v"
rule fq_2_fa:
input: rules.filter.output
output: config["dir_names"]["fq_2_fa_dir"] + "/{sample_id}.fa"
version: config["tool_version"]["fastx"]
shell: "tools/fastx/{version}/fastq_to_fasta -i {input} -o {output} -n -v -Q33"
rule trim:
input: rules.fq_2_fa.output
output: config["dir_names"]["trimmed_dir"] + "/{sample_id}.fa"
version: config["tool_version"]["cutadapt"]
params:
adapter1=lambda wildcards: get_forward_primer(wildcards.sample_id),
adapter2=lambda wildcards: get_reverse_primer(wildcards.sample_id)
shell: "cutadapt -m 15 -g {params.adapter1} -a {params.adapter2} -n 2 {input} > {output} 2>{output}.stats"
rule collapse:
input: rules.trim.output
output: config["dir_names"]["collapse_dir"] + "/{sample_id}.fa"
version: config["tool_version"]["fastx"]
shell: "./tools/fastx/{version}/fastx_collapser -i {input} -o {output} -v"