-
Notifications
You must be signed in to change notification settings - Fork 3
/
run_meta_analysis_workflow.wdl
163 lines (132 loc) · 5.7 KB
/
run_meta_analysis_workflow.wdl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
version development
import "./run_association_test_workflow.wdl" as run_association_test
import "./tasks/preprocess_workflow.wdl" as gwas_tasks
import "./tasks/meta_analysis_tasks.wdl" as meta_analysis_tasks
# Runs the imported run_association_test workflow once per batch listed in
# batch_tsv_file, then, for each enabled method (useBOLT / useSAIGE), feeds the
# per-batch merged summary files to the run_metal task and builds summary plots
# and an R Markdown report from its output.
workflow run_meta_analysis {
    input {
        # TSV with a header row and one data row per batch. Columns read below:
        # genotype_bed, genotype_bim, genotype_fam, genotype_samples_to_keep_file,
        # imputed_samples_to_keep_file, imputed_list_of_files, impute_file_format,
        # batch_name.
        File batch_tsv_file
        File covariate_tsv_file
        # NOTE(review): variable_info_tsv_file is declared but not referenced
        # anywhere in this workflow; kept so existing input JSONs stay valid.
        File variable_info_tsv_file

        # The inputs below are forwarded unchanged to every per-batch
        # run_association_test call.
        String binary_covar_list
        String continuous_covar_list
        String phenoCol
        String covar_sampleID_colname
        String phenotype_type

        # Also used as the run_metal prefix and as the stem of the report prefix.
        String setname

        # Method switches: a method's meta-analysis branch runs only when its
        # flag is supplied and true.
        Boolean? useBOLT
        Boolean? useSAIGE

        # For BOLT
        File? genetic_map_file
        File? ld_scores_file

        Float? minMAF = 0.001
        Float? minMAC = 1
        File? chain_file
        # Delimiter character for the VCF file; if not defined, double ID is assumed.
        String? id_delim
        String? dosageField
        Int? numPCs
        String? approx
    }

    # Read the batch table. Row 0 is the header; each following row is one batch.
    # When there is more than one batch, the per-batch summary results are
    # meta-analyzed.
    Array[Array[String]] batch_tsv = read_tsv(batch_tsv_file)
    Int n_batches = length(batch_tsv) - 1

    # Collect the data rows (everything after the header row).
    scatter (idx in range(n_batches)) {
        Array[String] batch_tsv_rows = batch_tsv[(idx + 1)]
    }

    # Transpose rows into columns and key each column by its header name, so
    # batch_tbl["<column>"][i] is the value of <column> for batch i.
    Map[String, Array[String]] batch_tbl = as_map(zip(batch_tsv[0], transpose(batch_tsv_rows)))

    # One association-test run per batch.
    scatter (idx in range(n_batches)) {
        File genotype_bed = batch_tbl["genotype_bed"][idx]
        File genotype_bim = batch_tbl["genotype_bim"][idx]
        File genotype_fam = batch_tbl["genotype_fam"][idx]
        File genotype_samples_to_keep_file = batch_tbl["genotype_samples_to_keep_file"][idx]
        File imputed_samples_to_keep_file = batch_tbl["imputed_samples_to_keep_file"][idx]
        File imputed_list_of_files = batch_tbl["imputed_list_of_files"][idx]
        String impute_file_format = batch_tbl["impute_file_format"][idx]
        String batch_name = batch_tbl["batch_name"][idx]

        call run_association_test.run_association_test {
            input:
                genotype_bed = genotype_bed,
                genotype_bim = genotype_bim,
                genotype_fam = genotype_fam,
                genotype_samples_to_keep_file = genotype_samples_to_keep_file,
                imputed_samples_to_keep_file = imputed_samples_to_keep_file,
                covariate_tsv_file = covariate_tsv_file,
                imputed_list_of_files = imputed_list_of_files,
                impute_file_format = impute_file_format,
                batch_name = batch_name,
                useBOLT = useBOLT,
                useSAIGE = useSAIGE,
                # For BOLT
                genetic_map_file = genetic_map_file,
                ld_scores_file = ld_scores_file,
                minMAF = minMAF,
                minMAC = minMAC,
                chain_file = chain_file,
                id_delim = id_delim,
                binary_covar_list = binary_covar_list,
                continuous_covar_list = continuous_covar_list,
                phenoCol = phenoCol,
                covar_sampleID_colname = covar_sampleID_colname,
                phenotype_type = phenotype_type,
                setname = setname,
                dosageField = dosageField,
                numPCs = numPCs,
                approx = approx
        }
    }

    # select_first([flag, false]) yields a plain Boolean: true only when the
    # optional flag was supplied and is true. This avoids using a Boolean?
    # operand directly in a logical expression.
    if (select_first([useBOLT, false])) {
        call meta_analysis_tasks.run_metal as run_metal_bolt {
            input:
                association_summary_files = run_association_test.merged_bolt_file,
                prefix = setname,
                FREQLABEL = "A1FREQ"
        }

        call gwas_tasks.make_summary_plots as make_bolt_plots {
            input:
                association_summary_file = run_metal_bolt.metal_output_file,
                BP_column = "POS",
                CHR_column = "CHR",
                pval_col = "P-value",
                minrep_col = "MarkerName"
        }

        call gwas_tasks.make_r_markdown_report as make_bolt_report {
            input:
                manhattan_file = make_bolt_plots.manhattan_file,
                qqplot_file = make_bolt_plots.qqplot_file,
                report_prefix = setname + "_bolt"
        }
    }

    if (select_first([useSAIGE, false])) {
        call meta_analysis_tasks.run_metal as run_metal_saige {
            input:
                association_summary_files = run_association_test.merged_saige_file,
                prefix = setname,
                # NOTE(review): an empty frequency label is passed for SAIGE,
                # unlike "A1FREQ" for BOLT; confirm run_metal handles "" as intended.
                FREQLABEL = ""
        }

        call gwas_tasks.make_summary_plots as make_saige_plots {
            input:
                association_summary_file = run_metal_saige.metal_output_file,
                BP_column = "POS",
                CHR_column = "CHR",
                pval_col = "P-value",
                minrep_col = "MarkerName"
        }

        call gwas_tasks.make_r_markdown_report as make_saige_report {
            input:
                manhattan_file = make_saige_plots.manhattan_file,
                qqplot_file = make_saige_plots.qqplot_file,
                report_prefix = setname + "_saige"
        }
    }

    # Every output is optional because each one is produced inside a conditional
    # section that only runs when the matching method flag is true.
    output {
        File? bolt_metal_output_file = run_metal_bolt.metal_output_file
        File? bolt_metal_info_file = run_metal_bolt.metal_info_file
        File? bolt_metal_manhattan_file = make_bolt_plots.manhattan_file
        File? bolt_metal_qqplot_file = make_bolt_plots.qqplot_file
        File? bolt_report_file = make_bolt_report.report_file

        File? saige_metal_output_file = run_metal_saige.metal_output_file
        File? saige_metal_info_file = run_metal_saige.metal_info_file
        File? saige_metal_manhattan_file = make_saige_plots.manhattan_file
        File? saige_metal_qqplot_file = make_saige_plots.qqplot_file
        File? saige_report_file = make_saige_report.report_file
    }
}