-
Notifications
You must be signed in to change notification settings - Fork 0
/
Run_all_tools_fix1.sh
executable file
·283 lines (225 loc) · 8.3 KB
/
Run_all_tools_fix1.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
#!/bin/bash
# Capture the start time once, formatted as locale date, 12-hour clock time
# and time-zone abbreviation, and build a human-readable stamp from it.
# NOTE(review): neither variable is referenced again in the visible script.
RIGHT_NOW="$(date '+%x %r %Z')"
TIME_STAMP="Updated on ${RIGHT_NOW} by ${USER}"
##### Functions
# Prints a one-line description of each command-line option to stdout.
# Takes no arguments and reads no globals.
# The flag spellings listed here follow the option parser in this file: the
# depth option is matched as upper-case -D, and -R supplies the rarified table.
# NOTE(review): the parser may only recognise the long form of -O when it is
# spelled --outputh_path; the short form -O always works.
help()
{
    echo "-A | --ASV_table -> A tsv file where each column represents a different ASV and each row represents a different samples"
    echo "-R | --rar_ASV_table -> Path to the rarified ASV table"
    echo "-G | --Groupings -> A tsv file with two columns. One columns represents the sample names while the other column represents the group for that sample"
    echo "-O | --output_path -> the path to the directory that the output of each test should be placed into"
    echo "-F | --Filt -> The precentage of samples required for a feature to be present in so that it will not be filtered out"
    echo "-h | --help -> The output of this command!"
    echo "-D | --depth -> Depth to rarifiy filtered tables to"
}
# Prints the one-line invocation synopsis to stdout.
# The depth and filter flags are shown in upper case (-D, -F) because those are
# the spellings the option parser in this file matches.
usage()
{
    echo "usage: Run_all_tools -A [PATH_TO_ASV_TABLE] -G [PATH_TO_GROUPING_TABLE] -O [PATH_TO_OUTPUT_DIRECTORY] -R [PATH_TO_RARIFIED_TABLE] -D depth -F filter_level"
}
# Runs the ALDEx2 differential-abundance R script on the ASV table.
# Globals read: Output_Path, ASV_table_Path, Groupings_Path
# Effects:      creates $Output_Path/Aldex_out and passes
#               $Output_Path/Aldex_out/Aldex_res.tsv to the R script as its
#               output-file argument.
# Returns:      the exit status of Rscript.
# The script path Tool_scripts/Run_Aldex2.R is relative, so it is resolved
# against the current working directory.
Run_ALDEX2()
{
    local out_dir="$Output_Path/Aldex_out"
    local out_file="$out_dir/Aldex_res.tsv"

    echo "Running ALDEx2"
    # -p: do not complain when the directory is left over from an earlier run.
    mkdir -p "$out_dir"
    # Quoted so paths containing spaces reach the R script as single arguments.
    Rscript Tool_scripts/Run_Aldex2.R "$ASV_table_Path" "$Groupings_Path" "$out_file"
}
# Runs the DESeq2 differential-abundance R script on the ASV table.
# Globals read: Output_Path, ASV_table_Path, Groupings_Path
# Effects:      creates $Output_Path/Deseq2_out and passes
#               $Output_Path/Deseq2_out/Deseq2_results.tsv to the R script as
#               its output-file argument.
# Returns:      the exit status of Rscript.
Run_DeSeq2()
{
    local out_dir="$Output_Path/Deseq2_out"
    local out_file_deseq="$out_dir/Deseq2_results.tsv"

    echo "Running DeSeq2"
    # -p keeps re-runs into an existing output directory quiet.
    mkdir -p "$out_dir"
    # Quoted so paths containing spaces stay single arguments.
    Rscript Tool_scripts/Run_DESeq2.R "$ASV_table_Path" "$Groupings_Path" "$out_file_deseq"
}
# Runs the ANCOM R wrapper on the ASV table.
# Globals read: Output_Path, ASV_table_Path, Groupings_Path, PWD
# Effects:      creates $Output_Path/ANCOM_out and passes
#               $Output_Path/ANCOM_out/Ancom_res.tsv to the R script as its
#               output-file argument.
# Returns:      the exit status of Rscript.
# The fourth argument is the absolute path of Ancom2_Script/ancom_v2.1.R under
# the current working directory.
Run_Ancom2()
{
    local out_dir="$Output_Path/ANCOM_out"
    local out_file_ancom="$out_dir/Ancom_res.tsv"

    echo "Running ANCOM"
    # -p keeps re-runs into an existing output directory quiet.
    mkdir -p "$out_dir"
    # Quoted so paths (including $PWD) containing spaces stay single arguments.
    Rscript Tool_scripts/Run_ANCOM.R "$ASV_table_Path" "$Groupings_Path" "$out_file_ancom" "$PWD/Ancom2_Script/ancom_v2.1.R"
}
# Runs LEfSe on the rarified table inside the "hackathon" environment.
# Globals read: Output_Path, Rar_ASV_table_PATH, Groupings_Path
# Steps:
#   1. activate the "hackathon" environment,
#   2. Tool_scripts/Format_lefse.R writes lefse_format_file.tsv,
#   3. format_input.py turns that into lefse_formatted.lefse,
#   4. run_lefse.py writes Lefse_results.tsv,
#   5. deactivate the environment.
# All files live under $Output_Path/Lefse_out.
# Returns: the exit status of the final "source deactivate" command.
Run_Lefse()
{
    local lefse_dir="$Output_Path/Lefse_out"
    local out_file_lefse="$lefse_dir/lefse_format_file.tsv"
    local formated_out_file_lefse="$lefse_dir/lefse_formatted.lefse"
    local lefse_results="$lefse_dir/Lefse_results.tsv"

    ## Would like to find a way around activating this environment to run this as it does take some time to run...
    source activate hackathon
    echo "Running Lefse on rarified input table"
    # -p keeps re-runs into an existing output directory quiet.
    mkdir -p "$lefse_dir"
    # Every path is quoted so names containing spaces stay single arguments.
    Rscript Tool_scripts/Format_lefse.R "$Rar_ASV_table_PATH" "$Groupings_Path" "$out_file_lefse"
    format_input.py "$out_file_lefse" "$formated_out_file_lefse" -c 2 -u 1 -o 1000000
    run_lefse.py "$formated_out_file_lefse" "$lefse_results"
    echo "Done running Lefse"
    source deactivate hackathon
}
##### Functions (continued): remaining tool runners
# Runs the Corncob R script on the ASV table.
# Globals read: Output_Path, ASV_table_Path, Groupings_Path
# Effects:      creates $Output_Path/Corncob_out and passes
#               $Output_Path/Corncob_out/Corncob_results.tsv to the R script as
#               its output-file argument.
# Returns:      the exit status of Rscript.
Run_Corncob()
{
    local out_dir="$Output_Path/Corncob_out"
    local out_file_corncob="$out_dir/Corncob_results.tsv"

    # -p keeps re-runs into an existing output directory quiet.
    mkdir -p "$out_dir"
    # Quoted so paths containing spaces stay single arguments.
    Rscript Tool_scripts/Run_Corncob.R "$ASV_table_Path" "$Groupings_Path" "$out_file_corncob"
}
# Runs the Wilcoxon R script on the rarified table.
# Globals read: Output_Path, Rar_ASV_table_PATH, Groupings_Path
# Effects:      creates $Output_Path/Wilcoxon_rare_out and passes
#               $Output_Path/Wilcoxon_rare_out/Wil_rare_results.tsv to the R
#               script as its output-file argument.
# Returns:      the exit status of Rscript.
Run_Wilcoxin_rare()
{
    local out_dir="$Output_Path/Wilcoxon_rare_out"
    local out_file_wil_rare="$out_dir/Wil_rare_results.tsv"

    # -p keeps re-runs into an existing output directory quiet.
    mkdir -p "$out_dir"
    # Quoted so paths containing spaces stay single arguments.
    Rscript Tool_scripts/Run_Wilcox_rare.R "$Rar_ASV_table_PATH" "$Groupings_Path" "$out_file_wil_rare"
}
# Runs the Wilcoxon (CLR) R script on the non-rarified ASV table.
# Globals read: Output_Path, ASV_table_Path, Groupings_Path
# Effects:      creates $Output_Path/Wilcoxon_CLR_out and passes
#               $Output_Path/Wilcoxon_CLR_out/Wil_CLR_results.tsv to the R
#               script as its output-file argument.
# Returns:      the exit status of Rscript.
Run_Wilcoxin_CLR()
{
    local out_dir="$Output_Path/Wilcoxon_CLR_out"
    local out_file_wil_CLR="$out_dir/Wil_CLR_results.tsv"

    # -p keeps re-runs into an existing output directory quiet.
    mkdir -p "$out_dir"
    # Quoted so paths containing spaces stay single arguments.
    Rscript Tool_scripts/Run_Wilcox_CLR.R "$ASV_table_Path" "$Groupings_Path" "$out_file_wil_CLR"
}
# Runs the Maaslin2 R script on the rarified table.
# Globals read: Output_Path, Rar_ASV_table_PATH, Groupings_Path
# Effects:      creates $Output_Path/Maaslin2_rare_out and hands that directory
#               (not a file) to the R script as its third argument.
# Returns:      the exit status of Rscript.
Run_Maaslin2_rare()
{
    local out_file_maas_rare="$Output_Path/Maaslin2_rare_out"

    echo "Running Maaslin2 with rarified table"
    # -p keeps re-runs into an existing output directory quiet.
    mkdir -p "$out_file_maas_rare"
    # Quoted so paths containing spaces stay single arguments.
    Rscript Tool_scripts/Run_Maaslin2.R "$Rar_ASV_table_PATH" "$Groupings_Path" "$out_file_maas_rare"
}
# Runs the Maaslin2 R script on the non-rarified ASV table.
# Globals read: Output_Path, ASV_table_Path, Groupings_Path
# Effects:      creates $Output_Path/Maaslin2_out and hands that directory
#               (not a file) to the R script as its third argument.
# Returns:      the exit status of Rscript.
Run_Maaslin2()
{
    local out_file_maas="$Output_Path/Maaslin2_out"

    echo "Running Maaslin2 on non-rarified table"
    # -p keeps re-runs into an existing output directory quiet.
    mkdir -p "$out_file_maas"
    # Quoted so paths containing spaces stay single arguments.
    Rscript Tool_scripts/Run_Maaslin2.R "$ASV_table_Path" "$Groupings_Path" "$out_file_maas"
}
# Runs the metagenomeSeq R script on the ASV table.
# Globals read: Output_Path, ASV_table_Path, Groupings_Path
# Effects:      creates $Output_Path/metagenomeSeq_out and passes
#               $Output_Path/metagenomeSeq_out/mgSeq_res.tsv to the R script as
#               its output-file argument.
# Returns:      the exit status of Rscript.
Run_metagenomeSeq()
{
    local out_dir="$Output_Path/metagenomeSeq_out"
    local out_file_mgSeq="$out_dir/mgSeq_res.tsv"

    echo "Running metagenomeSeq using fitFeatureModel"
    # -p keeps re-runs into an existing output directory quiet.
    mkdir -p "$out_dir"
    # Quoted so paths containing spaces stay single arguments.
    Rscript Tool_scripts/Run_metagenomeSeq.R "$ASV_table_Path" "$Groupings_Path" "$out_file_mgSeq"
}
# Runs the edgeR R script on the ASV table.
# Globals read: Output_Path, ASV_table_Path, Groupings_Path
# Effects:      creates $Output_Path/edgeR_out and passes
#               $Output_Path/edgeR_out/edgeR_res.tsv to the R script as its
#               output-file argument.
# Returns:      the exit status of Rscript.
Run_edgeR()
{
    local out_dir="$Output_Path/edgeR_out"
    local out_file_edgeR="$out_dir/edgeR_res.tsv"

    echo "Running edgeR"
    # -p keeps re-runs into an existing output directory quiet.
    mkdir -p "$out_dir"
    # Quoted so paths containing spaces stay single arguments.
    Rscript Tool_scripts/Run_edgeR.R "$ASV_table_Path" "$Groupings_Path" "$out_file_edgeR"
}
# Runs the t-test R script on the rarified table.
# Globals read: Output_Path, Rar_ASV_table_PATH, Groupings_Path
# Effects:      creates $Output_Path/t_test_rare_out and passes
#               $Output_Path/t_test_rare_out/t_test_res.tsv to the R script as
#               its output-file argument.
# Returns:      the exit status of Rscript.
Run_t_test_rare()
{
    local out_dir="$Output_Path/t_test_rare_out"
    local out_file_t_rare="$out_dir/t_test_res.tsv"

    echo "Running T test"
    # -p keeps re-runs into an existing output directory quiet.
    mkdir -p "$out_dir"
    # Quoted so paths containing spaces stay single arguments.
    Rscript Tool_scripts/Run_t_test_rare.R "$Rar_ASV_table_PATH" "$Groupings_Path" "$out_file_t_rare"
}
##### Command-line settings and parsing
# Defaults for every setting the option parser can overwrite.
Groupings_Path=
ASV_table_Path=
Output_Path=
# Spelled with upper-case PATH: that is the name the tool runners and the
# filtering step in this file actually read.
Rar_ASV_table_PATH=
Filt_level=0
depth=0

# Aborts with exit status 1 when an option that needs a value is the last word
# on the command line.
# Arguments: $1 - the option as typed, $2 - number of words still unparsed
_require_value()
{
    if [ "$2" -lt 2 ]; then
        echo "Option $1 requires a value" >&2
        usage >&2
        exit 1
    fi
}

# Parses the command-line words into the global settings declared above.
# Arguments: the script's positional parameters ("$@").
# Globals written: ASV_table_Path, Rar_ASV_table_PATH, Groupings_Path,
#                  Filt_level, Output_Path, depth
# Exits: 0 after printing usage and help for -h/--help; 1 on an unknown
#        option or an option missing its value.
parse_args()
{
    # Loop on the argument count so an empty-string argument is reported as
    # invalid instead of silently ending the parse.
    while [ "$#" -gt 0 ]; do
        case "$1" in
            -A | --ASV_table )
                _require_value "$1" "$#"
                shift
                ASV_table_Path=$1
                ;;
            -R | --rar_ASV_table )
                _require_value "$1" "$#"
                shift
                Rar_ASV_table_PATH=$1
                ;;
            -G | --Groupings )
                _require_value "$1" "$#"
                shift
                Groupings_Path=$1
                ;;
            -F | --Filt )
                _require_value "$1" "$#"
                shift
                Filt_level=$1
                ;;
            -h | --help )
                usage
                help
                exit
                ;;
            # --outputh_path is the historical (misspelled) long form; it is
            # still accepted so existing invocations keep working.
            -O | --output_path | --outputh_path )
                _require_value "$1" "$#"
                shift
                Output_Path=$1
                ;;
            # Both -D and -d are accepted for the depth option.
            -D | -d | --depth )
                _require_value "$1" "$#"
                shift
                depth=$1
                ;;
            * )
                usage >&2
                exit 1
                ;;
        esac
        shift
    done
}

parse_args "$@"
##### Main driver: validate inputs, harmonise the tables, then run and time every tool
# Refuse to run without the inputs every later step reads. With an empty
# Output_Path the paths built below would resolve to the filesystem root.
if [ -z "$Output_Path" ] || [ -z "$ASV_table_Path" ] || [ -z "$Groupings_Path" ]; then
    echo "An ASV table (-A), a groupings table (-G) and an output directory (-O) are all required" >&2
    usage >&2
    exit 1
fi

# Create the output directory if needed; timings are appended to time_file.txt.
mkdir -p "$Output_Path"
time_file="$Output_Path/time_file.txt"
touch "$time_file"

# Runs a command in the current shell and appends
# "<label> took <seconds> seconds" to $time_file.
# Arguments: $1 - label written to the time file, $2... - command to run
# The command's own exit status is not propagated, so one failing tool does
# not stop the tools that follow it.
time_step()
{
    local label=$1
    local started=$SECONDS
    shift
    "$@"
    echo "$label took $(( SECONDS - started )) seconds" >> "$time_file"
}

# Makes the rarified and non-rarified tables agree on their samples.
# Globals read:    ASV_table_Path, Rar_ASV_table_PATH, Output_Path,
#                  Filt_level, depth
# Globals written: ASV_table_Path (always), Rar_ASV_table_PATH (only when
#                  filtering is requested)
# Exits with status 1 when a required input is missing or the R script fails,
# because every tool downstream reads the tables produced here.
filter_tables()
{
    echo "Ensuring samples are the same between tables"
    local table_name="${ASV_table_Path##*/}"
    local out_file_new_tab_ASV="$Output_Path/fixed_non_rare_tables/$table_name"
    local out_file_new_tab_rar_ASV="$Output_Path/fixed_rare_tables/$table_name"
    mkdir -p "$Output_Path/fixed_non_rare_tables" "$Output_Path/fixed_rare_tables"

    if [ "$Filt_level" = 0 ]; then
        ### Checks that the rare table has the same samples as the non-rare table and filters the non-rare table
        echo "No Filtering was selected. Due to this we expect that a rarified table has also been provided. This will be fixed in future update"
        if [ -z "$Rar_ASV_table_PATH" ]; then
            echo "A rarified table (-R) is required when no filter level (-F) is given" >&2
            exit 1
        fi
        if ! Rscript Tool_scripts/Filter_samples_of_non_rare_table.R "$ASV_table_Path" "$Rar_ASV_table_PATH" "$out_file_new_tab_ASV"; then
            echo "Filtering the non-rarified table failed" >&2
            exit 1
        fi
        ASV_table_Path=$out_file_new_tab_ASV
    else
        #### Filters the non-rare table to the requested level; the R script is
        #### also given the output path for a rarified table and the depth
        if [ "$depth" = 0 ]; then
            echo "Please Enter the rarification depth you would like to use (-D)" >&2
            exit 1
        fi
        if ! Rscript Tool_scripts/Filter_samples_and_features.R "$ASV_table_Path" "$Filt_level" "$out_file_new_tab_ASV" "$out_file_new_tab_rar_ASV" "$depth"; then
            echo "Filtering samples and features failed" >&2
            exit 1
        fi
        ASV_table_Path=$out_file_new_tab_ASV
        Rar_ASV_table_PATH=$out_file_new_tab_rar_ASV
    fi

    # Show which tables the tools below will read.
    echo "$ASV_table_Path"
    echo "$Rar_ASV_table_PATH"
}

time_step "Filtering" filter_tables
time_step "Aldex2" Run_ALDEX2
time_step "Deseq2" Run_DeSeq2
time_step "Lefse" Run_Lefse
time_step "Corncob" Run_Corncob
time_step "Wilcoxon rare" Run_Wilcoxin_rare
time_step "Wilcoxon CLR" Run_Wilcoxin_CLR
time_step "Maaslin2 rare" Run_Maaslin2_rare
time_step "Maaslin2" Run_Maaslin2
time_step "Ancom2" Run_Ancom2
time_step "metagenomeSeq" Run_metagenomeSeq
time_step "edgeR" Run_edgeR
time_step "t test rare" Run_t_test_rare