-
Notifications
You must be signed in to change notification settings - Fork 0
/
otu_tables.Rmd
99 lines (69 loc) · 3.84 KB
/
otu_tables.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
---
Title: "Generate OTU tables"
Author: Henry Paz ([email protected])
Output:
html_document:
keep_md: yes
---
Generate OTU table for bacteria
```{r, engine='bash'}
#use a custom perl script to convert the fasta file from QIIME format to a format that works with vsearch pipeline to generate the OTU table
chmod 775 scripts/qiime_to_usearch.pl
#format bacteria file
scripts/qiime_to_usearch.pl -fasta=bactqc_trim.rc.fasta -prefix=file
#rename file
mv format.fasta bact_format.fasta
#make vsearch_outputs directory
mkdir vsearch_outputs
#vsearch pipeline
#dereplicate sequences
vsearch --derep_fulllength bact_format.fasta --sizeout --output vsearch_outputs/bact_derep.fasta
#sort by decreasing abundance and remove singletons
vsearch --sortbysize vsearch_outputs/bact_derep.fasta --minsize 2 --output vsearch_outputs/bact_derep_sort.fasta
#de novo detection and sorting out of chimeras
vsearch --uchime_denovo vsearch_outputs/bact_derep_sort.fasta --nonchimeras vsearch_outputs/bact_nonchim_denovo.fasta
#decompress gold.fasta.gz
gzip -d gold.fasta.gz
#reference based and sorting out of chimeras
vsearch --uchime_ref vsearch_outputs/bact_nonchim_denovo.fasta --db gold.fasta --nonchimeras vsearch_outputs/bact_nonchim_denovo_ref.fasta
#sort by decreasing abundance
vsearch -sortbysize vsearch_outputs/bact_nonchim_denovo_ref.fasta -output vsearch_outputs/bact_nonchim_denovo_refsort.fasta
#cluster sequences at 97% identity
vsearch --cluster_smallmem vsearch_outputs/bact_nonchim_denovo_refsort.fasta --id 0.97 --consout vsearch_outputs/bact_oturep.fasta --relabel OTU
#format header
awk 'BEGIN{OFS="";ORS="";count=1}{if ($0~/>/){if (NR>1) {print "\n"} print ">" count "\n"; count+=1;} else {print $0;}}' vsearch_outputs/bact_oturep.fasta > vsearch_outputs/bact_oturep_header.fasta
#compare sequences using global pairwise alignment
vsearch --usearch_global bact_format.fasta --db vsearch_outputs/bact_oturep_header.fasta --id 0.97 --uc vsearch_outputs/bact_map.uc
#make otu_tables directory
mkdir mkdir otu_tables
#format OTU table
chmod 775 scripts/*
#change from uc to tab delimited format
python scripts/uc2otutab.py vsearch_outputs/bact_map.uc > otu_tables/bact_otutable.txt
```
Generate OTU table for archaea
```{r, engine='bash'}
#format archeae file
scripts/qiime_to_usearch.pl -fasta=archqc_trim.rc.fasta -prefix=file
#rename file
mv format.fasta arch_format.fasta
#vsearch pipeline
#dereplicate sequences
vsearch --derep_fulllength arch_format.fasta --sizeout --output vsearch_outputs/arch_derep.fasta
#sort by decreasing abundance and remove singletons
vsearch --sortbysize vsearch_outputs/arch_derep.fasta --minsize 2 --output vsearch_outputs/arch_derep_sort.fasta
#de novo detection and sorting out of chimeras
vsearch --uchime_denovo vsearch_outputs/arch_derep_sort.fasta --nonchimeras vsearch_outputs/arch_nonchim_denovo.fasta
#reference based and sorting out of chimeras
vsearch --uchime_ref vsearch_outputs/arch_nonchim_denovo.fasta --db gold.fasta --nonchimeras vsearch_outputs/arch_nonchim_denovo_ref.fasta
#sort by decreasing abundance
vsearch -sortbysize vsearch_outputs/arch_nonchim_denovo_ref.fasta -output vsearch_outputs/arch_nonchim_denovo_refsort.fasta
#cluster sequences at 97% identity
vsearch --cluster_smallmem vsearch_outputs/arch_nonchim_denovo_refsort.fasta --id 0.97 --consout vsearch_outputs/arch_oturep.fasta --relabel OTU
#format header
awk 'BEGIN{OFS="";ORS="";count=1}{if ($0~/>/){if (NR>1) {print "\n"} print ">" count "\n"; count+=1;} else {print $0;}}' vsearch_outputs/arch_oturep.fasta > vsearch_outputs/arch_oturep_header.fasta
#compare sequences using global pairwise alignment
vsearch --usearch_global arch_format.fasta --db vsearch_outputs/arch_oturep_header.fasta --id 0.97 --uc vsearch_outputs/arch_map.uc
#change from uc to tab delimited format
python scripts/uc2otutab.py vsearch_outputs/arch_map.uc > otu_tables/arch_otutable.txt
```