-
Notifications
You must be signed in to change notification settings - Fork 71
/
Dockerfile
170 lines (164 loc) · 6.23 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
# Ubuntu release used for the base image; override with --build-arg UBUNTU_VERSION=<tag>.
ARG UBUNTU_VERSION=20.04
FROM ubuntu:${UBUNTU_VERSION} AS gridss_base_closest_mirror
# Rewrite the archive.ubuntu.com entries in sources.list to the mirror://
# mirror list so that apt-get uses a close mirror and doesn't take ages.
RUN sed -i \
        -e 's/http:\/\/archive\.ubuntu\.com\/ubuntu\//mirror:\/\/mirrors\.ubuntu\.com\/mirrors\.txt/' \
        /etc/apt/sources.list
# C build environment shared by the gridsstools build stage and the final image
# (used for gridsstools, samtools, and R packages).
# The apt package lists are removed in the same layer to keep it small.
FROM gridss_base_closest_mirror AS gridss_c_build_environment
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        autoconf \
        autogen \
        autotools-dev \
        build-essential \
        libbz2-dev \
        libcurl4-openssl-dev \
        libdeflate-dev \
        liblzma-dev \
        libomp-dev \
        libssl-dev \
        libxml2-dev \
        make \
        wget \
        zlib1g-dev \
    && rm -rf /var/lib/apt/lists/*
# Stage that compiles the gridsstools binary from the C sources in the build context.
FROM gridss_c_build_environment AS gridss_builder_c
RUN mkdir /opt/gridss/
ARG GRIDSS_VERSION
COPY src/main/c /opt/gridss/src/main/c
COPY src/test/resources /opt/gridss/src/test/resources
# Build the htslib copy nested under gridsstools first, then gridsstools itself
# from the parent directory, and leave the resulting binary in /opt/gridss/.
RUN cd /opt/gridss/src/main/c/gridsstools/htslib \
    && autoreconf -i \
    && ./configure \
    && make -j 8 \
    && cd .. \
    && autoreconf -i \
    && ./configure \
    && make -j 8 \
    && cp gridsstools /opt/gridss/
# Stage that compiles the GRIDSS Java code with Maven.
FROM maven:3.8.4-jdk-11 AS gridss_builder_java
RUN mkdir /opt/gridss/
WORKDIR /opt/gridss/
# Only pom.xml and the repo directory are copied before the first Maven run so
# that docker can cache the downloaded dependencies independently of the sources.
COPY pom.xml /opt/gridss/
COPY repo /opt/gridss/repo
# Run every phase up to verify so all dependencies get cached, then discard
# the build output.
RUN mvn -Dmaven.artifact.threads=8 verify \
    && rm -rf target
# Build the GRIDSS jar for the requested version and place it in /opt/gridss/.
ARG GRIDSS_VERSION
COPY src /opt/gridss/src
RUN mvn -T 1C -Drevision=${GRIDSS_VERSION} package \
    && cp target/gridss-${GRIDSS_VERSION}-gridss-jar-with-dependencies.jar /opt/gridss/
# Final GRIDSS image. It extends the C build environment stage rather than a
# bare base image.
FROM gridss_c_build_environment AS gridss
# Setup CRAN ubuntu package repository, then install the runtime dependencies.
# - The CRAN suite name is derived from VERSION_CODENAME in /etc/os-release so
#   that it follows the base image release instead of being pinned to "focal".
#   The file is sourced in a command substitution so its variables do not leak
#   into the rest of this RUN.
# - Both install commands run with DEBIAN_FRONTEND=noninteractive so that no
#   package can stall the build on a debconf prompt.
# apt-get clean not required for ubuntu images
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        apt-transport-https \
        software-properties-common \
        dirmngr \
        gnupg \
    && apt-key adv \
        --keyserver hkp://keyserver.ubuntu.com:80 \
        --recv-keys 0xE298A3A825C0D65DFD57CBB651716619E084DAB9 \
    && add-apt-repository "deb https://cloud.r-project.org/bin/linux/ubuntu $(. /etc/os-release && echo "${VERSION_CODENAME}")-cran40/" \
    && apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        apt-utils \
        gawk \
        openjdk-11-jre-headless \
        bwa \
        hmmer \
        bedtools \
        bcftools \
        r-base \
        time \
        libomp-dev \
        perl-modules \
        libtext-soundex-perl \
        python3-h5py \
        rsync \
        curl \
    && rm -rf /var/lib/apt/lists/*
# samtools needs to be installed from source since the OS package version is too old.
# The download and build tree under /opt/samtools is deleted in the same layer
# once `make install` has finished.
RUN mkdir /opt/samtools \
    && cd /opt/samtools \
    && wget https://github.com/samtools/samtools/releases/download/1.14/samtools-1.14.tar.bz2 \
    && tar -jxf samtools-1.14.tar.bz2 \
    && cd samtools-1.14 \
    && autoheader \
    && autoconf -Wno-syntax \
    && ./configure \
    && make install \
    && cd ~ \
    && rm -rf /opt/samtools
### Repeat Masker and dependencies
# trf: download the prebuilt linux64 binary, mark it executable and expose it
# under the fixed name /opt/trf/trf through a symlink.
RUN mkdir /opt/trf \
    && cd /opt/trf \
    && wget http://tandem.bu.edu/trf/downloads/trf407b.linux64 \
    && chmod +x trf*.linux64 \
    && ln -s trf*.linux64 trf
# Turns out we need makeblastdb as well as rmblastn (https://github.com/PapenfussLab/gridss/issues/535)
# Only those two members are extracted from the archive; --strip-components 2
# removes their leading directories so they land directly in /opt/rmblast.
# The tarball is deleted in the same layer.
RUN mkdir /opt/rmblast \
    && cd /opt/rmblast \
    && wget http://www.repeatmasker.org/rmblast-2.11.0+-x64-linux.tar.gz \
    && tar --no-anchored --strip-components 2 -xvzf rmblast-2.11.0+-x64-linux.tar.gz rmblastn makeblastdb \
    && rm rmblast-2.11.0+-x64-linux.tar.gz
# Download the RepeatMasker release, unpack it under /opt and delete the
# tarball in the same layer.
RUN cd /opt/ \
    && wget http://www.repeatmasker.org/RepeatMasker/RepeatMasker-4.1.2-p1.tar.gz \
    && tar zxf RepeatMasker-*.tar.gz \
    && rm RepeatMasker-*.tar.gz
### Kraken2 and dependencies
# dustmasker from e-direct: (or is this in ncbi-blast as well?)
# The NCBI BLAST+ executables are moved out of the unpacked bin/ directory
# straight into /opt/blast; the `ncbi-blast-*` glob then removes both the
# unpacked tree and the downloaded tarball.
RUN mkdir /opt/blast \
    && cd /opt/blast \
    && wget https://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.11.0/ncbi-blast-2.11.0+-x64-linux.tar.gz \
    && tar zxf ncbi-blast-*.tar.gz \
    && mv ncbi-blast-*/bin/* . \
    && rm -r ncbi-blast-*
# Kraken2 release to install; set with ENV, so it also stays in the image environment.
ENV KRAKEN_VERSION=2.1.2
# Fetch the tagged source archive, run the upstream install script with
# /opt/kraken2 as the destination, then remove the source tree and tarball.
RUN mkdir /opt/kraken2 \
    && cd /opt/kraken2 \
    && wget https://github.com/DerrickWood/kraken2/archive/refs/tags/v${KRAKEN_VERSION}.tar.gz \
    && tar zxf v*.tar.gz \
    && cd kraken2* \
    && ./install_kraken2.sh /opt/kraken2 \
    && cd .. \
    && rm -r kraken2-${KRAKEN_VERSION} v*.tar.gz
# Install NCBI EDirect by running its installer script, then move the result
# from $HOME/edirect to /opt/edirect.
# The script is fetched over HTTPS instead of plain FTP: it is executed as-is,
# so it should not travel over an unauthenticated, unencrypted channel.
RUN sh -c "$(wget -q https://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect/install-edirect.sh -O -)" \
    && mv $HOME/edirect /opt/edirect
# Put GRIDSS and the tools installed under /opt ahead of the existing PATH.
ENV PATH="/opt/gridss/:/opt/RepeatMasker:/opt/rmblast/:/opt/trf:/opt/kraken2:/opt/blast:/opt/edirect:$PATH"
# configure repeatmasker
# All settings are passed as command-line options, with rmblast as the default
# search engine. hmmer comes from the Ubuntu package installed earlier in this
# stage, which places its binaries in /usr/bin; nothing in this image installs
# hmmer under /usr/local/bin, so -hmmer_dir points at /usr/bin.
RUN cd /opt/RepeatMasker \
    && perl configure \
        -default_search_engine rmblast \
        -rmblast_dir /opt/rmblast \
        -trf_prgm /opt/trf/trf \
        -hmmer_dir /usr/bin
# R packages used by GRIDSS - the R packages need the C toolchain installed.
# R_INSTALL_STAGED=false is set with ENV, so it applies to the R installs below
# and also remains in the image environment.
ENV R_INSTALL_STAGED=false
# Packages installed with install.packages() from cloud.r-project.org using
# Ncpus=8. BiocManager is included here because the next step calls
# BiocManager::install().
RUN Rscript -e 'options(Ncpus=8L, repos="https://cloud.r-project.org/");install.packages(c( "tidyverse", "assertthat", "testthat", "randomForest", "stringdist", "stringr", "argparser", "R.cache", "BiocManager", "Rcpp", "blob", "RSQLite" ))'
# Packages installed through BiocManager::install() with ask=FALSE.
# NOTE(review): neither Rscript call checks that the packages actually
# installed; presumably a failed package only produces a warning - confirm.
RUN Rscript -e 'options(Ncpus=8L, repos="https://cloud.r-project.org/");BiocManager::install(ask=FALSE, pkgs=c( "copynumber", "StructuralVariantAnnotation", "VariantAnnotation", "rtracklayer", "BSgenome", "Rsamtools", "biomaRt", "org.Hs.eg.db", "TxDb.Hsapiens.UCSC.hg19.knownGene", "TxDb.Hsapiens.UCSC.hg38.knownGene" ))'
# Install GRIDSS
# GRIDSS_VERSION arrives as a build argument; it is re-exported into the image
# environment and used to derive the jar location stored in GRIDSS_JAR.
ARG GRIDSS_VERSION
ENV GRIDSS_VERSION=${GRIDSS_VERSION}
ENV GRIDSS_JAR=/opt/gridss/gridss-${GRIDSS_VERSION}-gridss-jar-with-dependencies.jar
# Image metadata.
LABEL software="GRIDSS" \
      software.version="$GRIDSS_VERSION" \
      about.summary="Genomic Rearrangement IDentification Software Suite" \
      about.home="https://github.com/PapenfussLab/gridss" \
      about.tags="Genomics"
RUN mkdir /opt/gridss/ /data
# Artefacts produced by the two builder stages.
COPY --from=gridss_builder_c /opt/gridss/gridsstools /opt/gridss/
COPY --from=gridss_builder_java /opt/gridss/gridss-${GRIDSS_VERSION}-gridss-jar-with-dependencies.jar /opt/gridss/
# Driver scripts and R sources from the build context.
COPY scripts/gridss \
     scripts/gridss_annotate_vcf_kraken2 \
     scripts/gridss_annotate_vcf_repeatmasker \
     scripts/gridss_extract_overlapping_fragments \
     scripts/gridss_somatic_filter \
     scripts/virusbreakend \
     scripts/virusbreakend-build \
     scripts/gridss.config.R \
     scripts/libgridss.R \
     /opt/gridss/
# Mark everything in /opt/gridss executable, then clear the bit again on the .R files.
RUN chmod +x /opt/gridss/* \
    && chmod -x /opt/gridss/*.R
WORKDIR /data/
# Copy build artifact locally
# Empty (scratch) stage that contains only the files matched by /opt/gridss/*
# in the gridss stage, copied to the root of this stage's filesystem.
# NOTE(review): presumably meant to be built with
# `--target gridss_export_build_artefacts` and a local `--output` - confirm.
FROM scratch AS gridss_export_build_artefacts
# NOTE(review): GRIDSS_VERSION is declared here but not referenced by any
# instruction in this stage.
ARG GRIDSS_VERSION
COPY --from=gridss /opt/gridss/* /