# Dockerfile.software
# -------------------
# build builder image
# -------------------
# Builds GROBID and the software-mentions module with the Gradle wrapper, then
# unpacks the grobid-service and grobid-home distributions under /opt/grobid.
FROM openjdk:17-jdk-slim AS builder

USER root

# unzip is needed at the end of this stage to unpack the Gradle distributions
RUN apt-get update \
    && apt-get -y --no-install-recommends install unzip \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /opt/grobid-source

# gradle
COPY gradle/ ./gradle/
COPY gradlew ./
COPY gradle.properties ./
COPY build.gradle ./
COPY settings.gradle ./

# source
COPY software-mentions/ ./software-mentions/
COPY grobid-home/ ./grobid-home/
COPY grobid-core/ ./grobid-core/
COPY grobid-service/ ./grobid-service/
COPY grobid-trainer/ ./grobid-trainer/

# cleaning unused native libraries and unused datasets stuff before packaging
# (one layer instead of one layer per path)
RUN rm -rf \
        grobid-home/pdf2xml \
        grobid-home/pdfalto/lin-32 \
        grobid-home/pdfalto/mac-64 \
        grobid-home/pdfalto/win-* \
        grobid-home/lib/lin-32 \
        grobid-home/lib/win-* \
        grobid-home/lib/mac-64 \
        software-mentions/resources/dataset

# build and install GROBID itself
RUN ./gradlew clean assemble install --no-daemon --info --stacktrace

# build and install the software-mentions module with its own Gradle wrapper
WORKDIR /opt/grobid-source/software-mentions
RUN ./gradlew clean install --no-daemon --info --stacktrace

# unpack the distributions into /opt/grobid
WORKDIR /opt/grobid
RUN unzip -o /opt/grobid-source/grobid-service/build/distributions/grobid-service-*.zip \
    && mv grobid-service* grobid-service
RUN unzip -o /opt/grobid-source/grobid-home/build/distributions/grobid-home-*.zip \
    && chmod -R 755 /opt/grobid/grobid-home/pdfalto

# /opt/grobid-source is intentionally kept: the runtime stage copies files from it.
# (The former `RUN rm -rf grobid-source` ran inside /opt/grobid, where no such
# directory is created, so it never removed anything.)
# -------------------
# build runtime image
# -------------------
# use NVIDIA Container Toolkit to automatically recognize possible GPU drivers on the host machine
FROM tensorflow/tensorflow:2.7.0-gpu

# setting locale is likely useless but to be sure
ENV LANG=C.UTF-8

# update NVIDIA Cuda key (following a key rotation in April 2022)
# Done in a single layer so the downloaded keyring package is deleted in the
# same layer that fetched it.
# NOTE(review): wget is installed without a preceding `apt-get update`,
# presumably because the update fails until the old key and repo lists are
# replaced - confirm before reordering these steps.
RUN apt-get install -y wget \
    && apt-key del 7fa2af80 \
    && rm /etc/apt/sources.list.d/cuda.list \
    && rm /etc/apt/sources.list.d/nvidia-ml.list \
    && wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.0-1_all.deb \
    && dpkg -i cuda-keyring_1.0-1_all.deb \
    && rm -f cuda-keyring_1.0-1_all.deb
# install JRE, python and other dependencies
# (package index is refreshed and cleaned within the same layer)
RUN apt-get update \
    && apt-get -y --no-install-recommends install \
        apt-utils build-essential gcc libxml2 libfontconfig unzip curl \
        openjdk-17-jre-headless openjdk-17-jdk ca-certificates-java \
        musl gfortran \
        python3 python3-pip python3-setuptools python3-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /opt/grobid

# bring in the unpacked grobid-service and grobid-home from the builder stage
COPY --from=builder /opt/grobid .

# --no-cache-dir keeps pip's download/wheel cache out of the image layers
RUN python3 -m pip install --no-cache-dir pip --upgrade

# install DeLFT via pypi
RUN pip3 install --no-cache-dir requests delft==0.3.3
WORKDIR /opt/grobid

# link the data directory to /data
# The working directory is /opt/grobid, so /opt/grobid/data and ./data are the
# same path and a single link is enough. Repeating `ln -s /data ./data` would
# follow the link just created and add a self-referencing /data/data entry
# instead.
RUN mkdir -p /data \
    && ln -s /data /opt/grobid/data

# disable python warnings (and fix logging)
ENV PYTHONWARNINGS="ignore"

ENV JAVA_OPTS=-Xmx4g
# install jep (and temporarily the matching JDK)
ENV JDK_URL=https://download.java.net/java/GA/jdk17.0.2/dfd4a8d0985749f896bed50d7138ee7f/8/GPL/openjdk-17.0.2_linux-x64_bin.tar.gz

# Download, use and delete the temporary JDK in ONE layer: files removed by a
# later RUN would still be stored in the earlier layers of the image.
RUN curl --fail --show-error --location -q "${JDK_URL}" -o /tmp/openjdk.tar.gz \
    && mkdir /tmp/jdk-17 \
    && tar -xzf /tmp/openjdk.tar.gz --directory /tmp/jdk-17 --strip-components 1 --no-same-owner \
    && /tmp/jdk-17/bin/javac -version \
    && JAVA_HOME=/tmp/jdk-17 pip3 install --no-cache-dir jep==4.0.2 \
    && rm -f /tmp/openjdk.tar.gz \
    && rm -rf /tmp/jdk-17

# the pip-installed jep directory comes first, ahead of the libraries bundled in grobid-home
ENV LD_LIBRARY_PATH=/usr/local/lib/python3.8/dist-packages/jep:grobid-home/lib/lin-64:grobid-home/lib/lin-64/jep:${LD_LIBRARY_PATH}

# remove the bundled libjep.so because we are providing our own version through
# the pip installation of jep above
RUN rm /opt/grobid/grobid-home/lib/lin-64/jep/libjep.so
# preload embeddings
# The preload script and the resource registry are placed in the current
# working directory; the preload step itself is currently disabled.
COPY --from=builder \
    /opt/grobid-source/grobid-home/scripts/preload_embeddings.py \
    /opt/grobid-source/grobid-home/config/resources-registry.json \
    ./
#RUN python3 preload_embeddings.py --embedding word2vec --registry ./resources-registry.json

# make the GROBID installation reachable as /opt/delft as well
RUN ln -s /opt/grobid /opt/delft

# software-mentions sources built in the builder stage
COPY --from=builder /opt/grobid-source/software-mentions/ /opt/grobid/software-mentions/
# `org` subtree of the builder's local Maven repository
COPY --from=builder /root/.m2/repository/org/ /opt/grobid/software-mentions/lib/org/
# install Pub2TEI
# Fetch, unpack, rename and clean up in one layer so the downloaded archive
# does not stay in the image.
# NOTE(review): this pulls the head of the master branch, so the content is not
# pinned and may differ between builds.
WORKDIR /opt/
RUN wget https://github.com/kermitt2/Pub2TEI/archive/refs/heads/master.zip \
    && unzip master.zip \
    && mv Pub2TEI-master Pub2TEI \
    && rm -f master.zip
WORKDIR /opt/grobid/software-mentions

# place a copy of the resource registry under /opt/grobid/delft/delft
RUN mkdir /opt/grobid/delft /opt/grobid/delft/delft
COPY --from=builder \
    /opt/grobid-source/grobid-home/config/resources-registry.json \
    /opt/grobid/delft/delft/resources-registry.json

# trigger gradle wrapper install
RUN ./gradlew --version

# install all the ML models, then delete the model sources and archives
# in the same layer
RUN ./gradlew copyModels installModels \
    && rm -rf resources/models \
    && rm -f \
        /opt/grobid/grobid-home/models/software/model.wapiti.gz \
        /opt/grobid/grobid-home/models/software-BERT-0.3.2.zip \
        /opt/grobid/grobid-home/models/context_bert-0.3.2.zip \
        /opt/grobid/grobid-home/models/context_used_bert-0.3.2.zip \
        /opt/grobid/grobid-home/models/context_shared_bert-0.3.2.zip \
        /opt/grobid/grobid-home/models/context_creation_bert-0.3.2.zip

# build the service jar (tests skipped)
RUN ./gradlew clean assemble install \
        --no-daemon \
        --stacktrace \
        --info \
        -x test
# Start the service directly in exec form: the command needs no shell features,
# and without the `sh -c` wrapper the JVM is the container's main process and
# receives the stop signal sent by `docker stop`.
# Paths are relative to the working directory of this stage.
# NOTE(review): JAVA_OPTS is set earlier in this stage but is not referenced by
# this command - confirm whether the heap limit is meant to apply here.
CMD ["java", "--add-opens", "java.base/java.lang=ALL-UNNAMED", "-jar", "build/libs/software-mentions-0.8.0-onejar.jar", "server", "resources/config/config.yml"]

# version string for the image label, supplied with --build-arg GROBID_VERSION=...
ARG GROBID_VERSION

LABEL \
    authors="The contributors" \
    org.label-schema.name="software-mentions" \
    org.label-schema.description="Image with software-mentions service" \
    org.label-schema.url="https://github.com/softcite/software-mentions" \
    org.label-schema.version=${GROBID_VERSION}