From c7939ec083065aee0309a15a03a876c5f3e34a26 Mon Sep 17 00:00:00 2001 From: lopez Date: Mon, 22 Feb 2021 01:26:45 +0100 Subject: [PATCH 1/2] aligned with PR #703 --- Dockerfile.delft | 40 +++++++++++++--------------------------- 1 file changed, 13 insertions(+), 27 deletions(-) diff --git a/Dockerfile.delft b/Dockerfile.delft index c4c53c311b..6fe7319cf9 100644 --- a/Dockerfile.delft +++ b/Dockerfile.delft @@ -21,13 +21,10 @@ FROM openjdk:8u275-jdk as builder USER root RUN apt-get update && \ - apt-get -y --no-install-recommends install apt-utils libxml2 + apt-get -y --no-install-recommends install unzip WORKDIR /opt/grobid-source -RUN mkdir -p .gradle -VOLUME /opt/grobid-source/.gradle - # gradle COPY gradle/ ./gradle/ COPY gradlew ./ @@ -39,11 +36,15 @@ COPY settings.gradle ./ COPY grobid-home/ ./grobid-home/ COPY grobid-core/ ./grobid-core/ COPY grobid-service/ ./grobid-service/ -COPY grobid-trainer/ ./grobid-trainer/ RUN ./gradlew clean assemble --no-daemon --info --stacktrace -WORKDIR /opt +WORKDIR /opt/grobid +RUN unzip -o /opt/grobid-source/grobid-service/build/distributions/grobid-service-*.zip && \ + mv grobid-service* grobid-service +RUN unzip -o /opt/grobid-source/grobid-home/build/distributions/grobid-home-*.zip && \ + chmod -R 755 /opt/grobid/grobid-home/pdf2xml +RUN rm -rf grobid-source # ------------------- # build runtime image @@ -60,33 +61,25 @@ ENV LANG C.UTF-8 RUN apt-get update && \ apt-get -y --no-install-recommends install apt-utils build-essential gcc libxml2 unzip curl \ openjdk-8-jre-headless ca-certificates-java \ -# git \ musl gfortran \ python3 python3-pip python3-setuptools python3-dev \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* -WORKDIR /opt - -COPY --from=builder /opt/grobid-source/grobid-core/build/libs/grobid-core-*-onejar.jar ./grobid/grobid-core-onejar.jar -COPY --from=builder /opt/grobid-source/grobid-service/build/distributions/grobid-service-*.zip ./grobid-service.zip -COPY --from=builder /opt/grobid-source/grobid-home/build/distributions/grobid-home-*.zip ./grobid-home.zip +WORKDIR /opt/grobid -RUN unzip -o ./grobid-service.zip -d ./grobid && \ - mv ./grobid/grobid-service-* ./grobid/grobid-service +COPY --from=builder /opt/grobid . -RUN unzip ./grobid-home.zip -d ./grobid && \ - mkdir -p /opt/grobid/grobid-home/tmp -RUN rm *.zip RUN rm -rf /opt/grobid/grobid-home/pdf2xml/lin-32 RUN rm -rf /opt/grobid/grobid-home/pdf2xml/mac-64 RUN rm -rf /opt/grobid/grobid-home/pdf2xml/win-* RUN rm -rf /opt/grobid/grobid-home/lib/lin-32 RUN rm -rf /opt/grobid/grobid-home/lib/win-* -# below to allow logs to be written in the container +# below to allow logs to be written in the container (not advised for production!) # RUN mkdir -p logs +# the following might not be necessary - but should result in faster temp file write than directly in the container VOLUME ["/opt/grobid/grobid-home/tmp"] RUN python3 -m pip install pip --upgrade @@ -99,13 +92,6 @@ RUN mkdir -p /data \ && ln -s /data /opt/grobid/data \ && ln -s /data ./data -# install DeLFT by cloning the repo - only for dev time! -#RUN git clone https://github.com/kermitt2/delft -#WORKDIR /opt/delft -#RUN pip3 install -r requirements.txt -# cleaning useless delft data -#RUN rm -rf data/sequenceLabelling data/textClassification data/test data/models/sequenceLabelling data/models/textClassification .git - # disable python warnings (and fix logging) ENV PYTHONWARNINGS="ignore" @@ -119,8 +105,8 @@ ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /tini RUN chmod +x /tini ENTRYPOINT ["/tini", "-s", "--"] -RUN chmod -R 755 /opt/grobid/grobid-home/pdf2xml -RUN chmod 777 /opt/grobid/grobid-home/tmp +#RUN chmod -R 755 /opt/grobid/grobid-home/pdf2xml +#RUN chmod 777 /opt/grobid/grobid-home/tmp # install jep (and temporarily the matching JDK) ENV TEMP_JDK_HOME=/tmp/jdk-${JAVA_VERSION} From d225477fc46a1ff4ff97eca3b88890abf582dfb2 Mon Sep 17 00:00:00 2001 From: lopez Date: Sat, 20 Mar 2021 00:35:49 +0100 Subject: [PATCH 2/2] reduce image similarly as done with the crf-only one --- Dockerfile.delft | 15 +++++++++------ doc/Grobid-docker.md | 6 +++--- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/Dockerfile.delft b/Dockerfile.delft index 6fe7319cf9..1833a32149 100644 --- a/Dockerfile.delft +++ b/Dockerfile.delft @@ -36,6 +36,15 @@ COPY settings.gradle ./ COPY grobid-home/ ./grobid-home/ COPY grobid-core/ ./grobid-core/ COPY grobid-service/ ./grobid-service/ +COPY grobid-trainer/ ./grobid-trainer/ + +# cleaning unused native libraries before packaging +RUN rm -rf grobid-home/pdf2xml/lin-32 +RUN rm -rf grobid-home/pdf2xml/mac-64 +RUN rm -rf grobid-home/pdf2xml/win-* +RUN rm -rf grobid-home/lib/lin-32 +RUN rm -rf grobid-home/lib/win-* +RUN rm -rf grobid-home/lib/mac-64 RUN ./gradlew clean assemble --no-daemon --info --stacktrace @@ -76,12 +85,6 @@ RUN rm -rf /opt/grobid/grobid-home/pdf2xml/win-* RUN rm -rf /opt/grobid/grobid-home/lib/lin-32 RUN rm -rf /opt/grobid/grobid-home/lib/win-* -# below to allow logs to be written in the container (not advised for production!) -# RUN mkdir -p logs - -# the following might not be necessary - but should result in faster temp file write than directly in the container -VOLUME ["/opt/grobid/grobid-home/tmp"] - RUN python3 -m pip install pip --upgrade # install DeLFT via pypi diff --git a/doc/Grobid-docker.md b/doc/Grobid-docker.md index c58c24d908..8d36638bb1 100644 --- a/doc/Grobid-docker.md +++ b/doc/Grobid-docker.md @@ -50,7 +50,7 @@ The process for retrieving and running the image is as follow: > docker pull grobid/grobid:${latest_grobid_version} ``` -- Run the container (note the new version running on 8070, however it will be mapped on the 8080 of your host): +- Run the container: ```bash > docker run --rm --gpus all --init grobid/grobid:${latest_grobid_version} @@ -61,7 +61,7 @@ The image will automatically uses the GPU and CUDA version available on your hos To specify to use only certain GPUs (see the [nvidia container toolkit user guide](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/user-guide.html#gpu-enumeration) for more details): ```bash -> docker run --rm --gpus '"device=1,2"' --init -p 8070:8080 -p 8071:8081 grobid/grobid:${latest_grobid_version} +> docker run --rm --gpus '"device=1,2"' --init -p 8080:8070 -p 8081:8071 grobid/grobid:${latest_grobid_version} ``` You can run the image on CPU by omitting the `-gpus` parameters. @@ -83,7 +83,7 @@ Grobid web services are then available as described in the [service documentatio The simplest way to pass a modified configuration to the docker image is to mount the property file `grobid.properties` when running the image. Modify the config file `grobid/grobid-home/config/grobid.properties` according to your requirements on the host machine and mount it when running the image as follow: ```bash -docker run --rm --gpus all --init -p 8070:8080 -p 8071:8081 -v /home/lopez/grobid/grobid-home/config/grobid.properties:/opt/grobid/grobid-home/config/grobid.properties:ro grobid/grobid:0.6.2-SNAPSHOT +docker run --rm --gpus all --init -p 8080:8070 -p 8081:8071 -v /home/lopez/grobid/grobid-home/config/grobid.properties:/opt/grobid/grobid-home/config/grobid.properties:ro grobid/grobid:0.6.2-SNAPSHOT ``` You need to use an absolute path to specify your modified `grobid.properties` file.