Skip to content

Commit

Permalink
Merge pull request kermitt2#1255 from kermitt2/docker-evaluation-image
Browse files Browse the repository at this point in the history
Enable running end 2 end evaluation via a docker container
  • Loading branch information
lfoppiano authored Feb 26, 2025
2 parents 8b9d113 + 419715e commit d4d31af
Show file tree
Hide file tree
Showing 3 changed files with 480 additions and 0 deletions.
47 changes: 47 additions & 0 deletions .github/workflows/ci-build-manual-eval.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Manually triggered workflow that checks the project builds with Gradle and
# then builds Dockerfile.evaluation and pushes it to Docker Hub as
# lfoppiano/grobid-evaluation.
name: Build and push a full docker image

on:
  workflow_dispatch:
    inputs:
      custom_tag:
        type: string
        description: Docker image tag
        required: true
        default: "latest"

jobs:
  # Gate: the image is only built when the Gradle build (tests skipped) succeeds.
  build:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
      - name: Set up JDK 17
        uses: actions/setup-java@v4
        with:
          java-version: '17.0.10+7'
          distribution: 'temurin'
          cache: 'gradle'
      - name: Build with Gradle
        run: ./gradlew build -x test

  docker-build-full:
    needs: [ build ]
    runs-on: ubuntu-latest

    steps:
      # Remove preinstalled toolchains from the runner before the image build
      # to free disk space.
      - name: Create more disk space
        run: sudo rm -rf /usr/share/dotnet && sudo rm -rf /opt/ghc && sudo rm -rf "/usr/local/share/boost" && sudo rm -rf "$AGENT_TOOLSDIRECTORY"
      - uses: actions/checkout@v4
      - name: Build and push
        id: docker_build
        uses: mr-smithers-excellent/docker-build-push@v5
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME_LFOPPIANO }}
          password: ${{ secrets.DOCKERHUB_TOKEN_LFOPPIANO }}
          image: lfoppiano/grobid-evaluation
          registry: docker.io
          pushImage: true
          # The image is always tagged "latest" in addition to the requested tag.
          tags: latest, ${{ github.event.inputs.custom_tag }}
          dockerfile: Dockerfile.evaluation
      # docker-build-push does not expose a `digest` output (its documented
      # outputs are imageFullName, imageName and tags), so report those instead.
      # Values are passed through `env` rather than interpolated into the script.
      - name: Image reference
        env:
          IMAGE_FULL_NAME: ${{ steps.docker_build.outputs.imageFullName }}
          IMAGE_TAGS: ${{ steps.docker_build.outputs.tags }}
        run: |
          echo "Pushed image: ${IMAGE_FULL_NAME}"
          echo "Tags: ${IMAGE_TAGS}"
63 changes: 63 additions & 0 deletions Dockerfile.evaluation
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
## Grobid evaluation image
## ------
# https://grobid.readthedocs.io/en/latest/End-to-end-evaluation/
# NOTE: To match the exact evaluation published in the Grobid documentation, it is necessary to have a running Biblio-glutton instance

# Version of the Grobid "full" base image. Override with
# `--build-arg GROBID_VERSION=...`; the default keeps the original base image.
ARG GROBID_VERSION=0.8.2-RC1

FROM lfoppiano/grobid:${GROBID_VERSION}-full AS runtime

# setting locale is likely useless but to be sure
ENV LANG=C.UTF-8

# Root is needed for the package installation below.
# NOTE(review): no later USER instruction switches back, so the evaluation
# also runs as root - confirm this is intended.
USER root

# unzip/wget fetch and unpack the evaluation corpora; ca-certificates is listed
# explicitly because --no-install-recommends would otherwise not pull it in for
# wget's HTTPS downloads. The apt lists are removed in the same layer.
RUN apt-get update && \
    apt-get -y --no-install-recommends install \
        ca-certificates \
        unzip \
        wget && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /opt/grobid

# gradle
COPY gradle/ ./gradle/
COPY gradlew ./
COPY gradle.properties ./
COPY build.gradle ./
COPY settings.gradle ./

# source
COPY grobid-core/ ./grobid-core/
COPY grobid-service/ ./grobid-service/
COPY grobid-trainer/ ./grobid-trainer/

# Setting DL-powered configuration
COPY grobid-home/config/grobid-evaluation.yaml grobid-home/config/config.yaml

# Drop the *-with_ELMo model directories from the image filesystem.
# (Layers inherited from the base image are unaffected, so this does not
# reduce the download size.)
RUN rm -rf /opt/grobid/grobid-home/models/*-with_ELMo

# Download evaluation data
# WORKDIR creates /opt/grobid/evaluation if it does not exist yet.
WORKDIR /opt/grobid/evaluation
RUN wget https://zenodo.org/records/3873702/files/biorxiv-10k-test-2000.zip -O biorxiv-10k-test-2000.zip \
    && unzip biorxiv-10k-test-2000.zip -d biorxiv-10k-test-2000 \
    && wget https://zenodo.org/records/7708580/files/eLife_984.zip -O eLife_984.zip \
    && unzip eLife_984.zip -d eLife_984 \
    && wget https://zenodo.org/records/7708580/files/PLOS_1000.zip -O PLOS_1000.zip \
    && unzip PLOS_1000.zip -d PLOS_1000 \
    && wget https://zenodo.org/records/7708580/files/PMC_sample_1943.zip -O PMC_sample_1943.zip \
    && unzip PMC_sample_1943.zip -d PMC_sample_1943 \
    && rm -f ./*.zip

# NOTE(review): disabled variant that downloads only the PMC sample; it
# duplicates part of the RUN above - confirm whether it is still needed.
#RUN wget -q https://zenodo.org/records/7708580/files/PMC_sample_1943.zip -O PMC_sample_1943.zip \
#    && unzip PMC_sample_1943.zip -d PMC_sample_1943 \
#    && rm *.zip

VOLUME ["/opt/grobid/grobid-home/tmp"]

WORKDIR /opt/grobid

# Runs the jatsEval Gradle task once per corpus. The commands are separated by
# ';' rather than '&&', so each evaluation starts regardless of the exit status
# of the previous one.
CMD ["/bin/bash", "-c", "./gradlew jatsEval -Pp2t=/opt/grobid/evaluation/PMC_sample_1943 -Prun=1 -PfileRatio=1; ./gradlew jatsEval -Pp2t=/opt/grobid/evaluation/biorxiv-10k-test-2000 -Prun=1 -PfileRatio=1; ./gradlew jatsEval -Pp2t=/opt/grobid/evaluation/eLife_984 -Prun=1 -PfileRatio=1; ./gradlew jatsEval -Pp2t=/opt/grobid/evaluation/PLOS_1000 -Prun=1 -PfileRatio=1;"]

# An ARG declared before FROM is only visible to FROM itself; redeclare it in
# this stage so the version label below expands to a value.
ARG GROBID_VERSION

LABEL \
    authors="The contributors" \
    org.label-schema.name="Grobid" \
    org.label-schema.description="Image running the Grobid End 2 end evaluation" \
    org.label-schema.url="https://github.com/kermitt2/Grobid" \
    org.label-schema.version=${GROBID_VERSION}
Loading

0 comments on commit d4d31af

Please sign in to comment.