forked from guiem/train-tesseract
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDockerfile
42 lines (33 loc) · 1.48 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# Set docker image
FROM ubuntu:18.04
# Skip the configuration part
ENV DEBIAN_FRONTEND noninteractive
# Update and install depedencies
RUN apt-get update && \
apt-get install -y wget unzip bc vim python3-pip libleptonica-dev git
# Packages to complie Tesseract
RUN apt-get install -y --reinstall make && \
apt-get install -y g++ autoconf automake libtool pkg-config libpng-dev libjpeg8-dev libtiff5-dev libicu-dev \
libpango1.0-dev autoconf-archive
# Set working directory
WORKDIR /app
# Copy requirements into the container at /app
COPY requirements.txt ./
# Getting tesstrain: beware the source might change or not being available
# Complie Tesseract with training options (also feel free to update Tesseract versions and such!)
# Getting data: beware the source might change or not being available
RUN mkdir src && cd /app/src && \
wget https://github.com/tesseract-ocr/tesseract/archive/4.1.0.zip && \
unzip 4.1.0.zip && \
cd /app/src/tesseract-4.1.0 && ./autogen.sh && ./configure && make && make install && ldconfig && \
make training && make training-install && \
cd /usr/local/share/tessdata && wget https://github.com/tesseract-ocr/tessdata_best/raw/master/eng.traineddata
# Setting the data prefix
ENV TESSDATA_PREFIX=/usr/local/share/tessdata
# Install libraries using pip installer
RUN pip3 install -r requirements.txt
# Set the locale
RUN apt-get install -y locales && locale-gen en_US.UTF-8
ENV LC_ALL=en_US.UTF-8
ENV LANG=en_US.UTF-8
ENV LANGUAGE=en_US.UTF-8