Stirling-PDF/DockerfileBase

48 lines
1.5 KiB
Text
Raw Normal View History

2023-05-01 22:57:48 +02:00
# Main stage: shared runtime base containing a Java 17 runtime, headless
# LibreOffice, Tesseract OCR (from the alex-p/tesseract-ocr5 PPA), Ghostscript,
# OCRmyPDF, unoconv and the Python libraries installed below.
#
# The base image is pinned to a release tag instead of the moving `latest` so
# that rebuilds are reproducible.
# NOTE(review): 22.04 is assumed to be the release this package set targets
# (newer releases reject system-wide `pip install`) - confirm before merging.
FROM ubuntu:22.04 AS base

# OS packages. Everything apt-related happens in ONE layer so that
# `apt-get update` is never cached separately from the installs that rely on
# it, and so that removing the apt lists and the original tesseract data
# directory actually shrinks the image (files deleted in a later layer still
# occupy space in the earlier one).
RUN export DEBIAN_FRONTEND=noninteractive && \
    apt-get update && \
    apt-get install -y --no-install-recommends \
        gnupg2 \
        software-properties-common && \
    # -y: never wait for an interactive confirmation during the build.
    add-apt-repository -y ppa:alex-p/tesseract-ocr5 && \
    apt-get update && \
    apt-get install -y --no-install-recommends \
        ghostscript \
        libreoffice-calc-nogui \
        libreoffice-common \
        libreoffice-core-nogui \
        libreoffice-impress-nogui \
        libreoffice-writer-nogui \
        ocrmypdf \
        openjdk-17-jre \
        python3-pip \
        python3-uno \
        tesseract-ocr \
        unoconv && \
    # Relocate the packaged tesseract data to /usr/share/tesseract-ocr-original
    # and remove the original directory.
    mkdir /usr/share/tesseract-ocr-original && \
    cp -r /usr/share/tesseract-ocr/* /usr/share/tesseract-ocr-original && \
    rm -rf /usr/share/tesseract-ocr && \
    rm -rf /var/lib/apt/lists/*

# Python packages. Every command is joined with `&&`: previously the pinned
# install line ended in a bare `\`, so the following `pip install ... ocrmypdf`
# was swallowed as extra package names ("pip", "install", "ocrmypdf") of the
# preceding command instead of running as its own step.
# NOTE(review): ocrmypdf is installed both with --user and system-wide, as in
# the original; one of the two is probably redundant - confirm which is needed.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir --user --upgrade ocrmypdf && \
    pip install --no-cache-dir --upgrade \
        pillow==10.0.1 \
        reportlab==3.6.13 \
        wheel==0.38.1 \
        setuptools==65.5.1 \
        pyjwt==2.4.0 \
        cryptography==39.0.1 && \
    pip install --no-cache-dir --upgrade ocrmypdf && \
    pip install --no-cache-dir \
        opencv-python-headless
# Python packages stage: extends `base` with the compiler toolchain and the
# development headers (libffi, OpenSSL, zlib, libjpeg) used when building
# native Python extensions.
# NOTE(review): no `pip install` runs in this stage in the visible file, so it
# only adds apt packages on top of `base` - confirm whether a build step is
# missing here.
FROM base AS python-packages

# Install build tools and Python libraries' build dependencies.
# update + install + list cleanup stay in a single layer so the package index
# is never stale and the apt cache is not kept in the layer.
RUN export DEBIAN_FRONTEND=noninteractive && \
    apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        libffi-dev \
        libjpeg-dev \
        libssl-dev \
        zlib1g-dev && \
    rm -rf /var/lib/apt/lists/*
# Final stage: start again from the lean `base` image and copy only the
# contents of /usr/local from the `python-packages` stage, so the compiler
# toolchain and -dev headers installed there do not end up in the final image.
# NOTE(review): this only has an effect if `python-packages` writes something
# under /usr/local - verify against that stage.
FROM base
COPY --from=python-packages /usr/local /usr/local