Stirling-PDF/DockerfileBase
2024-01-31 21:02:22 +01:00

25 lines
794 B
Text

# Main stage
# Base image: Alpine plus the JRE and the native tools installed below
# (LibreOffice, OCRmyPDF/Tesseract, OpenCV, Python with unoconv and WeasyPrint).
FROM alpine:3.19.0

# All steps run in a single RUN instruction, so they produce a single layer.
# Comment lines inside the continuation are stripped by the Dockerfile parser
# before the command reaches the shell.
#
# JDK for app, plus base utilities
RUN apk add --no-cache \
        bash \
        ca-certificates \
        curl \
        openjdk17-jre \
        tini \
        tzdata && \
# Doc conversion: LibreOffice is taken from the edge/community repository.
# The repository is fetched over HTTPS rather than plain HTTP.
    apk add --no-cache libreoffice \
        --repository https://dl-cdn.alpinelinux.org/alpine/edge/community && \
# OCRmyPDF (unpaper for deskew and other advanced features), English
# Tesseract data, OpenCV bindings (CV) and the Python interpreter
    apk add --no-cache \
        ocrmypdf \
        py3-opencv \
        python3 \
        tesseract-ocr-data-eng && \
# Bootstrap pip. The script is saved to a file instead of being piped into
# python3: under /bin/sh without pipefail, a failed download would be masked
# by the exit status of python3 reading an empty stdin.
    wget -q https://bootstrap.pypa.io/get-pip.py -O /tmp/get-pip.py && \
    python3 /tmp/get-pip.py --break-system-packages --no-cache-dir --upgrade && \
    rm /tmp/get-pip.py && \
# uno unoconv and HTML
    pip install --break-system-packages --no-cache-dir --upgrade unoconv WeasyPrint && \
# Move the packaged Tesseract data aside under a different name.
# NOTE(review): presumably so /usr/share/tessdata can be supplied or
# repopulated later - confirm against the Dockerfile that consumes this base.
    mv /usr/share/tessdata /usr/share/tessdata-original