2023-05-01 22:57:48 +02:00
|
|
|
# Main stage
|
2023-12-31 15:54:34 +01:00
|
|
|
# Base image: Alpine Linux, pinned to the exact 3.19.0 release tag.
FROM alpine:3.19.0
|
2023-12-10 23:02:30 +01:00
|
|
|
|
2023-12-11 00:06:35 +01:00
|
|
|
# JDK for app
|
2023-12-31 15:54:34 +01:00
|
|
|
# Install every runtime dependency in a single layer: base utilities and the
# JRE, LibreOffice, OCRmyPDF with English tesseract data, OpenCV, Python/pip
# and the Python-based converters. Comment lines inside the continued command
# are stripped by the Dockerfile parser, so they do not break the `&&` chain.
RUN apk add --no-cache \
        ca-certificates \
        tzdata \
        tini \
        bash \
        curl \
        openjdk17-jre && \
# Doc conversion (LibreOffice is pulled from the edge/community repository;
# HTTPS is used so the index and packages are not fetched over plaintext,
# matching the other repository URLs in this instruction)
    apk add --no-cache libreoffice --repository https://dl-cdn.alpinelinux.org/alpine/edge/community && \
# OCR MY PDF (unpaper for deskew and other advanced features)
    apk add --no-cache \
        ocrmypdf \
        tesseract-ocr-data-eng \
# CV
        py3-opencv \
# python3/pip
        python3 && \
# Bootstrap pip. The script is downloaded to a file instead of being piped
# into the interpreter: the default /bin/sh has no pipefail, so a failed
# download in `wget ... | python3 -` would be silently ignored. The file is
# removed in the same layer so it never ends up in the image.
    wget -q -O /tmp/get-pip.py https://bootstrap.pypa.io/get-pip.py && \
    python3 /tmp/get-pip.py --break-system-packages --no-cache-dir --upgrade && \
    rm -f /tmp/get-pip.py && \
# uno unoconv and HTML
    pip install --break-system-packages --no-cache-dir --upgrade unoconv WeasyPrint && \
# Move the packaged tesseract data aside under a new name.
# NOTE(review): presumably /usr/share/tessdata is repopulated or mounted at
# runtime - confirm against the rest of the image/entrypoint.
    mv /usr/share/tessdata /usr/share/tessdata-original && \
# Register the Alpine edge repositories under the "@testing" tag.
# NOTE(review): edge/main and edge/community share the same tag as
# edge/testing; kept unchanged because later steps may rely on `pkg@testing`.
    echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/apk/repositories && \
    echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/community" | tee -a /etc/apk/repositories && \
    echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/testing" | tee -a /etc/apk/repositories
|