switch images to alpine

Signed-off-by: Zoey <zoey@z0ey.de>
This commit is contained in:
Zoey 2023-12-31 15:54:34 +01:00
parent cda8f7b27d
commit b043e666ae
8 changed files with 96 additions and 125 deletions

View file

@ -11,37 +11,30 @@ ENV DOCKER_ENABLE_SECURITY=false \
# PUID=1000 \ # PUID=1000 \
# PGID=1000 \ # PGID=1000 \
# UMASK=022 \ # UMASK=022 \
# Copy necessary files
COPY scripts /scripts
COPY pipeline /pipeline
COPY src/main/resources/static/fonts/*.ttf /usr/share/fonts/opentype/noto
COPY src/main/resources/static/fonts/*.otf /usr/share/fonts/opentype/noto
COPY build/libs/*.jar app.jar
# Create user and group # Create user and group
##RUN groupadd -g $PGID stirlingpdfgroup && \ ##RUN groupadd -g $PGID stirlingpdfgroup && \
## useradd -u $PUID -g stirlingpdfgroup -s /bin/sh stirlingpdfuser && \ ## useradd -u $PUID -g stirlingpdfgroup -s /bin/sh stirlingpdfuser && \
## mkdir -p $HOME && chown stirlingpdfuser:stirlingpdfgroup $HOME ## mkdir -p $HOME && chown stirlingpdfuser:stirlingpdfgroup $HOME && \
# Set up necessary directories and permissions # Set up necessary directories and permissions
RUN mkdir -p /scripts /usr/share/fonts/opentype/noto /usr/share/tesseract-ocr /configs /logs /customFiles /pipeline /pipeline/defaultWebUIConfigs /pipeline/watchedFolders /pipeline/finishedFolders RUN mkdir /configs /logs /customFiles /pipeline/watchedFolders /pipeline/finishedFolders && \
##&& \ ##&& \
## chown -R stirlingpdfuser:stirlingpdfgroup /scripts /usr/share/fonts/opentype/noto /usr/share/tesseract-ocr /configs /customFiles && \ ## chown -R stirlingpdfuser:stirlingpdfgroup /scripts /usr/share/fonts/opentype/noto /usr/share/tesseract-ocr /configs /customFiles && \
## chown -R stirlingpdfuser:stirlingpdfgroup /usr/share/tesseract-ocr-original ## chown -R stirlingpdfuser:stirlingpdfgroup /usr/share/tesseract-ocr-original && \
# Copy necessary files
COPY ./scripts/* /scripts/
COPY ./pipeline/ /pipeline/
COPY src/main/resources/static/fonts/*.ttf /usr/share/fonts/opentype/noto/
COPY src/main/resources/static/fonts/*.otf /usr/share/fonts/opentype/noto/
COPY build/libs/*.jar app.jar
# Set font cache and permissions # Set font cache and permissions
RUN fc-cache -f -v && chmod +x /scripts/* fc-cache -f -v && \
chmod +x /scripts/*
##&& \
## chown stirlingpdfuser:stirlingpdfgroup /app.jar && \ ## chown stirlingpdfuser:stirlingpdfgroup /app.jar && \
## chmod +x /scripts/init.sh ## chmod +x /scripts/init.sh
# Expose necessary ports
EXPOSE 8080
# Set user and run command # Set user and run command
##USER stirlingpdfuser ##USER stirlingpdfuser
ENTRYPOINT ["/scripts/init.sh"] ENTRYPOINT ["tini", "--", "/scripts/init.sh"]
CMD ["java", "-Dfile.encoding=UTF-8", "-jar", "/app.jar"] CMD ["java", "-Dfile.encoding=UTF-8", "-jar", "/app.jar"]

View file

@ -1,65 +1,59 @@
# Build jbig2enc in a separate stage # use alpine
FROM bellsoft/liberica-openjdk-debian:17 FROM alpine:latest
ARG VERSION_TAG ARG VERSION_TAG
RUN apt-get update && \
apt-get install -y --no-install-recommends \
libreoffice-core \
libreoffice-common \
libreoffice-writer \
libreoffice-calc \
libreoffice-impress \
unoconv && \
rm -rf /var/lib/apt/lists/*
# Set Environment Variables # Set Environment Variables
ENV DOCKER_ENABLE_SECURITY=false \ ENV DOCKER_ENABLE_SECURITY=false \
HOME=/home/stirlingpdfuser \ HOME=/home/stirlingpdfuser \
VERSION_TAG=$VERSION_TAG \ VERSION_TAG=$VERSION_TAG \
JAVA_TOOL_OPTIONS="$JAVA_TOOL_OPTIONS -XX:MaxRAMPercentage=75" JAVA_TOOL_OPTIONS="$JAVA_TOOL_OPTIONS -XX:MaxRAMPercentage=75"
# PUID=1000 \ # PUID=1000 \
# PGID=1000 \ # PGID=1000 \
# UMASK=022 \ # UMASK=022 \
# Copy necessary files
COPY scripts/download-security-jar.sh /scripts/download-security-jar.sh
COPY scripts/init-without-ocr.sh /scripts/init-without-ocr.sh
COPY pipeline /pipeline
COPY src/main/resources/static/fonts/*.ttf /usr/share/fonts/opentype/noto
COPY src/main/resources/static/fonts/*.otf /usr/share/fonts/opentype/noto
COPY build/libs/*.jar app.jar
RUN apk add --no-cache \
ca-certificates \
tzdata \
tini \
bash \
curl \
openjdk17-jre \
# Doc conversion
libreoffice-common \
libreoffice-writer \
libreoffice-calc \
libreoffice-impress \
# python and pip
python3 && \
wget https://bootstrap.pypa.io/get-pip.py -qO - | python3 - --break-system-packages --no-cache-dir --upgrade && \
# uno unoconv and HTML
pip install --break-system-packages --no-cache-dir --upgrade uno unoconv WeasyPrint && \
# Create user and group # Create user and group
#RUN groupadd -g $PGID stirlingpdfgroup && \ #RUN groupadd -g $PGID stirlingpdfgroup && \
# useradd -u $PUID -g stirlingpdfgroup -s /bin/sh stirlingpdfuser && \ # useradd -u $PUID -g stirlingpdfgroup -s /bin/sh stirlingpdfuser && \
# mkdir -p $HOME && chown stirlingpdfuser:stirlingpdfgroup $HOME # mkdir -p $HOME && chown stirlingpdfuser:stirlingpdfgroup $HOME
# Set up necessary directories and permissions # Set up necessary directories and permissions
RUN mkdir -p /scripts /usr/share/fonts/opentype/noto /configs /customFiles /logs /pipeline /pipeline/defaultWebUIConfigs /pipeline/watchedFolders /pipeline/finishedFolders mkdir /configs /logs /customFiles /pipeline/watchedFolders /pipeline/finishedFolders && \
# chown -R stirlingpdfuser:stirlingpdfgroup /usr/share/fonts/opentype/noto /configs /customFiles # chown -R stirlingpdfuser:stirlingpdfgroup /usr/share/fonts/opentype/noto /configs /customFiles
# Copy necessary files
COPY ./scripts/download-security-jar.sh /scripts/download-security-jar.sh
COPY ./scripts/init-without-ocr.sh /scripts/init-without-ocr.sh
COPY ./pipeline/ /pipeline/
COPY src/main/resources/static/fonts/*.ttf /usr/share/fonts/opentype/noto/
COPY src/main/resources/static/fonts/*.otf /usr/share/fonts/opentype/noto/
COPY build/libs/*.jar app.jar
# Set font cache and permissions # Set font cache and permissions
RUN fc-cache -f -v && \ fc-cache -f -v && \
chmod +x /scripts/init-without-ocr.sh && \ chmod +x /scripts/*.sh
chmod +x /scripts/download-security-jar.sh
# chown stirlingpdfuser:stirlingpdfgroup /app.jar # chown stirlingpdfuser:stirlingpdfgroup /app.jar
# Expose the application port
EXPOSE 8080
# Set environment variables # Set environment variables
ENV ENDPOINTS_GROUPS_TO_REMOVE=Python,OpenCV,OCRmyPDF ENV ENDPOINTS_GROUPS_TO_REMOVE=Python,OpenCV,OCRmyPDF
ENV DOCKER_ENABLE_SECURITY=false ENV DOCKER_ENABLE_SECURITY=false
# Run the application # Run the application
#USER stirlingpdfuser #USER stirlingpdfuser
ENTRYPOINT ["/scripts/init-without-ocr.sh"] ENTRYPOINT ["tini", "--", "/scripts/init-without-ocr.sh"]
CMD ["java", "-Dfile.encoding=UTF-8", "-jar", "/app.jar"] CMD ["java", "-Dfile.encoding=UTF-8", "-jar", "/app.jar"]

View file

@ -1,5 +1,5 @@
# Build jbig2enc in a separate stage # use alpine
FROM bellsoft/liberica-openjdk-alpine:17 FROM alpine:latest
ARG VERSION_TAG ARG VERSION_TAG
@ -8,39 +8,38 @@ ENV DOCKER_ENABLE_SECURITY=false \
HOME=/home/stirlingpdfuser \ HOME=/home/stirlingpdfuser \
VERSION_TAG=$VERSION_TAG \ VERSION_TAG=$VERSION_TAG \
JAVA_TOOL_OPTIONS="$JAVA_TOOL_OPTIONS -XX:MaxRAMPercentage=75" JAVA_TOOL_OPTIONS="$JAVA_TOOL_OPTIONS -XX:MaxRAMPercentage=75"
# PUID=1000 \ # PUID=1000 \
# PGID=1000 \ # PGID=1000 \
# UMASK=022 \ # UMASK=022 \
# Copy necessary files
COPY scripts/download-security-jar.sh /scripts/download-security-jar.sh
COPY scripts/init-without-ocr.sh /scripts/init-without-ocr.sh
COPY pipeline /pipeline
COPY build/libs/*.jar app.jar
# Create user and group using Alpine's addgroup and adduser # Create user and group using Alpine's addgroup and adduser
#RUN addgroup -g $PGID stirlingpdfgroup && \ #RUN addgroup -g $PGID stirlingpdfgroup && \
# adduser -u $PUID -G stirlingpdfgroup -s /bin/sh -D stirlingpdfuser && \ # adduser -u $PUID -G stirlingpdfgroup -s /bin/sh -D stirlingpdfuser && \
# mkdir -p $HOME && chown stirlingpdfuser:stirlingpdfgroup $HOME # mkdir -p $HOME && chown stirlingpdfuser:stirlingpdfgroup $HOME
# Set up necessary directories and permissions # Set up necessary directories and permissions
#RUN mkdir -p /scripts /configs /customFiles && \ #RUN mkdir -p /scripts /configs /customFiles && \
# chown -R stirlingpdfuser:stirlingpdfgroup /scripts /configs /customFiles /logs /pipeline /pipeline/defaultWebUIConfigs /pipeline/watchedFolders /pipeline/finishedFolders # chown -R stirlingpdfuser:stirlingpdfgroup /scripts /configs /customFiles /logs /pipeline /pipeline/defaultWebUIConfigs /pipeline/watchedFolders /pipeline/finishedFolders
RUN mkdir /configs /logs /customFiles && \
RUN mkdir -p /scripts /usr/share/fonts/opentype/noto /configs /customFiles
COPY ./scripts/download-security-jar.sh /scripts/download-security-jar.sh
COPY ./scripts/init-without-ocr.sh /scripts/init-without-ocr.sh
COPY ./pipeline/ /pipeline/
COPY build/libs/*.jar app.jar
# Set font cache and permissions # Set font cache and permissions
#RUN chown stirlingpdfuser:stirlingpdfgroup /app.jar #RUN chown stirlingpdfuser:stirlingpdfgroup /app.jar
chmod +x /scripts/*.sh && \
RUN chmod +x /scripts/init-without-ocr.sh && \ apk add --no-cache \
chmod +x /scripts/download-security-jar.sh && \ ca-certificates \
apk add --no-cache curl tzdata \
tini \
# Expose the application port bash \
EXPOSE 8080 curl
# Set environment variables # Set environment variables
ENV ENDPOINTS_GROUPS_TO_REMOVE=CLI ENV ENDPOINTS_GROUPS_TO_REMOVE=CLI
ENTRYPOINT ["/scripts/init-without-ocr.sh"] ENTRYPOINT ["tini", "--", "/scripts/init-without-ocr.sh"]
# Run the application # Run the application
CMD ["java", "-Dfile.encoding=UTF-8", "-jar", "/app.jar"] CMD ["java", "-Dfile.encoding=UTF-8", "-jar", "/app.jar"]

View file

@ -1,43 +1,24 @@
# Main stage # Main stage
FROM ubuntu:latest AS base FROM alpine:3.19.0
# JDK for app # JDK for app
RUN apt-get update && \ RUN apk add --no-cache \
apt-get install -y --no-install-recommends \ ca-certificates \
openjdk-17-jre && \ tzdata \
rm -rf /var/lib/apt/lists/* tini \
bash \
curl \
openjdk17-jre \
# Doc conversion # Doc conversion
RUN apt-get update && \ libreoffice \
apt-get install -y --no-install-recommends \
libreoffice-core \
libreoffice-common \
libreoffice-writer \
libreoffice-calc \
libreoffice-impress \
python3-uno \
curl \
unoconv && \
rm -rf /var/lib/apt/lists/*
# OCR MY PDF (unpaper for descew and other advanced featues) # OCR MY PDF (unpaper for descew and other advanced featues)
RUN apt-get update && apt-get install -y --no-install-recommends software-properties-common gnupg2 && \
add-apt-repository ppa:alex-p/tesseract-ocr5 && apt install -y --no-install-recommends tesseract-ocr && \
apt-get update && \
apt-get install -y --no-install-recommends \
ghostscript \
python3-pip \
ocrmypdf \ ocrmypdf \
unpaper && \ tesseract-ocr-data-eng \
rm -rf /var/lib/apt/lists/* && \ # CV
mv /usr/share/tesseract-ocr /usr/share/tesseract-ocr-original && \ py3-opencv \
pip install --no-cache-dir --upgrade pip && \ # python3/pip
pip install --no-cache-dir --upgrade ocrmypdf && \ python3 && \
pip install --no-cache-dir --upgrade pillow==10.0.1 reportlab==3.6.13 wheel==0.38.1 setuptools==65.5.1 pyjwt==2.4.0 cryptography==39.0.1 wget https://bootstrap.pypa.io/get-pip.py -qO - | python3 - --break-system-packages --no-cache-dir --upgrade && \
# uno unoconv and HTML
pip install --break-system-packages --no-cache-dir --upgrade unoconv WeasyPrint && \
#CV and HTML mv /usr/share/tessdata /usr/share/tessdata-original
RUN pip install --no-cache-dir opencv-python-headless WeasyPrint

View file

@ -21,7 +21,7 @@ Depending on your requirements, you can choose the appropriate language pack for
### Installing Language Packs ### Installing Language Packs
1. Download the desired language pack(s) by selecting the `.traineddata` file(s) for the language(s) you need. 1. Download the desired language pack(s) by selecting the `.traineddata` file(s) for the language(s) you need.
2. Place the `.traineddata` files in the Tesseract tessdata directory: `/usr/share/tesseract-ocr/5/tessdata` (Debian) or `/usr/share/tesseract/tessdata` (Fedora) 2. Place the `.traineddata` files in the Tesseract tessdata directory: `/usr/share/tessdata`
# DO NOT REMOVE EXISTING ENG.TRAINEDDATA, IT'S REQUIRED. # DO NOT REMOVE EXISTING ENG.TRAINEDDATA, IT'S REQUIRED.
@ -37,14 +37,14 @@ services:
your_service_name: your_service_name:
image: your_docker_image_name image: your_docker_image_name
volumes: volumes:
- /location/of/trainingData:/usr/share/tesseract-ocr/5/tessdata - /location/of/trainingData:/usr/share/tessdata
``` ```
#### Docker run #### Docker run
Add the following to your existing docker run command Add the following to your existing docker run command
```bash ```bash
-v /location/of/trainingData:/usr/share/tesseract-ocr/5/tessdata -v /location/of/trainingData:/usr/share/tessdata
``` ```
#### Non-Docker #### Non-Docker

View file

@ -139,7 +139,7 @@ Easiest is to use the langpacks provided by your repositories. Skip the other st
Manual: Manual:
1. Download the desired language pack(s) by selecting the `.traineddata` file(s) for the language(s) you need. 1. Download the desired language pack(s) by selecting the `.traineddata` file(s) for the language(s) you need.
2. Place the `.traineddata` files in the Tesseract tessdata directory: `/usr/share/tesseract-ocr/5/tessdata` 2. Place the `.traineddata` files in the Tesseract tessdata directory: `/usr/share/tessdata`
3. 3.
Please view [OCRmyPDF install guide](https://ocrmypdf.readthedocs.io/en/latest/installation.html) for more info. Please view [OCRmyPDF install guide](https://ocrmypdf.readthedocs.io/en/latest/installation.html) for more info.
**IMPORTANT:** DO NOT REMOVE EXISTING `eng.traineddata`, IT'S REQUIRED. **IMPORTANT:** DO NOT REMOVE EXISTING `eng.traineddata`, IT'S REQUIRED.

View file

@ -110,7 +110,7 @@ Docker Run
```bash ```bash
docker run -d \ docker run -d \
-p 8080:8080 \ -p 8080:8080 \
-v /location/of/trainingData:/usr/share/tesseract-ocr/5/tessdata \ -v /location/of/trainingData:/usr/share/tessdata \
-v /location/of/extraConfigs:/configs \ -v /location/of/extraConfigs:/configs \
-v /location/of/logs:/logs \ -v /location/of/logs:/logs \
-e DOCKER_ENABLE_SECURITY=false \ -e DOCKER_ENABLE_SECURITY=false \
@ -131,7 +131,7 @@ services:
ports: ports:
- '8080:8080' - '8080:8080'
volumes: volumes:
- /location/of/trainingData:/usr/share/tesseract-ocr/5/tessdata #Required for extra OCR languages - /location/of/trainingData:/usr/share/tessdata #Required for extra OCR languages
- /location/of/extraConfigs:/configs - /location/of/extraConfigs:/configs
# - /location/of/customFiles:/customFiles/ # - /location/of/customFiles:/customFiles/
# - /location/of/logs:/logs/ # - /location/of/logs:/logs/

View file

@ -3,10 +3,14 @@
# Copy the original tesseract-ocr files to the volume directory without overwriting existing files # Copy the original tesseract-ocr files to the volume directory without overwriting existing files
echo "Copying original files without overwriting existing files" echo "Copying original files without overwriting existing files"
mkdir -p /usr/share/tesseract-ocr mkdir -p /usr/share/tesseract-ocr
cp -rn /usr/share/tesseract-ocr-original/* /usr/share/tesseract-ocr cp -rn /usr/share/tesseract-ocr-original/* /usr/share/tesseract-ocr
if [ -d /usr/share/tesseract-ocr/4.00/tessdata ]; then if [ -d /usr/share/tesseract-ocr/4.00/tessdata ]; then
cp -r /usr/share/tesseract-ocr/4.00/tessdata/* /usr/share/tesseract-ocr/5/tessdata/ || true; cp -r /usr/share/tesseract-ocr/4.00/tessdata/* /usr/share/tessdata || true;
fi
if [ -d /usr/share/tesseract-ocr/5/tessdata ]; then
cp -r /usr/share/tesseract-ocr/5/tessdata/* /usr/share/tessdata || true;
fi fi
# Check if TESSERACT_LANGS environment variable is set and is not empty # Check if TESSERACT_LANGS environment variable is set and is not empty
@ -23,4 +27,4 @@ fi
/scripts/download-security-jar.sh /scripts/download-security-jar.sh
# Run the main command # Run the main command
exec "$@" exec "$@"