switch images to alpine
Signed-off-by: Zoey <zoey@z0ey.de>
This commit is contained in:
parent
cda8f7b27d
commit
b043e666ae
8 changed files with 96 additions and 125 deletions
31
Dockerfile
31
Dockerfile
|
@ -12,36 +12,29 @@ ENV DOCKER_ENABLE_SECURITY=false \
|
||||||
# PGID=1000 \
|
# PGID=1000 \
|
||||||
# UMASK=022 \
|
# UMASK=022 \
|
||||||
|
|
||||||
|
# Copy necessary files
|
||||||
|
COPY scripts /scripts
|
||||||
|
COPY pipeline /pipeline
|
||||||
|
COPY src/main/resources/static/fonts/*.ttf /usr/share/fonts/opentype/noto
|
||||||
|
COPY src/main/resources/static/fonts/*.otf /usr/share/fonts/opentype/noto
|
||||||
|
COPY build/libs/*.jar app.jar
|
||||||
|
|
||||||
# Create user and group
|
# Create user and group
|
||||||
##RUN groupadd -g $PGID stirlingpdfgroup && \
|
##RUN groupadd -g $PGID stirlingpdfgroup && \
|
||||||
## useradd -u $PUID -g stirlingpdfgroup -s /bin/sh stirlingpdfuser && \
|
## useradd -u $PUID -g stirlingpdfgroup -s /bin/sh stirlingpdfuser && \
|
||||||
## mkdir -p $HOME && chown stirlingpdfuser:stirlingpdfgroup $HOME
|
## mkdir -p $HOME && chown stirlingpdfuser:stirlingpdfgroup $HOME && \
|
||||||
|
|
||||||
# Set up necessary directories and permissions
|
# Set up necessary directories and permissions
|
||||||
RUN mkdir -p /scripts /usr/share/fonts/opentype/noto /usr/share/tesseract-ocr /configs /logs /customFiles /pipeline /pipeline/defaultWebUIConfigs /pipeline/watchedFolders /pipeline/finishedFolders
|
RUN mkdir /configs /logs /customFiles /pipeline/watchedFolders /pipeline/finishedFolders && \
|
||||||
##&& \
|
##&& \
|
||||||
## chown -R stirlingpdfuser:stirlingpdfgroup /scripts /usr/share/fonts/opentype/noto /usr/share/tesseract-ocr /configs /customFiles && \
|
## chown -R stirlingpdfuser:stirlingpdfgroup /scripts /usr/share/fonts/opentype/noto /usr/share/tesseract-ocr /configs /customFiles && \
|
||||||
## chown -R stirlingpdfuser:stirlingpdfgroup /usr/share/tesseract-ocr-original
|
## chown -R stirlingpdfuser:stirlingpdfgroup /usr/share/tesseract-ocr-original && \
|
||||||
|
|
||||||
# Copy necessary files
|
|
||||||
COPY ./scripts/* /scripts/
|
|
||||||
COPY ./pipeline/ /pipeline/
|
|
||||||
COPY src/main/resources/static/fonts/*.ttf /usr/share/fonts/opentype/noto/
|
|
||||||
COPY src/main/resources/static/fonts/*.otf /usr/share/fonts/opentype/noto/
|
|
||||||
COPY build/libs/*.jar app.jar
|
|
||||||
|
|
||||||
# Set font cache and permissions
|
# Set font cache and permissions
|
||||||
RUN fc-cache -f -v && chmod +x /scripts/*
|
fc-cache -f -v && \
|
||||||
|
chmod +x /scripts/*
|
||||||
##&& \
|
|
||||||
## chown stirlingpdfuser:stirlingpdfgroup /app.jar && \
|
## chown stirlingpdfuser:stirlingpdfgroup /app.jar && \
|
||||||
## chmod +x /scripts/init.sh
|
## chmod +x /scripts/init.sh
|
||||||
|
|
||||||
# Expose necessary ports
|
|
||||||
EXPOSE 8080
|
|
||||||
|
|
||||||
# Set user and run command
|
# Set user and run command
|
||||||
##USER stirlingpdfuser
|
##USER stirlingpdfuser
|
||||||
ENTRYPOINT ["/scripts/init.sh"]
|
ENTRYPOINT ["tini", "--", "/scripts/init.sh"]
|
||||||
CMD ["java", "-Dfile.encoding=UTF-8", "-jar", "/app.jar"]
|
CMD ["java", "-Dfile.encoding=UTF-8", "-jar", "/app.jar"]
|
||||||
|
|
|
@ -1,19 +1,8 @@
|
||||||
# Build jbig2enc in a separate stage
|
# use alpine
|
||||||
FROM bellsoft/liberica-openjdk-debian:17
|
FROM alpine:latest
|
||||||
|
|
||||||
ARG VERSION_TAG
|
ARG VERSION_TAG
|
||||||
|
|
||||||
RUN apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
libreoffice-core \
|
|
||||||
libreoffice-common \
|
|
||||||
libreoffice-writer \
|
|
||||||
libreoffice-calc \
|
|
||||||
libreoffice-impress \
|
|
||||||
unoconv && \
|
|
||||||
rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
|
|
||||||
# Set Environment Variables
|
# Set Environment Variables
|
||||||
ENV DOCKER_ENABLE_SECURITY=false \
|
ENV DOCKER_ENABLE_SECURITY=false \
|
||||||
HOME=/home/stirlingpdfuser \
|
HOME=/home/stirlingpdfuser \
|
||||||
|
@ -23,43 +12,48 @@ ENV DOCKER_ENABLE_SECURITY=false \
|
||||||
# PGID=1000 \
|
# PGID=1000 \
|
||||||
# UMASK=022 \
|
# UMASK=022 \
|
||||||
|
|
||||||
|
# Copy necessary files
|
||||||
|
COPY scripts/download-security-jar.sh /scripts/download-security-jar.sh
|
||||||
|
COPY scripts/init-without-ocr.sh /scripts/init-without-ocr.sh
|
||||||
|
COPY pipeline /pipeline
|
||||||
|
COPY src/main/resources/static/fonts/*.ttf /usr/share/fonts/opentype/noto
|
||||||
|
COPY src/main/resources/static/fonts/*.otf /usr/share/fonts/opentype/noto
|
||||||
|
COPY build/libs/*.jar app.jar
|
||||||
|
|
||||||
|
RUN apk add --no-cache \
|
||||||
|
ca-certificates \
|
||||||
|
tzdata \
|
||||||
|
tini \
|
||||||
|
bash \
|
||||||
|
curl \
|
||||||
|
openjdk17-jre \
|
||||||
|
# Doc conversion
|
||||||
|
libreoffice-common \
|
||||||
|
libreoffice-writer \
|
||||||
|
libreoffice-calc \
|
||||||
|
libreoffice-impress \
|
||||||
|
# python and pip
|
||||||
|
python3 && \
|
||||||
|
wget https://bootstrap.pypa.io/get-pip.py -qO - | python3 - --break-system-packages --no-cache-dir --upgrade && \
|
||||||
|
# uno unoconv and HTML
|
||||||
|
pip install --break-system-packages --no-cache-dir --upgrade uno unoconv WeasyPrint && \
|
||||||
# Create user and group
|
# Create user and group
|
||||||
#RUN groupadd -g $PGID stirlingpdfgroup && \
|
#RUN groupadd -g $PGID stirlingpdfgroup && \
|
||||||
# useradd -u $PUID -g stirlingpdfgroup -s /bin/sh stirlingpdfuser && \
|
# useradd -u $PUID -g stirlingpdfgroup -s /bin/sh stirlingpdfuser && \
|
||||||
# mkdir -p $HOME && chown stirlingpdfuser:stirlingpdfgroup $HOME
|
# mkdir -p $HOME && chown stirlingpdfuser:stirlingpdfgroup $HOME
|
||||||
|
|
||||||
# Set up necessary directories and permissions
|
# Set up necessary directories and permissions
|
||||||
RUN mkdir -p /scripts /usr/share/fonts/opentype/noto /configs /customFiles /logs /pipeline /pipeline/defaultWebUIConfigs /pipeline/watchedFolders /pipeline/finishedFolders
|
mkdir /configs /logs /customFiles /pipeline/watchedFolders /pipeline/finishedFolders && \
|
||||||
|
|
||||||
# chown -R stirlingpdfuser:stirlingpdfgroup /usr/share/fonts/opentype/noto /configs /customFiles
|
# chown -R stirlingpdfuser:stirlingpdfgroup /usr/share/fonts/opentype/noto /configs /customFiles
|
||||||
|
|
||||||
# Copy necessary files
|
|
||||||
COPY ./scripts/download-security-jar.sh /scripts/download-security-jar.sh
|
|
||||||
COPY ./scripts/init-without-ocr.sh /scripts/init-without-ocr.sh
|
|
||||||
COPY ./pipeline/ /pipeline/
|
|
||||||
COPY src/main/resources/static/fonts/*.ttf /usr/share/fonts/opentype/noto/
|
|
||||||
COPY src/main/resources/static/fonts/*.otf /usr/share/fonts/opentype/noto/
|
|
||||||
COPY build/libs/*.jar app.jar
|
|
||||||
|
|
||||||
# Set font cache and permissions
|
# Set font cache and permissions
|
||||||
RUN fc-cache -f -v && \
|
fc-cache -f -v && \
|
||||||
chmod +x /scripts/init-without-ocr.sh && \
|
chmod +x /scripts/*.sh
|
||||||
chmod +x /scripts/download-security-jar.sh
|
|
||||||
|
|
||||||
|
|
||||||
# chown stirlingpdfuser:stirlingpdfgroup /app.jar
|
# chown stirlingpdfuser:stirlingpdfgroup /app.jar
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Expose the application port
|
|
||||||
EXPOSE 8080
|
|
||||||
|
|
||||||
# Set environment variables
|
# Set environment variables
|
||||||
ENV ENDPOINTS_GROUPS_TO_REMOVE=Python,OpenCV,OCRmyPDF
|
ENV ENDPOINTS_GROUPS_TO_REMOVE=Python,OpenCV,OCRmyPDF
|
||||||
ENV DOCKER_ENABLE_SECURITY=false
|
ENV DOCKER_ENABLE_SECURITY=false
|
||||||
|
|
||||||
# Run the application
|
# Run the application
|
||||||
#USER stirlingpdfuser
|
#USER stirlingpdfuser
|
||||||
ENTRYPOINT ["/scripts/init-without-ocr.sh"]
|
ENTRYPOINT ["tini", "--", "/scripts/init-without-ocr.sh"]
|
||||||
CMD ["java", "-Dfile.encoding=UTF-8", "-jar", "/app.jar"]
|
CMD ["java", "-Dfile.encoding=UTF-8", "-jar", "/app.jar"]
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
# Build jbig2enc in a separate stage
|
# use alpine
|
||||||
FROM bellsoft/liberica-openjdk-alpine:17
|
FROM alpine:latest
|
||||||
|
|
||||||
ARG VERSION_TAG
|
ARG VERSION_TAG
|
||||||
|
|
||||||
|
@ -12,35 +12,34 @@ ENV DOCKER_ENABLE_SECURITY=false \
|
||||||
# PGID=1000 \
|
# PGID=1000 \
|
||||||
# UMASK=022 \
|
# UMASK=022 \
|
||||||
|
|
||||||
|
# Copy necessary files
|
||||||
|
COPY scripts/download-security-jar.sh /scripts/download-security-jar.sh
|
||||||
|
COPY scripts/init-without-ocr.sh /scripts/init-without-ocr.sh
|
||||||
|
COPY pipeline /pipeline
|
||||||
|
COPY build/libs/*.jar app.jar
|
||||||
|
|
||||||
# Create user and group using Alpine's addgroup and adduser
|
# Create user and group using Alpine's addgroup and adduser
|
||||||
#RUN addgroup -g $PGID stirlingpdfgroup && \
|
#RUN addgroup -g $PGID stirlingpdfgroup && \
|
||||||
# adduser -u $PUID -G stirlingpdfgroup -s /bin/sh -D stirlingpdfuser && \
|
# adduser -u $PUID -G stirlingpdfgroup -s /bin/sh -D stirlingpdfuser && \
|
||||||
# mkdir -p $HOME && chown stirlingpdfuser:stirlingpdfgroup $HOME
|
# mkdir -p $HOME && chown stirlingpdfuser:stirlingpdfgroup $HOME
|
||||||
|
|
||||||
# Set up necessary directories and permissions
|
# Set up necessary directories and permissions
|
||||||
#RUN mkdir -p /scripts /configs /customFiles && \
|
#RUN mkdir -p /scripts /configs /customFiles && \
|
||||||
# chown -R stirlingpdfuser:stirlingpdfgroup /scripts /configs /customFiles /logs /pipeline /pipeline/defaultWebUIConfigs /pipeline/watchedFolders /pipeline/finishedFolders
|
# chown -R stirlingpdfuser:stirlingpdfgroup /scripts /configs /customFiles /logs /pipeline /pipeline/defaultWebUIConfigs /pipeline/watchedFolders /pipeline/finishedFolders
|
||||||
|
RUN mkdir /configs /logs /customFiles && \
|
||||||
RUN mkdir -p /scripts /usr/share/fonts/opentype/noto /configs /customFiles
|
|
||||||
COPY ./scripts/download-security-jar.sh /scripts/download-security-jar.sh
|
|
||||||
COPY ./scripts/init-without-ocr.sh /scripts/init-without-ocr.sh
|
|
||||||
COPY ./pipeline/ /pipeline/
|
|
||||||
COPY build/libs/*.jar app.jar
|
|
||||||
|
|
||||||
# Set font cache and permissions
|
# Set font cache and permissions
|
||||||
#RUN chown stirlingpdfuser:stirlingpdfgroup /app.jar
|
#RUN chown stirlingpdfuser:stirlingpdfgroup /app.jar
|
||||||
|
chmod +x /scripts/*.sh && \
|
||||||
RUN chmod +x /scripts/init-without-ocr.sh && \
|
apk add --no-cache \
|
||||||
chmod +x /scripts/download-security-jar.sh && \
|
ca-certificates \
|
||||||
apk add --no-cache curl
|
tzdata \
|
||||||
|
tini \
|
||||||
# Expose the application port
|
bash \
|
||||||
EXPOSE 8080
|
curl
|
||||||
|
|
||||||
# Set environment variables
|
# Set environment variables
|
||||||
ENV ENDPOINTS_GROUPS_TO_REMOVE=CLI
|
ENV ENDPOINTS_GROUPS_TO_REMOVE=CLI
|
||||||
|
|
||||||
ENTRYPOINT ["/scripts/init-without-ocr.sh"]
|
ENTRYPOINT ["tini", "--", "/scripts/init-without-ocr.sh"]
|
||||||
|
|
||||||
# Run the application
|
# Run the application
|
||||||
CMD ["java", "-Dfile.encoding=UTF-8", "-jar", "/app.jar"]
|
CMD ["java", "-Dfile.encoding=UTF-8", "-jar", "/app.jar"]
|
||||||
|
|
|
@ -1,43 +1,24 @@
|
||||||
# Main stage
|
# Main stage
|
||||||
FROM ubuntu:latest AS base
|
FROM alpine:3.19.0
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# JDK for app
|
# JDK for app
|
||||||
RUN apt-get update && \
|
RUN apk add --no-cache \
|
||||||
apt-get install -y --no-install-recommends \
|
ca-certificates \
|
||||||
openjdk-17-jre && \
|
tzdata \
|
||||||
rm -rf /var/lib/apt/lists/*
|
tini \
|
||||||
|
bash \
|
||||||
# Doc conversion
|
|
||||||
RUN apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
libreoffice-core \
|
|
||||||
libreoffice-common \
|
|
||||||
libreoffice-writer \
|
|
||||||
libreoffice-calc \
|
|
||||||
libreoffice-impress \
|
|
||||||
python3-uno \
|
|
||||||
curl \
|
curl \
|
||||||
unoconv && \
|
openjdk17-jre \
|
||||||
rm -rf /var/lib/apt/lists/*
|
# Doc conversion
|
||||||
|
libreoffice \
|
||||||
|
|
||||||
# OCR MY PDF (unpaper for descew and other advanced featues)
|
# OCR MY PDF (unpaper for descew and other advanced featues)
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends software-properties-common gnupg2 && \
|
|
||||||
add-apt-repository ppa:alex-p/tesseract-ocr5 && apt install -y --no-install-recommends tesseract-ocr && \
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
ghostscript \
|
|
||||||
python3-pip \
|
|
||||||
ocrmypdf \
|
ocrmypdf \
|
||||||
unpaper && \
|
tesseract-ocr-data-eng \
|
||||||
rm -rf /var/lib/apt/lists/* && \
|
# CV
|
||||||
mv /usr/share/tesseract-ocr /usr/share/tesseract-ocr-original && \
|
py3-opencv \
|
||||||
pip install --no-cache-dir --upgrade pip && \
|
# python3/pip
|
||||||
pip install --no-cache-dir --upgrade ocrmypdf && \
|
python3 && \
|
||||||
pip install --no-cache-dir --upgrade pillow==10.0.1 reportlab==3.6.13 wheel==0.38.1 setuptools==65.5.1 pyjwt==2.4.0 cryptography==39.0.1
|
wget https://bootstrap.pypa.io/get-pip.py -qO - | python3 - --break-system-packages --no-cache-dir --upgrade && \
|
||||||
|
# uno unoconv and HTML
|
||||||
|
pip install --break-system-packages --no-cache-dir --upgrade unoconv WeasyPrint && \
|
||||||
#CV and HTML
|
mv /usr/share/tessdata /usr/share/tessdata-original
|
||||||
RUN pip install --no-cache-dir opencv-python-headless WeasyPrint
|
|
||||||
|
|
|
@ -21,7 +21,7 @@ Depending on your requirements, you can choose the appropriate language pack for
|
||||||
### Installing Language Packs
|
### Installing Language Packs
|
||||||
|
|
||||||
1. Download the desired language pack(s) by selecting the `.traineddata` file(s) for the language(s) you need.
|
1. Download the desired language pack(s) by selecting the `.traineddata` file(s) for the language(s) you need.
|
||||||
2. Place the `.traineddata` files in the Tesseract tessdata directory: `/usr/share/tesseract-ocr/5/tessdata` (Debian) or `/usr/share/tesseract/tessdata` (Fedora)
|
2. Place the `.traineddata` files in the Tesseract tessdata directory: `/usr/share/tessdata`
|
||||||
|
|
||||||
# DO NOT REMOVE EXISTING ENG.TRAINEDDATA, IT'S REQUIRED.
|
# DO NOT REMOVE EXISTING ENG.TRAINEDDATA, IT'S REQUIRED.
|
||||||
|
|
||||||
|
@ -37,14 +37,14 @@ services:
|
||||||
your_service_name:
|
your_service_name:
|
||||||
image: your_docker_image_name
|
image: your_docker_image_name
|
||||||
volumes:
|
volumes:
|
||||||
- /location/of/trainingData:/usr/share/tesseract-ocr/5/tessdata
|
- /location/of/trainingData:/usr/share/tessdata
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
#### Docker run
|
#### Docker run
|
||||||
Add the following to your existing docker run command
|
Add the following to your existing docker run command
|
||||||
```bash
|
```bash
|
||||||
-v /location/of/trainingData:/usr/share/tesseract-ocr/5/tessdata
|
-v /location/of/trainingData:/usr/share/tessdata
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Non-Docker
|
#### Non-Docker
|
||||||
|
|
|
@ -139,7 +139,7 @@ Easiest is to use the langpacks provided by your repositories. Skip the other st
|
||||||
Manual:
|
Manual:
|
||||||
|
|
||||||
1. Download the desired language pack(s) by selecting the `.traineddata` file(s) for the language(s) you need.
|
1. Download the desired language pack(s) by selecting the `.traineddata` file(s) for the language(s) you need.
|
||||||
2. Place the `.traineddata` files in the Tesseract tessdata directory: `/usr/share/tesseract-ocr/5/tessdata`
|
2. Place the `.traineddata` files in the Tesseract tessdata directory: `/usr/share/tessdata`
|
||||||
3.
|
3.
|
||||||
Please view [OCRmyPDF install guide](https://ocrmypdf.readthedocs.io/en/latest/installation.html) for more info.
|
Please view [OCRmyPDF install guide](https://ocrmypdf.readthedocs.io/en/latest/installation.html) for more info.
|
||||||
**IMPORTANT:** DO NOT REMOVE EXISTING `eng.traineddata`, IT'S REQUIRED.
|
**IMPORTANT:** DO NOT REMOVE EXISTING `eng.traineddata`, IT'S REQUIRED.
|
||||||
|
|
|
@ -110,7 +110,7 @@ Docker Run
|
||||||
```bash
|
```bash
|
||||||
docker run -d \
|
docker run -d \
|
||||||
-p 8080:8080 \
|
-p 8080:8080 \
|
||||||
-v /location/of/trainingData:/usr/share/tesseract-ocr/5/tessdata \
|
-v /location/of/trainingData:/usr/share/tessdata \
|
||||||
-v /location/of/extraConfigs:/configs \
|
-v /location/of/extraConfigs:/configs \
|
||||||
-v /location/of/logs:/logs \
|
-v /location/of/logs:/logs \
|
||||||
-e DOCKER_ENABLE_SECURITY=false \
|
-e DOCKER_ENABLE_SECURITY=false \
|
||||||
|
@ -131,7 +131,7 @@ services:
|
||||||
ports:
|
ports:
|
||||||
- '8080:8080'
|
- '8080:8080'
|
||||||
volumes:
|
volumes:
|
||||||
- /location/of/trainingData:/usr/share/tesseract-ocr/5/tessdata #Required for extra OCR languages
|
- /location/of/trainingData:/usr/share/tessdata #Required for extra OCR languages
|
||||||
- /location/of/extraConfigs:/configs
|
- /location/of/extraConfigs:/configs
|
||||||
# - /location/of/customFiles:/customFiles/
|
# - /location/of/customFiles:/customFiles/
|
||||||
# - /location/of/logs:/logs/
|
# - /location/of/logs:/logs/
|
||||||
|
|
|
@ -6,7 +6,11 @@ mkdir -p /usr/share/tesseract-ocr
|
||||||
cp -rn /usr/share/tesseract-ocr-original/* /usr/share/tesseract-ocr
|
cp -rn /usr/share/tesseract-ocr-original/* /usr/share/tesseract-ocr
|
||||||
|
|
||||||
if [ -d /usr/share/tesseract-ocr/4.00/tessdata ]; then
|
if [ -d /usr/share/tesseract-ocr/4.00/tessdata ]; then
|
||||||
cp -r /usr/share/tesseract-ocr/4.00/tessdata/* /usr/share/tesseract-ocr/5/tessdata/ || true;
|
cp -r /usr/share/tesseract-ocr/4.00/tessdata/* /usr/share/tessdata || true;
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ -d /usr/share/tesseract-ocr/5/tessdata ]; then
|
||||||
|
cp -r /usr/share/tesseract-ocr/5/tessdata/* /usr/share/tessdata || true;
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Check if TESSERACT_LANGS environment variable is set and is not empty
|
# Check if TESSERACT_LANGS environment variable is set and is not empty
|
||||||
|
|
Loading…
Reference in a new issue