docker changes

This commit is contained in:
Anthony Stirling 2023-12-10 23:06:35 +00:00
parent 59c7978330
commit 1f29033f17
21 changed files with 32 additions and 80 deletions

View file

@ -1,5 +1,5 @@
# Use the base image # Use the base image
FROM frooodle/stirling-pdf-base:testDontUseMe FROM frooodle/stirling-pdf-base:version6
ARG VERSION_TAG ARG VERSION_TAG

View file

@ -1,48 +1,50 @@
# Main stage # Main stage
FROM ubuntu:latest AS base FROM ubuntu:latest AS base
RUN apt-get update && apt-get install -y --no-install-recommends software-properties-common gnupg2
RUN add-apt-repository ppa:alex-p/tesseract-ocr5 && apt install -y --no-install-recommends tesseract-ocr
# JDK for app
RUN apt-get update && \ RUN apt-get update && \
apt-get install -y --no-install-recommends \ apt-get install -y --no-install-recommends \
openjdk-17-jre \ openjdk-17-jre
libreoffice-core-nogui \
# Doc conversion
RUN apt-get update && \
apt-get install -y --no-install-recommends \
libreoffice-core-nogui \
libreoffice-common \ libreoffice-common \
libreoffice-writer-nogui \ libreoffice-writer-nogui \
libreoffice-calc-nogui \ libreoffice-calc-nogui \
libreoffice-impress-nogui \ libreoffice-impress-nogui \
python3-uno \ python3-uno \
unoconv
# OCR MY PDF (unpaper for deskew and other advanced features)
RUN apt-get update && apt-get install -y --no-install-recommends software-properties-common gnupg2 && \
add-apt-repository ppa:alex-p/tesseract-ocr5 && apt install -y --no-install-recommends tesseract-ocr && \
apt-get update && \
apt-get install -y --no-install-recommends \
ghostscript \ ghostscript \
python3-pip \ python3-pip \
ocrmypdf \ ocrmypdf \
unoconv && \ unpaper && \
pip install --upgrade pip && \ pip install --upgrade pip && \
pip install --no-cache-dir --user --upgrade ocrmypdf && \
pip install --no-cache-dir --upgrade pillow==10.0.1 reportlab==3.6.13 wheel==0.38.1 setuptools==65.5.1 pyjwt==2.4.0 cryptography==39.0.1 \
pip install --no-cache-dir --upgrade ocrmypdf && \ pip install --no-cache-dir --upgrade ocrmypdf && \
pip install --no-cache-dir \ pip install --no-cache-dir --upgrade pillow==10.0.1 reportlab==3.6.13 wheel==0.38.1 setuptools==65.5.1 pyjwt==2.4.0 cryptography==39.0.1
opencv-python-headless && \
rm -rf /var/lib/apt/lists/* && \
#CV
RUN pip install --no-cache-dir opencv-python-headless
# cleanup, etc.
RUN rm -rf /var/lib/apt/lists/* && \
mkdir /usr/share/tesseract-ocr-original && \ mkdir /usr/share/tesseract-ocr-original && \
cp -r /usr/share/tesseract-ocr/* /usr/share/tesseract-ocr-original && \ cp -r /usr/share/tesseract-ocr/* /usr/share/tesseract-ocr-original && \
rm -rf /usr/share/tesseract-ocr rm -rf /usr/share/tesseract-ocr
# Python packages stage
FROM base AS python-packages
# Install build tools and Python libraries
RUN apt-get update && \
apt-get install -y --no-install-recommends \
build-essential \
libffi-dev \
libssl-dev \
zlib1g-dev \
libjpeg-dev
# Final stage: Copy necessary files from the previous stage
FROM base
COPY --from=python-packages /usr/local /usr/local

View file

@ -5,6 +5,10 @@ echo "Copying original files without overwriting existing files"
mkdir -p /usr/share/tesseract-ocr mkdir -p /usr/share/tesseract-ocr
cp -rn /usr/share/tesseract-ocr-original/* /usr/share/tesseract-ocr cp -rn /usr/share/tesseract-ocr-original/* /usr/share/tesseract-ocr
if [ -d /usr/share/tesseract-ocr/4.00/tessdata ]; then
cp -r /usr/share/tesseract-ocr/4.00/tessdata/* /usr/share/tesseract-ocr/5/tessdata/ || true;
fi
# Check if TESSERACT_LANGS environment variable is set and is not empty # Check if TESSERACT_LANGS environment variable is set and is not empty
if [[ -n "$TESSERACT_LANGS" ]]; then if [[ -n "$TESSERACT_LANGS" ]]; then
# Convert comma-separated values to a space-separated list # Convert comma-separated values to a space-separated list

View file

@ -831,8 +831,5 @@ PDFToXML.submit=تحويل
#PDFToCSV #PDFToCSV
PDFToCSV.title=PDF ??? CSV PDFToCSV.title=PDF ??? CSV
PDFToCSV.header=PDF ??? CSV PDFToCSV.header=PDF ??? CSV
##########################
### TODO: Translate ###
##########################
PDFToCSV.prompt=Choose page to extract table PDFToCSV.prompt=Choose page to extract table
PDFToCSV.submit=?????? PDFToCSV.submit=??????

View file

@ -831,8 +831,5 @@ PDFToXML.submit=Преобразуване
#PDFToCSV #PDFToCSV
PDFToCSV.title=PDF ??? CSV PDFToCSV.title=PDF ??? CSV
PDFToCSV.header=PDF ??? CSV PDFToCSV.header=PDF ??? CSV
##########################
### TODO: Translate ###
##########################
PDFToCSV.prompt=Choose page to extract table PDFToCSV.prompt=Choose page to extract table
PDFToCSV.submit=???????? PDFToCSV.submit=????????

View file

@ -831,8 +831,5 @@ PDFToXML.submit=Converteix
#PDFToCSV #PDFToCSV
PDFToCSV.title=PDF a CSV PDFToCSV.title=PDF a CSV
PDFToCSV.header=PDF a CSV PDFToCSV.header=PDF a CSV
##########################
### TODO: Translate ###
##########################
PDFToCSV.prompt=Choose page to extract table PDFToCSV.prompt=Choose page to extract table
PDFToCSV.submit=Extracte PDFToCSV.submit=Extracte

View file

@ -831,8 +831,5 @@ PDFToXML.submit=Konvertieren
#PDFToCSV #PDFToCSV
PDFToCSV.title=PDF zu CSV PDFToCSV.title=PDF zu CSV
PDFToCSV.header=PDF zu CSV PDFToCSV.header=PDF zu CSV
##########################
### TODO: Translate ###
##########################
PDFToCSV.prompt=Choose page to extract table PDFToCSV.prompt=Choose page to extract table
PDFToCSV.submit=Extrakt PDFToCSV.submit=Extrakt

View file

@ -831,8 +831,5 @@ PDFToXML.submit=\u039C\u03B5\u03C4\u03B1\u03C4\u03C1\u03BF\u03C0\u03AE
#PDFToCSV #PDFToCSV
PDFToCSV.title=PDF ?? CSV PDFToCSV.title=PDF ?? CSV
PDFToCSV.header=PDF ?? CSV PDFToCSV.header=PDF ?? CSV
##########################
### TODO: Translate ###
##########################
PDFToCSV.prompt=Choose page to extract table PDFToCSV.prompt=Choose page to extract table
PDFToCSV.submit=????????? PDFToCSV.submit=?????????

View file

@ -831,8 +831,5 @@ PDFToXML.submit=Convert
#PDFToCSV #PDFToCSV
PDFToCSV.title=PDF to CSV PDFToCSV.title=PDF to CSV
PDFToCSV.header=PDF to CSV PDFToCSV.header=PDF to CSV
##########################
### TODO: Translate ###
##########################
PDFToCSV.prompt=Choose page to extract table PDFToCSV.prompt=Choose page to extract table
PDFToCSV.submit=Extract PDFToCSV.submit=Extract

View file

@ -831,8 +831,5 @@ PDFToXML.submit=Convertir
#PDFToCSV #PDFToCSV
PDFToCSV.title=PDF a CSV PDFToCSV.title=PDF a CSV
PDFToCSV.header=PDF a CSV PDFToCSV.header=PDF a CSV
##########################
### TODO: Translate ###
##########################
PDFToCSV.prompt=Choose page to extract table PDFToCSV.prompt=Choose page to extract table
PDFToCSV.submit=Extracto PDFToCSV.submit=Extracto

View file

@ -831,8 +831,5 @@ PDFToXML.submit=Bihurtu
#PDFToCSV #PDFToCSV
PDFToCSV.title=PDF a CSV PDFToCSV.title=PDF a CSV
PDFToCSV.header=PDF a CSV PDFToCSV.header=PDF a CSV
##########################
### TODO: Translate ###
##########################
PDFToCSV.prompt=Choose page to extract table PDFToCSV.prompt=Choose page to extract table
PDFToCSV.submit=Extracto PDFToCSV.submit=Extracto

View file

@ -831,8 +831,5 @@ PDFToXML.submit=Convertir
#PDFToCSV #PDFToCSV
PDFToCSV.title=PDF en CSV PDFToCSV.title=PDF en CSV
PDFToCSV.header=PDF en CSV PDFToCSV.header=PDF en CSV
##########################
### TODO: Translate ###
##########################
PDFToCSV.prompt=Choose page to extract table PDFToCSV.prompt=Choose page to extract table
PDFToCSV.submit=Extrait PDFToCSV.submit=Extrait

View file

@ -831,8 +831,5 @@ PDFToXML.submit=Converti
#PDFToCSV #PDFToCSV
PDFToCSV.title=Da PDF a CSV PDFToCSV.title=Da PDF a CSV
PDFToCSV.header=Da PDF a CSV PDFToCSV.header=Da PDF a CSV
##########################
### TODO: Translate ###
##########################
PDFToCSV.prompt=Choose page to extract table PDFToCSV.prompt=Choose page to extract table
PDFToCSV.submit=Estratto PDFToCSV.submit=Estratto

View file

@ -831,8 +831,5 @@ PDFToXML.submit=変換
#PDFToCSV #PDFToCSV
PDFToCSV.title=PDF??CSV? PDFToCSV.title=PDF??CSV?
PDFToCSV.header=PDF??CSV? PDFToCSV.header=PDF??CSV?
##########################
### TODO: Translate ###
##########################
PDFToCSV.prompt=Choose page to extract table PDFToCSV.prompt=Choose page to extract table
PDFToCSV.submit=???? PDFToCSV.submit=????

View file

@ -831,8 +831,5 @@ PDFToXML.submit=변환
#PDFToCSV #PDFToCSV
PDFToCSV.title=PDF? CSV? PDFToCSV.title=PDF? CSV?
PDFToCSV.header=PDF? CSV? PDFToCSV.header=PDF? CSV?
##########################
### TODO: Translate ###
##########################
PDFToCSV.prompt=Choose page to extract table PDFToCSV.prompt=Choose page to extract table
PDFToCSV.submit=?? PDFToCSV.submit=??

View file

@ -831,8 +831,5 @@ PDFToXML.submit=Converteren
#PDFToCSV #PDFToCSV
PDFToCSV.title=PDF naar CSV PDFToCSV.title=PDF naar CSV
PDFToCSV.header=PDF naar CSV PDFToCSV.header=PDF naar CSV
##########################
### TODO: Translate ###
##########################
PDFToCSV.prompt=Choose page to extract table PDFToCSV.prompt=Choose page to extract table
PDFToCSV.submit=Extract PDFToCSV.submit=Extract

View file

@ -831,8 +831,5 @@ PDFToXML.submit=Konwertuj
#PDFToCSV #PDFToCSV
PDFToCSV.title=PDF na CSV PDFToCSV.title=PDF na CSV
PDFToCSV.header=PDF na CSV PDFToCSV.header=PDF na CSV
##########################
### TODO: Translate ###
##########################
PDFToCSV.prompt=Choose page to extract table PDFToCSV.prompt=Choose page to extract table
PDFToCSV.submit=Wyci?g PDFToCSV.submit=Wyci?g

View file

@ -831,8 +831,5 @@ PDFToXML.submit=Конвертировать
#PDFToCSV #PDFToCSV
PDFToCSV.title=PDF ? CSV PDFToCSV.title=PDF ? CSV
PDFToCSV.header=PDF ? CSV PDFToCSV.header=PDF ? CSV
##########################
### TODO: Translate ###
##########################
PDFToCSV.prompt=Choose page to extract table PDFToCSV.prompt=Choose page to extract table
PDFToCSV.submit=??????? PDFToCSV.submit=???????

View file

@ -831,8 +831,5 @@ PDFToXML.submit=Konvertera
#PDFToCSV #PDFToCSV
PDFToCSV.title=PDF till CSV PDFToCSV.title=PDF till CSV
PDFToCSV.header=PDF till CSV PDFToCSV.header=PDF till CSV
##########################
### TODO: Translate ###
##########################
PDFToCSV.prompt=Choose page to extract table PDFToCSV.prompt=Choose page to extract table
PDFToCSV.submit=Navvit PDFToCSV.submit=Navvit

View file

@ -831,8 +831,5 @@ PDFToXML.submit=Dönüştür
#PDFToCSV #PDFToCSV
PDFToCSV.title=PDF to CSV PDFToCSV.title=PDF to CSV
PDFToCSV.header=PDF to CSV PDFToCSV.header=PDF to CSV
##########################
### TODO: Translate ###
##########################
PDFToCSV.prompt=Choose page to extract table PDFToCSV.prompt=Choose page to extract table
PDFToCSV.submit=Extract PDFToCSV.submit=Extract

View file

@ -831,8 +831,5 @@ PDFToXML.submit=转换
#PDFToCSV #PDFToCSV
PDFToCSV.title=PDF ? CSV PDFToCSV.title=PDF ? CSV
PDFToCSV.header=PDF ? CSV PDFToCSV.header=PDF ? CSV
##########################
### TODO: Translate ###
##########################
PDFToCSV.prompt=Choose page to extract table PDFToCSV.prompt=Choose page to extract table
PDFToCSV.submit=?? PDFToCSV.submit=??