docker changes
This commit is contained in:
parent
59c7978330
commit
1f29033f17
21 changed files with 32 additions and 80 deletions
|
@ -1,5 +1,5 @@
|
||||||
# Use the base image
|
# Use the base image
|
||||||
FROM frooodle/stirling-pdf-base:testDontUseMe
|
FROM frooodle/stirling-pdf-base:version6
|
||||||
|
|
||||||
ARG VERSION_TAG
|
ARG VERSION_TAG
|
||||||
|
|
||||||
|
|
|
@ -1,48 +1,50 @@
|
||||||
# Main stage
|
# Main stage
|
||||||
FROM ubuntu:latest AS base
|
FROM ubuntu:latest AS base
|
||||||
|
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends software-properties-common gnupg2
|
|
||||||
|
|
||||||
RUN add-apt-repository ppa:alex-p/tesseract-ocr5 && apt install -y --no-install-recommends tesseract-ocr
|
|
||||||
|
|
||||||
|
# JDK for app
|
||||||
RUN apt-get update && \
|
RUN apt-get update && \
|
||||||
apt-get install -y --no-install-recommends \
|
apt-get install -y --no-install-recommends \
|
||||||
openjdk-17-jre \
|
openjdk-17-jre
|
||||||
libreoffice-core-nogui \
|
|
||||||
|
|
||||||
|
# Doc conversion
|
||||||
|
RUN apt-get update && \
|
||||||
|
apt-get install -y --no-install-recommends \
|
||||||
|
libreoffice-core-nogui \
|
||||||
libreoffice-common \
|
libreoffice-common \
|
||||||
libreoffice-writer-nogui \
|
libreoffice-writer-nogui \
|
||||||
libreoffice-calc-nogui \
|
libreoffice-calc-nogui \
|
||||||
libreoffice-impress-nogui \
|
libreoffice-impress-nogui \
|
||||||
python3-uno \
|
python3-uno \
|
||||||
|
unoconv
|
||||||
|
|
||||||
|
|
||||||
|
# OCR MY PDF (unpaper for deskew and other advanced features)
|
||||||
|
RUN apt-get update && apt-get install -y --no-install-recommends software-properties-common gnupg2 && \
|
||||||
|
add-apt-repository ppa:alex-p/tesseract-ocr5 && apt install -y --no-install-recommends tesseract-ocr && \
|
||||||
|
apt-get update && \
|
||||||
|
apt-get install -y --no-install-recommends \
|
||||||
ghostscript \
|
ghostscript \
|
||||||
python3-pip \
|
python3-pip \
|
||||||
ocrmypdf \
|
ocrmypdf \
|
||||||
unoconv && \
|
unpaper && \
|
||||||
pip install --upgrade pip && \
|
pip install --upgrade pip && \
|
||||||
pip install --no-cache-dir --user --upgrade ocrmypdf && \
|
|
||||||
pip install --no-cache-dir --upgrade pillow==10.0.1 reportlab==3.6.13 wheel==0.38.1 setuptools==65.5.1 pyjwt==2.4.0 cryptography==39.0.1 \
|
|
||||||
pip install --no-cache-dir --upgrade ocrmypdf && \
|
pip install --no-cache-dir --upgrade ocrmypdf && \
|
||||||
pip install --no-cache-dir \
|
pip install --no-cache-dir --upgrade pillow==10.0.1 reportlab==3.6.13 wheel==0.38.1 setuptools==65.5.1 pyjwt==2.4.0 cryptography==39.0.1
|
||||||
opencv-python-headless && \
|
|
||||||
rm -rf /var/lib/apt/lists/* && \
|
|
||||||
|
#CV
|
||||||
|
RUN pip install --no-cache-dir opencv-python-headless
|
||||||
|
|
||||||
|
|
||||||
|
# cleanup and etc
|
||||||
|
RUN rm -rf /var/lib/apt/lists/* && \
|
||||||
mkdir /usr/share/tesseract-ocr-original && \
|
mkdir /usr/share/tesseract-ocr-original && \
|
||||||
cp -r /usr/share/tesseract-ocr/* /usr/share/tesseract-ocr-original && \
|
cp -r /usr/share/tesseract-ocr/* /usr/share/tesseract-ocr-original && \
|
||||||
rm -rf /usr/share/tesseract-ocr
|
rm -rf /usr/share/tesseract-ocr
|
||||||
|
|
||||||
|
|
||||||
# Python packages stage
|
|
||||||
FROM base AS python-packages
|
|
||||||
# Install build tools and Python libraries
|
|
||||||
RUN apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
build-essential \
|
|
||||||
libffi-dev \
|
|
||||||
libssl-dev \
|
|
||||||
zlib1g-dev \
|
|
||||||
libjpeg-dev
|
|
||||||
|
|
||||||
# Final stage: Copy necessary files from the previous stage
|
|
||||||
FROM base
|
|
||||||
COPY --from=python-packages /usr/local /usr/local
|
|
||||||
|
|
||||||
|
|
|
@ -5,6 +5,10 @@ echo "Copying original files without overwriting existing files"
|
||||||
mkdir -p /usr/share/tesseract-ocr
|
mkdir -p /usr/share/tesseract-ocr
|
||||||
cp -rn /usr/share/tesseract-ocr-original/* /usr/share/tesseract-ocr
|
cp -rn /usr/share/tesseract-ocr-original/* /usr/share/tesseract-ocr
|
||||||
|
|
||||||
|
if [ -d /usr/share/tesseract-ocr/4.00/tessdata ]; then
|
||||||
|
cp -r /usr/share/tesseract-ocr/4.00/tessdata/* /usr/share/tesseract-ocr/5/tessdata/ || true;
|
||||||
|
fi
|
||||||
|
|
||||||
# Check if TESSERACT_LANGS environment variable is set and is not empty
|
# Check if TESSERACT_LANGS environment variable is set and is not empty
|
||||||
if [[ -n "$TESSERACT_LANGS" ]]; then
|
if [[ -n "$TESSERACT_LANGS" ]]; then
|
||||||
# Convert comma-separated values to a space-separated list
|
# Convert comma-separated values to a space-separated list
|
||||||
|
|
|
@ -831,8 +831,5 @@ PDFToXML.submit=تحويل
|
||||||
#PDFToCSV
|
#PDFToCSV
|
||||||
PDFToCSV.title=PDF ??? CSV
|
PDFToCSV.title=PDF ??? CSV
|
||||||
PDFToCSV.header=PDF ??? CSV
|
PDFToCSV.header=PDF ??? CSV
|
||||||
##########################
|
|
||||||
### TODO: Translate ###
|
|
||||||
##########################
|
|
||||||
PDFToCSV.prompt=Choose page to extract table
|
PDFToCSV.prompt=Choose page to extract table
|
||||||
PDFToCSV.submit=??????
|
PDFToCSV.submit=??????
|
||||||
|
|
|
@ -831,8 +831,5 @@ PDFToXML.submit=Преобразуване
|
||||||
#PDFToCSV
|
#PDFToCSV
|
||||||
PDFToCSV.title=PDF ??? CSV
|
PDFToCSV.title=PDF ??? CSV
|
||||||
PDFToCSV.header=PDF ??? CSV
|
PDFToCSV.header=PDF ??? CSV
|
||||||
##########################
|
|
||||||
### TODO: Translate ###
|
|
||||||
##########################
|
|
||||||
PDFToCSV.prompt=Choose page to extract table
|
PDFToCSV.prompt=Choose page to extract table
|
||||||
PDFToCSV.submit=????????
|
PDFToCSV.submit=????????
|
||||||
|
|
|
@ -831,8 +831,5 @@ PDFToXML.submit=Converteix
|
||||||
#PDFToCSV
|
#PDFToCSV
|
||||||
PDFToCSV.title=PDF a CSV
|
PDFToCSV.title=PDF a CSV
|
||||||
PDFToCSV.header=PDF a CSV
|
PDFToCSV.header=PDF a CSV
|
||||||
##########################
|
|
||||||
### TODO: Translate ###
|
|
||||||
##########################
|
|
||||||
PDFToCSV.prompt=Choose page to extract table
|
PDFToCSV.prompt=Choose page to extract table
|
||||||
PDFToCSV.submit=Extracte
|
PDFToCSV.submit=Extracte
|
||||||
|
|
|
@ -831,8 +831,5 @@ PDFToXML.submit=Konvertieren
|
||||||
#PDFToCSV
|
#PDFToCSV
|
||||||
PDFToCSV.title=PDF zu CSV
|
PDFToCSV.title=PDF zu CSV
|
||||||
PDFToCSV.header=PDF zu CSV
|
PDFToCSV.header=PDF zu CSV
|
||||||
##########################
|
|
||||||
### TODO: Translate ###
|
|
||||||
##########################
|
|
||||||
PDFToCSV.prompt=Choose page to extract table
|
PDFToCSV.prompt=Choose page to extract table
|
||||||
PDFToCSV.submit=Extrakt
|
PDFToCSV.submit=Extrakt
|
||||||
|
|
|
@ -831,8 +831,5 @@ PDFToXML.submit=\u039C\u03B5\u03C4\u03B1\u03C4\u03C1\u03BF\u03C0\u03AE
|
||||||
#PDFToCSV
|
#PDFToCSV
|
||||||
PDFToCSV.title=PDF ?? CSV
|
PDFToCSV.title=PDF ?? CSV
|
||||||
PDFToCSV.header=PDF ?? CSV
|
PDFToCSV.header=PDF ?? CSV
|
||||||
##########################
|
|
||||||
### TODO: Translate ###
|
|
||||||
##########################
|
|
||||||
PDFToCSV.prompt=Choose page to extract table
|
PDFToCSV.prompt=Choose page to extract table
|
||||||
PDFToCSV.submit=?????????
|
PDFToCSV.submit=?????????
|
||||||
|
|
|
@ -831,8 +831,5 @@ PDFToXML.submit=Convert
|
||||||
#PDFToCSV
|
#PDFToCSV
|
||||||
PDFToCSV.title=PDF to CSV
|
PDFToCSV.title=PDF to CSV
|
||||||
PDFToCSV.header=PDF to CSV
|
PDFToCSV.header=PDF to CSV
|
||||||
##########################
|
|
||||||
### TODO: Translate ###
|
|
||||||
##########################
|
|
||||||
PDFToCSV.prompt=Choose page to extract table
|
PDFToCSV.prompt=Choose page to extract table
|
||||||
PDFToCSV.submit=Extract
|
PDFToCSV.submit=Extract
|
||||||
|
|
|
@ -831,8 +831,5 @@ PDFToXML.submit=Convertir
|
||||||
#PDFToCSV
|
#PDFToCSV
|
||||||
PDFToCSV.title=PDF a CSV
|
PDFToCSV.title=PDF a CSV
|
||||||
PDFToCSV.header=PDF a CSV
|
PDFToCSV.header=PDF a CSV
|
||||||
##########################
|
|
||||||
### TODO: Translate ###
|
|
||||||
##########################
|
|
||||||
PDFToCSV.prompt=Choose page to extract table
|
PDFToCSV.prompt=Choose page to extract table
|
||||||
PDFToCSV.submit=Extracto
|
PDFToCSV.submit=Extracto
|
||||||
|
|
|
@ -831,8 +831,5 @@ PDFToXML.submit=Bihurtu
|
||||||
#PDFToCSV
|
#PDFToCSV
|
||||||
PDFToCSV.title=PDF a CSV
|
PDFToCSV.title=PDF a CSV
|
||||||
PDFToCSV.header=PDF a CSV
|
PDFToCSV.header=PDF a CSV
|
||||||
##########################
|
|
||||||
### TODO: Translate ###
|
|
||||||
##########################
|
|
||||||
PDFToCSV.prompt=Choose page to extract table
|
PDFToCSV.prompt=Choose page to extract table
|
||||||
PDFToCSV.submit=Extracto
|
PDFToCSV.submit=Extracto
|
||||||
|
|
|
@ -831,8 +831,5 @@ PDFToXML.submit=Convertir
|
||||||
#PDFToCSV
|
#PDFToCSV
|
||||||
PDFToCSV.title=PDF en CSV
|
PDFToCSV.title=PDF en CSV
|
||||||
PDFToCSV.header=PDF en CSV
|
PDFToCSV.header=PDF en CSV
|
||||||
##########################
|
|
||||||
### TODO: Translate ###
|
|
||||||
##########################
|
|
||||||
PDFToCSV.prompt=Choose page to extract table
|
PDFToCSV.prompt=Choose page to extract table
|
||||||
PDFToCSV.submit=Extrait
|
PDFToCSV.submit=Extrait
|
||||||
|
|
|
@ -831,8 +831,5 @@ PDFToXML.submit=Converti
|
||||||
#PDFToCSV
|
#PDFToCSV
|
||||||
PDFToCSV.title=Da PDF a CSV
|
PDFToCSV.title=Da PDF a CSV
|
||||||
PDFToCSV.header=Da PDF a CSV
|
PDFToCSV.header=Da PDF a CSV
|
||||||
##########################
|
|
||||||
### TODO: Translate ###
|
|
||||||
##########################
|
|
||||||
PDFToCSV.prompt=Choose page to extract table
|
PDFToCSV.prompt=Choose page to extract table
|
||||||
PDFToCSV.submit=Estratto
|
PDFToCSV.submit=Estratto
|
||||||
|
|
|
@ -831,8 +831,5 @@ PDFToXML.submit=変換
|
||||||
#PDFToCSV
|
#PDFToCSV
|
||||||
PDFToCSV.title=PDF??CSV?
|
PDFToCSV.title=PDF??CSV?
|
||||||
PDFToCSV.header=PDF??CSV?
|
PDFToCSV.header=PDF??CSV?
|
||||||
##########################
|
|
||||||
### TODO: Translate ###
|
|
||||||
##########################
|
|
||||||
PDFToCSV.prompt=Choose page to extract table
|
PDFToCSV.prompt=Choose page to extract table
|
||||||
PDFToCSV.submit=????
|
PDFToCSV.submit=????
|
||||||
|
|
|
@ -831,8 +831,5 @@ PDFToXML.submit=변환
|
||||||
#PDFToCSV
|
#PDFToCSV
|
||||||
PDFToCSV.title=PDF? CSV?
|
PDFToCSV.title=PDF? CSV?
|
||||||
PDFToCSV.header=PDF? CSV?
|
PDFToCSV.header=PDF? CSV?
|
||||||
##########################
|
|
||||||
### TODO: Translate ###
|
|
||||||
##########################
|
|
||||||
PDFToCSV.prompt=Choose page to extract table
|
PDFToCSV.prompt=Choose page to extract table
|
||||||
PDFToCSV.submit=??
|
PDFToCSV.submit=??
|
||||||
|
|
|
@ -831,8 +831,5 @@ PDFToXML.submit=Converteren
|
||||||
#PDFToCSV
|
#PDFToCSV
|
||||||
PDFToCSV.title=PDF naar CSV
|
PDFToCSV.title=PDF naar CSV
|
||||||
PDFToCSV.header=PDF naar CSV
|
PDFToCSV.header=PDF naar CSV
|
||||||
##########################
|
|
||||||
### TODO: Translate ###
|
|
||||||
##########################
|
|
||||||
PDFToCSV.prompt=Choose page to extract table
|
PDFToCSV.prompt=Choose page to extract table
|
||||||
PDFToCSV.submit=Extract
|
PDFToCSV.submit=Extract
|
||||||
|
|
|
@ -831,8 +831,5 @@ PDFToXML.submit=Konwertuj
|
||||||
#PDFToCSV
|
#PDFToCSV
|
||||||
PDFToCSV.title=PDF na CSV
|
PDFToCSV.title=PDF na CSV
|
||||||
PDFToCSV.header=PDF na CSV
|
PDFToCSV.header=PDF na CSV
|
||||||
##########################
|
|
||||||
### TODO: Translate ###
|
|
||||||
##########################
|
|
||||||
PDFToCSV.prompt=Choose page to extract table
|
PDFToCSV.prompt=Choose page to extract table
|
||||||
PDFToCSV.submit=Wyci?g
|
PDFToCSV.submit=Wyci?g
|
||||||
|
|
|
@ -831,8 +831,5 @@ PDFToXML.submit=Конвертировать
|
||||||
#PDFToCSV
|
#PDFToCSV
|
||||||
PDFToCSV.title=PDF ? CSV
|
PDFToCSV.title=PDF ? CSV
|
||||||
PDFToCSV.header=PDF ? CSV
|
PDFToCSV.header=PDF ? CSV
|
||||||
##########################
|
|
||||||
### TODO: Translate ###
|
|
||||||
##########################
|
|
||||||
PDFToCSV.prompt=Choose page to extract table
|
PDFToCSV.prompt=Choose page to extract table
|
||||||
PDFToCSV.submit=???????
|
PDFToCSV.submit=???????
|
||||||
|
|
|
@ -831,8 +831,5 @@ PDFToXML.submit=Konvertera
|
||||||
#PDFToCSV
|
#PDFToCSV
|
||||||
PDFToCSV.title=PDF till CSV
|
PDFToCSV.title=PDF till CSV
|
||||||
PDFToCSV.header=PDF till CSV
|
PDFToCSV.header=PDF till CSV
|
||||||
##########################
|
|
||||||
### TODO: Translate ###
|
|
||||||
##########################
|
|
||||||
PDFToCSV.prompt=Choose page to extract table
|
PDFToCSV.prompt=Choose page to extract table
|
||||||
PDFToCSV.submit=Navvit
|
PDFToCSV.submit=Navvit
|
||||||
|
|
|
@ -831,8 +831,5 @@ PDFToXML.submit=Dönüştür
|
||||||
#PDFToCSV
|
#PDFToCSV
|
||||||
PDFToCSV.title=PDF to CSV
|
PDFToCSV.title=PDF to CSV
|
||||||
PDFToCSV.header=PDF to CSV
|
PDFToCSV.header=PDF to CSV
|
||||||
##########################
|
|
||||||
### TODO: Translate ###
|
|
||||||
##########################
|
|
||||||
PDFToCSV.prompt=Choose page to extract table
|
PDFToCSV.prompt=Choose page to extract table
|
||||||
PDFToCSV.submit=Extract
|
PDFToCSV.submit=Extract
|
||||||
|
|
|
@ -831,8 +831,5 @@ PDFToXML.submit=转换
|
||||||
#PDFToCSV
|
#PDFToCSV
|
||||||
PDFToCSV.title=PDF ? CSV
|
PDFToCSV.title=PDF ? CSV
|
||||||
PDFToCSV.header=PDF ? CSV
|
PDFToCSV.header=PDF ? CSV
|
||||||
##########################
|
|
||||||
### TODO: Translate ###
|
|
||||||
##########################
|
|
||||||
PDFToCSV.prompt=Choose page to extract table
|
PDFToCSV.prompt=Choose page to extract table
|
||||||
PDFToCSV.submit=??
|
PDFToCSV.submit=??
|
||||||
|
|
Loading…
Reference in a new issue