fixes
This commit is contained in:
parent
8a143d139c
commit
22343e507d
6 changed files with 86 additions and 84 deletions
33
Dockerfile
33
Dockerfile
|
@ -1,5 +1,32 @@
|
|||
# Use the base image
|
||||
FROM frooodle/stirling-pdf-base:version8
|
||||
# Main stage
|
||||
FROM alpine:3.19.0
|
||||
|
||||
# JDK for app
|
||||
RUN echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/apk/repositories && \
|
||||
echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/community" | tee -a /etc/apk/repositories && \
|
||||
echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/testing" | tee -a /etc/apk/repositories && \
|
||||
apk add --no-cache \
|
||||
ca-certificates \
|
||||
tzdata \
|
||||
tini \
|
||||
bash \
|
||||
curl \
|
||||
openjdk17-jre \
|
||||
# Doc conversion
|
||||
libreoffice@testing \
|
||||
# OCR MY PDF (unpaper for deskew and other advanced features)
|
||||
ocrmypdf \
|
||||
tesseract-ocr-data-eng \
|
||||
# CV
|
||||
py3-opencv \
|
||||
# python3/pip
|
||||
python3 && \
|
||||
wget https://bootstrap.pypa.io/get-pip.py -qO - | python3 - --break-system-packages --no-cache-dir --upgrade && \
|
||||
# uno unoconv and HTML
|
||||
pip install --break-system-packages --no-cache-dir --upgrade unoconv WeasyPrint && \
|
||||
mv /usr/share/tessdata /usr/share/tessdata-original
|
||||
|
||||
|
||||
|
||||
ARG VERSION_TAG
|
||||
|
||||
|
@ -24,7 +51,7 @@ COPY build/libs/*.jar app.jar
|
|||
## useradd -u $PUID -g stirlingpdfgroup -s /bin/sh stirlingpdfuser && \
|
||||
## mkdir -p $HOME && chown stirlingpdfuser:stirlingpdfgroup $HOME && \
|
||||
# Set up necessary directories and permissions
|
||||
RUN mkdir /configs /logs /customFiles /pipeline/watchedFolders /pipeline/finishedFolders && \
|
||||
RUN mkdir -p /configs /logs /customFiles /pipeline/watchedFolders /pipeline/finishedFolders && \
|
||||
##&& \
|
||||
## chown -R stirlingpdfuser:stirlingpdfgroup /scripts /usr/share/fonts/opentype/noto /usr/share/tesseract-ocr /configs /customFiles && \
|
||||
## chown -R stirlingpdfuser:stirlingpdfgroup /usr/share/tesseract-ocr-original && \
|
||||
|
|
|
@ -20,17 +20,19 @@ COPY src/main/resources/static/fonts/*.ttf /usr/share/fonts/opentype/noto
|
|||
COPY src/main/resources/static/fonts/*.otf /usr/share/fonts/opentype/noto
|
||||
COPY build/libs/*.jar app.jar
|
||||
|
||||
RUN apk add --no-cache \
|
||||
RUN echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/apk/repositories && \
|
||||
echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/community" | tee -a /etc/apk/repositories && \
|
||||
echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/testing" | tee -a /etc/apk/repositories && \
|
||||
apk add --no-cache \
|
||||
ca-certificates \
|
||||
tzdata \
|
||||
tini \
|
||||
bash \
|
||||
curl \
|
||||
openjdk17-jre && \
|
||||
openjdk17-jre \
|
||||
# Doc conversion
|
||||
apk add --no-cache libreoffice --repository http://dl-cdn.alpinelinux.org/alpine/edge/community && \
|
||||
libreoffice@testing \
|
||||
# python and pip
|
||||
apk add --no-cache \
|
||||
python3 && \
|
||||
wget https://bootstrap.pypa.io/get-pip.py -qO - | python3 - --break-system-packages --no-cache-dir --upgrade && \
|
||||
# uno unoconv and HTML
|
||||
|
@ -40,14 +42,11 @@ RUN apk add --no-cache \
|
|||
# useradd -u $PUID -g stirlingpdfgroup -s /bin/sh stirlingpdfuser && \
|
||||
# mkdir -p $HOME && chown stirlingpdfuser:stirlingpdfgroup $HOME
|
||||
# Set up necessary directories and permissions
|
||||
mkdir /configs /logs /customFiles /pipeline/watchedFolders /pipeline/finishedFolders && \
|
||||
mkdir -p /configs /logs /customFiles /pipeline/watchedFolders /pipeline/finishedFolders && \
|
||||
# chown -R stirlingpdfuser:stirlingpdfgroup /usr/share/fonts/opentype/noto /configs /customFiles
|
||||
# Set font cache and permissions
|
||||
fc-cache -f -v && \
|
||||
chmod +x /scripts/*.sh && \
|
||||
echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/apk/repositories && \
|
||||
echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/community" | tee -a /etc/apk/repositories && \
|
||||
echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/testing" | tee -a /etc/apk/repositories
|
||||
chmod +x /scripts/*.sh
|
||||
# chown stirlingpdfuser:stirlingpdfgroup /app.jar
|
||||
|
||||
# Set environment variables
|
||||
|
|
|
@ -1,28 +0,0 @@
|
|||
# Main stage
|
||||
FROM alpine:3.19.0
|
||||
|
||||
# JDK for app
|
||||
RUN echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/apk/repositories && \
|
||||
echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/community" | tee -a /etc/apk/repositories && \
|
||||
echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/testing" | tee -a /etc/apk/repositories && \
|
||||
apk add --no-cache \
|
||||
ca-certificates \
|
||||
tzdata \
|
||||
tini \
|
||||
bash \
|
||||
curl \
|
||||
openjdk17-jre \
|
||||
# Doc conversion
|
||||
libreoffice@testing \
|
||||
# OCR MY PDF (unpaper for deskew and other advanced features)
|
||||
ocrmypdf \
|
||||
tesseract-ocr-data-eng \
|
||||
# CV
|
||||
py3-opencv \
|
||||
# python3/pip
|
||||
python3 && \
|
||||
wget https://bootstrap.pypa.io/get-pip.py -qO - | python3 - --break-system-packages --no-cache-dir --upgrade && \
|
||||
# uno unoconv and HTML
|
||||
pip install --break-system-packages --no-cache-dir --upgrade unoconv WeasyPrint && \
|
||||
mv /usr/share/tessdata /usr/share/tessdata-original
|
||||
|
|
@ -4,6 +4,7 @@ import java.io.IOException;
|
|||
|
||||
import org.apache.pdfbox.Loader;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.text.PDFTextStripper;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.ModelAttribute;
|
||||
|
@ -11,7 +12,6 @@ import org.springframework.web.bind.annotation.PostMapping;
|
|||
import org.springframework.web.bind.annotation.RequestMapping;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
import org.apache.pdfbox.text.PDFTextStripper;
|
||||
|
||||
import io.github.pixee.security.Filenames;
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
|
@ -69,8 +69,12 @@ public class ConvertPDFToOffice {
|
|||
try (PDDocument document = Loader.loadPDF(inputFile.getBytes())) {
|
||||
PDFTextStripper stripper = new PDFTextStripper();
|
||||
String text = stripper.getText(document);
|
||||
return WebResponseUtils.bytesToWebResponse(text.getBytes(), Filenames.toSimpleFileName(inputFile.getOriginalFilename()).replaceFirst("[.][^.]+$", "")
|
||||
+ ".txt" , MediaType.TEXT_PLAIN);
|
||||
return WebResponseUtils.bytesToWebResponse(
|
||||
text.getBytes(),
|
||||
Filenames.toSimpleFileName(inputFile.getOriginalFilename())
|
||||
.replaceFirst("[.][^.]+$", "")
|
||||
+ ".txt",
|
||||
MediaType.TEXT_PLAIN);
|
||||
}
|
||||
} else {
|
||||
PDFToFile pdfToFile = new PDFToFile();
|
||||
|
|
|
@ -68,7 +68,9 @@ public class ShowJavascript {
|
|||
|
||||
if (script.isEmpty()) {
|
||||
script =
|
||||
"PDF '" + Filenames.toSimpleFileName(inputFile.getOriginalFilename()) + "' does not contain Javascript";
|
||||
"PDF '"
|
||||
+ Filenames.toSimpleFileName(inputFile.getOriginalFilename())
|
||||
+ "' does not contain Javascript";
|
||||
}
|
||||
|
||||
return WebResponseUtils.bytesToWebResponse(
|
||||
|
|
|
@ -97,12 +97,10 @@ public class StampController {
|
|||
PDRectangle pageSize = page.getMediaBox();
|
||||
float margin = marginFactor * (pageSize.getWidth() + pageSize.getHeight()) / 2;
|
||||
|
||||
|
||||
PDPageContentStream contentStream =
|
||||
new PDPageContentStream(
|
||||
document, page, PDPageContentStream.AppendMode.APPEND, true, true);
|
||||
|
||||
|
||||
PDExtendedGraphicsState graphicsState = new PDExtendedGraphicsState();
|
||||
graphicsState.setNonStrokingAlphaConstant(opacity);
|
||||
contentStream.setGraphicsStateParameters(graphicsState);
|
||||
|
|
Loading…
Reference in a new issue