fixes
This commit is contained in:
parent
8a143d139c
commit
22343e507d
6 changed files with 86 additions and 84 deletions
33
Dockerfile
33
Dockerfile
|
@ -1,5 +1,32 @@
|
||||||
# Use the base image
|
# Main stage
|
||||||
FROM frooodle/stirling-pdf-base:version8
|
FROM alpine:3.19.0
|
||||||
|
|
||||||
|
# JDK for app
|
||||||
|
RUN echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/apk/repositories && \
|
||||||
|
echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/community" | tee -a /etc/apk/repositories && \
|
||||||
|
echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/testing" | tee -a /etc/apk/repositories && \
|
||||||
|
apk add --no-cache \
|
||||||
|
ca-certificates \
|
||||||
|
tzdata \
|
||||||
|
tini \
|
||||||
|
bash \
|
||||||
|
curl \
|
||||||
|
openjdk17-jre \
|
||||||
|
# Doc conversion
|
||||||
|
libreoffice@testing \
|
||||||
|
# OCR MY PDF (unpaper for descew and other advanced featues)
|
||||||
|
ocrmypdf \
|
||||||
|
tesseract-ocr-data-eng \
|
||||||
|
# CV
|
||||||
|
py3-opencv \
|
||||||
|
# python3/pip
|
||||||
|
python3 && \
|
||||||
|
wget https://bootstrap.pypa.io/get-pip.py -qO - | python3 - --break-system-packages --no-cache-dir --upgrade && \
|
||||||
|
# uno unoconv and HTML
|
||||||
|
pip install --break-system-packages --no-cache-dir --upgrade unoconv WeasyPrint && \
|
||||||
|
mv /usr/share/tessdata /usr/share/tessdata-original
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
ARG VERSION_TAG
|
ARG VERSION_TAG
|
||||||
|
|
||||||
|
@ -24,7 +51,7 @@ COPY build/libs/*.jar app.jar
|
||||||
## useradd -u $PUID -g stirlingpdfgroup -s /bin/sh stirlingpdfuser && \
|
## useradd -u $PUID -g stirlingpdfgroup -s /bin/sh stirlingpdfuser && \
|
||||||
## mkdir -p $HOME && chown stirlingpdfuser:stirlingpdfgroup $HOME && \
|
## mkdir -p $HOME && chown stirlingpdfuser:stirlingpdfgroup $HOME && \
|
||||||
# Set up necessary directories and permissions
|
# Set up necessary directories and permissions
|
||||||
RUN mkdir /configs /logs /customFiles /pipeline/watchedFolders /pipeline/finishedFolders && \
|
RUN mkdir -p /configs /logs /customFiles /pipeline/watchedFolders /pipeline/finishedFolders && \
|
||||||
##&& \
|
##&& \
|
||||||
## chown -R stirlingpdfuser:stirlingpdfgroup /scripts /usr/share/fonts/opentype/noto /usr/share/tesseract-ocr /configs /customFiles && \
|
## chown -R stirlingpdfuser:stirlingpdfgroup /scripts /usr/share/fonts/opentype/noto /usr/share/tesseract-ocr /configs /customFiles && \
|
||||||
## chown -R stirlingpdfuser:stirlingpdfgroup /usr/share/tesseract-ocr-original && \
|
## chown -R stirlingpdfuser:stirlingpdfgroup /usr/share/tesseract-ocr-original && \
|
||||||
|
|
|
@ -20,17 +20,19 @@ COPY src/main/resources/static/fonts/*.ttf /usr/share/fonts/opentype/noto
|
||||||
COPY src/main/resources/static/fonts/*.otf /usr/share/fonts/opentype/noto
|
COPY src/main/resources/static/fonts/*.otf /usr/share/fonts/opentype/noto
|
||||||
COPY build/libs/*.jar app.jar
|
COPY build/libs/*.jar app.jar
|
||||||
|
|
||||||
RUN apk add --no-cache \
|
RUN echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/apk/repositories && \
|
||||||
|
echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/community" | tee -a /etc/apk/repositories && \
|
||||||
|
echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/testing" | tee -a /etc/apk/repositories && \
|
||||||
|
apk add --no-cache \
|
||||||
ca-certificates \
|
ca-certificates \
|
||||||
tzdata \
|
tzdata \
|
||||||
tini \
|
tini \
|
||||||
bash \
|
bash \
|
||||||
curl \
|
curl \
|
||||||
openjdk17-jre && \
|
openjdk17-jre \
|
||||||
# Doc conversion
|
# Doc conversion
|
||||||
apk add --no-cache libreoffice --repository http://dl-cdn.alpinelinux.org/alpine/edge/community && \
|
libreoffice@testing \
|
||||||
# python and pip
|
# python and pip
|
||||||
apk add --no-cache \
|
|
||||||
python3 && \
|
python3 && \
|
||||||
wget https://bootstrap.pypa.io/get-pip.py -qO - | python3 - --break-system-packages --no-cache-dir --upgrade && \
|
wget https://bootstrap.pypa.io/get-pip.py -qO - | python3 - --break-system-packages --no-cache-dir --upgrade && \
|
||||||
# uno unoconv and HTML
|
# uno unoconv and HTML
|
||||||
|
@ -40,14 +42,11 @@ RUN apk add --no-cache \
|
||||||
# useradd -u $PUID -g stirlingpdfgroup -s /bin/sh stirlingpdfuser && \
|
# useradd -u $PUID -g stirlingpdfgroup -s /bin/sh stirlingpdfuser && \
|
||||||
# mkdir -p $HOME && chown stirlingpdfuser:stirlingpdfgroup $HOME
|
# mkdir -p $HOME && chown stirlingpdfuser:stirlingpdfgroup $HOME
|
||||||
# Set up necessary directories and permissions
|
# Set up necessary directories and permissions
|
||||||
mkdir /configs /logs /customFiles /pipeline/watchedFolders /pipeline/finishedFolders && \
|
mkdir -p /configs /logs /customFiles /pipeline/watchedFolders /pipeline/finishedFolders && \
|
||||||
# chown -R stirlingpdfuser:stirlingpdfgroup /usr/share/fonts/opentype/noto /configs /customFiles
|
# chown -R stirlingpdfuser:stirlingpdfgroup /usr/share/fonts/opentype/noto /configs /customFiles
|
||||||
# Set font cache and permissions
|
# Set font cache and permissions
|
||||||
fc-cache -f -v && \
|
fc-cache -f -v && \
|
||||||
chmod +x /scripts/*.sh && \
|
chmod +x /scripts/*.sh
|
||||||
echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/apk/repositories && \
|
|
||||||
echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/community" | tee -a /etc/apk/repositories && \
|
|
||||||
echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/testing" | tee -a /etc/apk/repositories
|
|
||||||
# chown stirlingpdfuser:stirlingpdfgroup /app.jar
|
# chown stirlingpdfuser:stirlingpdfgroup /app.jar
|
||||||
|
|
||||||
# Set environment variables
|
# Set environment variables
|
||||||
|
|
|
@ -1,28 +0,0 @@
|
||||||
# Main stage
|
|
||||||
FROM alpine:3.19.0
|
|
||||||
|
|
||||||
# JDK for app
|
|
||||||
RUN echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/apk/repositories && \
|
|
||||||
echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/community" | tee -a /etc/apk/repositories && \
|
|
||||||
echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/testing" | tee -a /etc/apk/repositories && \
|
|
||||||
apk add --no-cache \
|
|
||||||
ca-certificates \
|
|
||||||
tzdata \
|
|
||||||
tini \
|
|
||||||
bash \
|
|
||||||
curl \
|
|
||||||
openjdk17-jre \
|
|
||||||
# Doc conversion
|
|
||||||
libreoffice@testing \
|
|
||||||
# OCR MY PDF (unpaper for descew and other advanced featues)
|
|
||||||
ocrmypdf \
|
|
||||||
tesseract-ocr-data-eng \
|
|
||||||
# CV
|
|
||||||
py3-opencv \
|
|
||||||
# python3/pip
|
|
||||||
python3 && \
|
|
||||||
wget https://bootstrap.pypa.io/get-pip.py -qO - | python3 - --break-system-packages --no-cache-dir --upgrade && \
|
|
||||||
# uno unoconv and HTML
|
|
||||||
pip install --break-system-packages --no-cache-dir --upgrade unoconv WeasyPrint && \
|
|
||||||
mv /usr/share/tessdata /usr/share/tessdata-original
|
|
||||||
|
|
|
@ -4,6 +4,7 @@ import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.pdfbox.Loader;
|
import org.apache.pdfbox.Loader;
|
||||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||||
|
import org.apache.pdfbox.text.PDFTextStripper;
|
||||||
import org.springframework.http.MediaType;
|
import org.springframework.http.MediaType;
|
||||||
import org.springframework.http.ResponseEntity;
|
import org.springframework.http.ResponseEntity;
|
||||||
import org.springframework.web.bind.annotation.ModelAttribute;
|
import org.springframework.web.bind.annotation.ModelAttribute;
|
||||||
|
@ -11,7 +12,6 @@ import org.springframework.web.bind.annotation.PostMapping;
|
||||||
import org.springframework.web.bind.annotation.RequestMapping;
|
import org.springframework.web.bind.annotation.RequestMapping;
|
||||||
import org.springframework.web.bind.annotation.RestController;
|
import org.springframework.web.bind.annotation.RestController;
|
||||||
import org.springframework.web.multipart.MultipartFile;
|
import org.springframework.web.multipart.MultipartFile;
|
||||||
import org.apache.pdfbox.text.PDFTextStripper;
|
|
||||||
|
|
||||||
import io.github.pixee.security.Filenames;
|
import io.github.pixee.security.Filenames;
|
||||||
import io.swagger.v3.oas.annotations.Operation;
|
import io.swagger.v3.oas.annotations.Operation;
|
||||||
|
@ -65,16 +65,20 @@ public class ConvertPDFToOffice {
|
||||||
throws IOException, InterruptedException {
|
throws IOException, InterruptedException {
|
||||||
MultipartFile inputFile = request.getFileInput();
|
MultipartFile inputFile = request.getFileInput();
|
||||||
String outputFormat = request.getOutputFormat();
|
String outputFormat = request.getOutputFormat();
|
||||||
if ("txt".equals(request.getOutputFormat())) {
|
if ("txt".equals(request.getOutputFormat())) {
|
||||||
try (PDDocument document = Loader.loadPDF(inputFile.getBytes())) {
|
try (PDDocument document = Loader.loadPDF(inputFile.getBytes())) {
|
||||||
PDFTextStripper stripper = new PDFTextStripper();
|
PDFTextStripper stripper = new PDFTextStripper();
|
||||||
String text = stripper.getText(document);
|
String text = stripper.getText(document);
|
||||||
return WebResponseUtils.bytesToWebResponse(text.getBytes(), Filenames.toSimpleFileName(inputFile.getOriginalFilename()).replaceFirst("[.][^.]+$", "")
|
return WebResponseUtils.bytesToWebResponse(
|
||||||
+ ".txt" , MediaType.TEXT_PLAIN);
|
text.getBytes(),
|
||||||
|
Filenames.toSimpleFileName(inputFile.getOriginalFilename())
|
||||||
|
.replaceFirst("[.][^.]+$", "")
|
||||||
|
+ ".txt",
|
||||||
|
MediaType.TEXT_PLAIN);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
PDFToFile pdfToFile = new PDFToFile();
|
PDFToFile pdfToFile = new PDFToFile();
|
||||||
return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "writer_pdf_import");
|
return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "writer_pdf_import");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -68,7 +68,9 @@ public class ShowJavascript {
|
||||||
|
|
||||||
if (script.isEmpty()) {
|
if (script.isEmpty()) {
|
||||||
script =
|
script =
|
||||||
"PDF '" + Filenames.toSimpleFileName(inputFile.getOriginalFilename()) + "' does not contain Javascript";
|
"PDF '"
|
||||||
|
+ Filenames.toSimpleFileName(inputFile.getOriginalFilename())
|
||||||
|
+ "' does not contain Javascript";
|
||||||
}
|
}
|
||||||
|
|
||||||
return WebResponseUtils.bytesToWebResponse(
|
return WebResponseUtils.bytesToWebResponse(
|
||||||
|
|
|
@ -97,46 +97,44 @@ public class StampController {
|
||||||
PDRectangle pageSize = page.getMediaBox();
|
PDRectangle pageSize = page.getMediaBox();
|
||||||
float margin = marginFactor * (pageSize.getWidth() + pageSize.getHeight()) / 2;
|
float margin = marginFactor * (pageSize.getWidth() + pageSize.getHeight()) / 2;
|
||||||
|
|
||||||
|
|
||||||
PDPageContentStream contentStream =
|
PDPageContentStream contentStream =
|
||||||
new PDPageContentStream(
|
new PDPageContentStream(
|
||||||
document, page, PDPageContentStream.AppendMode.APPEND, true, true);
|
document, page, PDPageContentStream.AppendMode.APPEND, true, true);
|
||||||
|
|
||||||
|
PDExtendedGraphicsState graphicsState = new PDExtendedGraphicsState();
|
||||||
|
graphicsState.setNonStrokingAlphaConstant(opacity);
|
||||||
|
contentStream.setGraphicsStateParameters(graphicsState);
|
||||||
|
|
||||||
PDExtendedGraphicsState graphicsState = new PDExtendedGraphicsState();
|
if ("text".equalsIgnoreCase(watermarkType)) {
|
||||||
graphicsState.setNonStrokingAlphaConstant(opacity);
|
addTextStamp(
|
||||||
contentStream.setGraphicsStateParameters(graphicsState);
|
contentStream,
|
||||||
|
watermarkText,
|
||||||
if ("text".equalsIgnoreCase(watermarkType)) {
|
document,
|
||||||
addTextStamp(
|
page,
|
||||||
contentStream,
|
rotation,
|
||||||
watermarkText,
|
position,
|
||||||
document,
|
fontSize,
|
||||||
page,
|
alphabet,
|
||||||
rotation,
|
overrideX,
|
||||||
position,
|
overrideY,
|
||||||
fontSize,
|
margin,
|
||||||
alphabet,
|
customColor);
|
||||||
overrideX,
|
} else if ("image".equalsIgnoreCase(watermarkType)) {
|
||||||
overrideY,
|
addImageStamp(
|
||||||
margin,
|
contentStream,
|
||||||
customColor);
|
watermarkImage,
|
||||||
} else if ("image".equalsIgnoreCase(watermarkType)) {
|
document,
|
||||||
addImageStamp(
|
page,
|
||||||
contentStream,
|
rotation,
|
||||||
watermarkImage,
|
position,
|
||||||
document,
|
fontSize,
|
||||||
page,
|
overrideX,
|
||||||
rotation,
|
overrideY,
|
||||||
position,
|
margin);
|
||||||
fontSize,
|
}
|
||||||
overrideX,
|
|
||||||
overrideY,
|
contentStream.close();
|
||||||
margin);
|
}
|
||||||
}
|
|
||||||
|
|
||||||
contentStream.close();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return WebResponseUtils.pdfDocToWebResponse(
|
return WebResponseUtils.pdfDocToWebResponse(
|
||||||
document,
|
document,
|
||||||
|
|
Loading…
Reference in a new issue