print (WIP), fake scan (WIP) and text conversion for ultra-lite (#1098)

* Changes!

* lang

* fake scan init, print init and pdf to text for exe

* Hardening suggestions for Stirling-PDF / changes (#1099)

* Switch order of literals to prevent NullPointerException

* Introduced protections against predictable RNG abuse

---------

Co-authored-by: pixeebot[bot] <104101892+pixeebot[bot]@users.noreply.github.com>

* Update README.md

* install custom fonts

* Formats etc

* version bump

* disable WIP work

* remove chinese font

---------

Co-authored-by: pixeebot[bot] <104101892+pixeebot[bot]@users.noreply.github.com>
Co-authored-by: systo <systo@host.docker.internal>
This commit is contained in:
Anthony Stirling 2024-04-21 23:06:44 +01:00 committed by GitHub
parent 6c052a7b25
commit 71e93e3cb5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
19 changed files with 494 additions and 143 deletions

View file

@ -25,15 +25,17 @@ ENV DOCKER_ENABLE_SECURITY=false \
RUN echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/apk/repositories && \ RUN echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/apk/repositories && \
echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/community" | tee -a /etc/apk/repositories && \ echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/community" | tee -a /etc/apk/repositories && \
echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/testing" | tee -a /etc/apk/repositories && \ echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/testing" | tee -a /etc/apk/repositories && \
apk update && \
apk add --no-cache \ apk add --no-cache \
ca-certificates \ ca-certificates \
tzdata \ tzdata \
tini \ tini \
openssl \
openssl-dev \
bash \ bash \
curl \ curl \
openjdk17-jre \ openjdk17-jre \
su-exec \ su-exec \
font-noto-cjk \
shadow \ shadow \
# Doc conversion # Doc conversion
libreoffice@testing \ libreoffice@testing \
@ -58,7 +60,8 @@ RUN echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /et
addgroup -S stirlingpdfgroup && adduser -S stirlingpdfuser -G stirlingpdfgroup && \ addgroup -S stirlingpdfgroup && adduser -S stirlingpdfuser -G stirlingpdfgroup && \
chown -R stirlingpdfuser:stirlingpdfgroup $HOME /scripts /usr/share/fonts/opentype/noto /configs /customFiles /pipeline && \ chown -R stirlingpdfuser:stirlingpdfgroup $HOME /scripts /usr/share/fonts/opentype/noto /configs /customFiles /pipeline && \
chown stirlingpdfuser:stirlingpdfgroup /app.jar && \ chown stirlingpdfuser:stirlingpdfgroup /app.jar && \
tesseract --list-langs tesseract --list-langs && \
rm -rf /var/cache/apk/*
EXPOSE 8080 EXPOSE 8080

View file

@ -121,6 +121,7 @@ docker run -d \
-v /location/of/logs:/logs \ -v /location/of/logs:/logs \
-e DOCKER_ENABLE_SECURITY=false \ -e DOCKER_ENABLE_SECURITY=false \
-e INSTALL_BOOK_AND_ADVANCED_HTML_OPS=false \ -e INSTALL_BOOK_AND_ADVANCED_HTML_OPS=false \
-e LANGS=en_GB \
--name stirling-pdf \ --name stirling-pdf \
frooodle/s-pdf:latest frooodle/s-pdf:latest
@ -147,6 +148,7 @@ services:
environment: environment:
- DOCKER_ENABLE_SECURITY=false - DOCKER_ENABLE_SECURITY=false
- INSTALL_BOOK_AND_ADVANCED_HTML_OPS=false - INSTALL_BOOK_AND_ADVANCED_HTML_OPS=false
- LANGS=en_GB
``` ```
Note: Podman is CLI-compatible with Docker, so simply replace "docker" with "podman". Note: Podman is CLI-compatible with Docker, so simply replace "docker" with "podman".
@ -252,13 +254,13 @@ metrics:
- ``SYSTEM_CONNECTIONTIMEOUTMINUTES`` to set custom connection timeout values - ``SYSTEM_CONNECTIONTIMEOUTMINUTES`` to set custom connection timeout values
- ``DOCKER_ENABLE_SECURITY`` to tell docker to download security jar (required as true for auth login) - ``DOCKER_ENABLE_SECURITY`` to tell docker to download security jar (required as true for auth login)
- ``INSTALL_BOOK_AND_ADVANCED_HTML_OPS`` to download calibre onto stirling-pdf enabling pdf to/from book and advanced html conversion - ``INSTALL_BOOK_AND_ADVANCED_HTML_OPS`` to download calibre onto stirling-pdf enabling pdf to/from book and advanced html conversion
- ``LANGS`` to define which font packages to install for document conversions, given as a comma-separated list of locale codes (e.g. ``en_GB,zh_CN``) or ``ALL``
## API ## API
For those wanting to use Stirling-PDFs backend API to link with their own custom scripting to edit PDFs you can view all existing API documentation For those wanting to use Stirling-PDFs backend API to link with their own custom scripting to edit PDFs you can view all existing API documentation
[here](https://app.swaggerhub.com/apis-docs/Stirling-Tools/Stirling-PDF/) or navigate to /swagger-ui/index.html of your stirling-pdf instance for your versions documentation (Or by following the API button in your settings of Stirling-PDF) [here](https://app.swaggerhub.com/apis-docs/Stirling-Tools/Stirling-PDF/) or navigate to /swagger-ui/index.html of your stirling-pdf instance for your versions documentation (Or by following the API button in your settings of Stirling-PDF)
## Login authentication ## Login authentication
![stirling-login](images/login-light.png) ![stirling-login](images/login-light.png)

View file

@ -12,7 +12,7 @@ plugins {
import com.github.jk1.license.render.* import com.github.jk1.license.render.*
group = 'stirling.software' group = 'stirling.software'
version = '0.22.8' version = '0.23.0'
sourceCompatibility = '17' sourceCompatibility = '17'
repositories { repositories {

View file

@ -21,6 +21,8 @@ services:
environment: environment:
DOCKER_ENABLE_SECURITY: "false" DOCKER_ENABLE_SECURITY: "false"
SECURITY_ENABLELOGIN: "false" SECURITY_ENABLELOGIN: "false"
LANGS: "en_GB,en_US,ar_AR,de_DE,fr_FR,es_ES,zh_CN,zh_TW,ca_CA,it_IT,sv_SE,pl_PL,ro_RO,ko_KR,pt_BR,ru_RU,el_GR,hi_IN,hu_HU,tr_TR,id_ID"
INSTALL_BOOK_AND_ADVANCED_HTML_OPS: "true"
SYSTEM_DEFAULTLOCALE: en-US SYSTEM_DEFAULTLOCALE: en-US
UI_APPNAME: Stirling-PDF UI_APPNAME: Stirling-PDF
UI_HOMEDESCRIPTION: Demo site for Stirling-PDF Latest UI_HOMEDESCRIPTION: Demo site for Stirling-PDF Latest

View file

@ -1,25 +1,30 @@
#!/bin/sh #!/bin/bash
# Update the user and group IDs as per environment variables # Update the user and group IDs as per environment variables
if [ ! -z "$PUID" ] && [ "$PUID" != "$(id -u stirlingpdfuser)" ]; then if [ ! -z "$PUID" ] && [ "$PUID" != "$(id -u stirlingpdfuser)" ]; then
usermod -o -u "$PUID" stirlingpdfuser || true usermod -o -u "$PUID" stirlingpdfuser || true
fi fi
if [ ! -z "$PGID" ] && [ "$PGID" != "$(getent group stirlingpdfgroup | cut -d: -f3)" ]; then if [ ! -z "$PGID" ] && [ "$PGID" != "$(getent group stirlingpdfgroup | cut -d: -f3)" ]; then
groupmod -o -g "$PGID" stirlingpdfgroup || true groupmod -o -g "$PGID" stirlingpdfgroup || true
fi fi
umask "$UMASK" || true umask "$UMASK" || true
if [[ "$INSTALL_BOOK_AND_ADVANCED_HTML_OPS" == "true" ]]; then if [[ "$INSTALL_BOOK_AND_ADVANCED_HTML_OPS" == "true" ]]; then
apk add --no-cache calibre@testing apk add --no-cache calibre@testing
fi fi
/scripts/download-security-jar.sh /scripts/download-security-jar.sh
if [[ -n "$LANGS" ]]; then
/scripts/installFonts.sh $LANGS
fi
echo "Setting permissions and ownership for necessary directories..." echo "Setting permissions and ownership for necessary directories..."
if chown -R stirlingpdfuser:stirlingpdfgroup $HOME /logs /scripts /usr/share/fonts/opentype/noto /usr/share/tessdata /configs /customFiles /pipeline /app.jar; then # Attempt to change ownership of directories and files
chmod -R 755 /logs /scripts /usr/share/fonts/opentype/noto /usr/share/tessdata /configs /customFiles /pipeline /app.jar || true if chown -R stirlingpdfuser:stirlingpdfgroup $HOME /logs /scripts /usr/share/fonts/opentype/noto /configs /customFiles /pipeline /app.jar; then
chmod -R 755 /logs /scripts /usr/share/fonts/opentype/noto /configs /customFiles /pipeline /app.jar || true
# If chown succeeds, execute the command as stirlingpdfuser # If chown succeeds, execute the command as stirlingpdfuser
exec su-exec stirlingpdfuser "$@" exec su-exec stirlingpdfuser "$@"
else else

View file

@ -13,18 +13,6 @@ if [ -d /usr/share/tesseract-ocr/5/tessdata ]; then
cp -r /usr/share/tesseract-ocr/5/tessdata/* /usr/share/tessdata || true; cp -r /usr/share/tesseract-ocr/5/tessdata/* /usr/share/tessdata || true;
fi fi
# Update the user and group IDs as per environment variables
if [ ! -z "$PUID" ] && [ "$PUID" != "$(id -u stirlingpdfuser)" ]; then
usermod -o -u "$PUID" stirlingpdfuser || true
fi
if [ ! -z "$PGID" ] && [ "$PGID" != "$(getent group stirlingpdfgroup | cut -d: -f3)" ]; then
groupmod -o -g "$PGID" stirlingpdfgroup || true
fi
umask "$UMASK" || true
# Check if TESSERACT_LANGS environment variable is set and is not empty # Check if TESSERACT_LANGS environment variable is set and is not empty
if [[ -n "$TESSERACT_LANGS" ]]; then if [[ -n "$TESSERACT_LANGS" ]]; then
# Convert comma-separated values to a space-separated list # Convert comma-separated values to a space-separated list
@ -40,20 +28,4 @@ if [[ -n "$TESSERACT_LANGS" ]]; then
done done
fi fi
if [[ "$INSTALL_BOOK_AND_ADVANCED_HTML_OPS" == "true" ]]; then /scripts/init-without-ocr.sh "$@"
apk add --no-cache calibre@testing
fi
/scripts/download-security-jar.sh
echo "Setting permissions and ownership for necessary directories..."
# Attempt to change ownership of directories and files
if chown -R stirlingpdfuser:stirlingpdfgroup $HOME /logs /scripts /usr/share/fonts/opentype/noto /usr/share/tessdata /configs /customFiles /pipeline /app.jar; then
chmod -R 755 /logs /scripts /usr/share/fonts/opentype/noto /usr/share/tessdata /configs /customFiles /pipeline /app.jar || true
# If chown succeeds, execute the command as stirlingpdfuser
exec su-exec stirlingpdfuser "$@"
else
# If chown fails, execute the command without changing the user context
echo "[WARN] Chown failed, running as host user"
exec "$@"
fi

67
scripts/installFonts.sh Normal file
View file

@ -0,0 +1,67 @@
#!/bin/bash
# Installs the Alpine font packages needed for the requested languages.
#
# Usage: installFonts.sh <LANGS>
#   <LANGS>  "ALL" to install every package listed in language_fonts, or a
#            comma-separated list of locale codes (e.g. "en_GB,zh_CN").
#            Whitespace around codes is ignored, empty entries and codes
#            without a mapping are skipped.
LANGS=$1

# Packages already handed to apk during this run. Several languages map to the
# same packages (font-noto in particular), so each one is only attempted once.
declare -A attempted_fonts=()

# Function to install a font package.
# A failed install is reported but never aborts the script, so one missing
# package does not prevent the remaining ones from being installed.
install_font() {
    local package=$1
    if [[ -z "$package" || -n "${attempted_fonts[$package]:-}" ]]; then
        return 0
    fi
    attempted_fonts[$package]=1
    echo "Installing font package: $package"
    if ! apk add "$package" --no-cache; then
        echo "Failed to install $package"
    fi
}

# Install common fonts used across many languages
#common_fonts=(
# font-terminus
# font-dejavu
# font-noto
# font-noto-cjk
# font-awesome
# font-noto-extra
#)
#
#for font in "${common_fonts[@]}"; do
# install_font $font
#done

# Map languages to specific font packages
declare -A language_fonts=(
    ["ar_AR"]="font-noto-arabic"
    ["zh_CN"]="font-isas-misc"
    ["zh_TW"]="font-isas-misc"
    ["ja_JP"]="font-noto font-noto-thai font-noto-tibetan font-ipa font-sony-misc font-jis-misc"
    ["ru_RU"]="font-vollkorn font-misc-cyrillic font-mutt-misc font-screen-cyrillic font-winitzki-cyrillic font-cronyx-cyrillic"
    ["sr_LATN_RS"]="font-vollkorn font-misc-cyrillic font-mutt-misc font-screen-cyrillic font-winitzki-cyrillic font-cronyx-cyrillic"
    ["uk_UA"]="font-vollkorn font-misc-cyrillic font-mutt-misc font-screen-cyrillic font-winitzki-cyrillic font-cronyx-cyrillic"
    ["ko_KR"]="font-noto font-noto-thai font-noto-tibetan"
    ["el_GR"]="font-noto"
    ["hi_IN"]="font-noto-devanagari"
    ["bg_BG"]="font-vollkorn font-misc-cyrillic"
    ["GENERAL"]="font-terminus font-dejavu font-noto font-noto-cjk font-awesome font-noto-extra"
)

# Install fonts for other languages which generally do not need special packages beyond 'font-noto'
other_langs=("en_GB" "en_US" "de_DE" "fr_FR" "es_ES" "ca_CA" "it_IT" "pt_BR" "nl_NL" "sv_SE" "pl_PL" "ro_RO" "hu_HU" "tr_TR" "id_ID" "eu_ES")

if [[ "$LANGS" == "ALL" ]]; then
    # Install all fonts from the language_fonts map
    for fonts in "${language_fonts[@]}"; do
        # Unquoted on purpose: each map value is a space-separated package list.
        for font in $fonts; do
            install_font "$font"
        done
    done
else
    # Split comma-separated languages and install necessary fonts
    IFS=',' read -ra LANG_CODES <<< "$LANGS"
    for code in "${LANG_CODES[@]}"; do
        # Tolerate "en_GB, de_DE" style lists.
        code="${code//[[:space:]]/}"
        # An empty entry (e.g. from "en_GB,,de_DE") must not be used as an
        # associative-array subscript below.
        if [[ -z "$code" ]]; then
            continue
        fi
        if [[ " ${other_langs[*]} " == *" ${code} "* ]]; then
            install_font font-noto
        else
            fonts_to_install=${language_fonts[$code]:-}
            if [[ -n "$fonts_to_install" ]]; then
                for font in $fonts_to_install; do
                    install_font "$font"
                done
            else
                echo "No font packages mapped for language: $code"
            fi
        fi
    done
fi

View file

@ -62,6 +62,7 @@ public class SPdfApplication {
} }
public static void main(String[] args) throws IOException, InterruptedException { public static void main(String[] args) throws IOException, InterruptedException {
SpringApplication app = new SpringApplication(SPdfApplication.class); SpringApplication app = new SpringApplication(SPdfApplication.class);
app.addInitializers(new ConfigInitializer()); app.addInitializers(new ConfigInitializer());
if (Files.exists(Paths.get("configs/settings.yml"))) { if (Files.exists(Paths.get("configs/settings.yml"))) {

View file

@ -146,7 +146,6 @@ public class EndpointConfiguration {
addEndpointToGroup("CLI", "xlsx-to-pdf"); addEndpointToGroup("CLI", "xlsx-to-pdf");
addEndpointToGroup("CLI", "pdf-to-word"); addEndpointToGroup("CLI", "pdf-to-word");
addEndpointToGroup("CLI", "pdf-to-presentation"); addEndpointToGroup("CLI", "pdf-to-presentation");
addEndpointToGroup("CLI", "pdf-to-text");
addEndpointToGroup("CLI", "pdf-to-html"); addEndpointToGroup("CLI", "pdf-to-html");
addEndpointToGroup("CLI", "pdf-to-xml"); addEndpointToGroup("CLI", "pdf-to-xml");
addEndpointToGroup("CLI", "ocr-pdf"); addEndpointToGroup("CLI", "ocr-pdf");
@ -154,6 +153,7 @@ public class EndpointConfiguration {
addEndpointToGroup("CLI", "url-to-pdf"); addEndpointToGroup("CLI", "url-to-pdf");
addEndpointToGroup("CLI", "book-to-pdf"); addEndpointToGroup("CLI", "book-to-pdf");
addEndpointToGroup("CLI", "pdf-to-book"); addEndpointToGroup("CLI", "pdf-to-book");
addEndpointToGroup("CLI", "pdf-to-rtf");
// Calibre // Calibre
addEndpointToGroup("Calibre", "book-to-pdf"); addEndpointToGroup("Calibre", "book-to-pdf");
@ -175,7 +175,7 @@ public class EndpointConfiguration {
addEndpointToGroup("LibreOffice", "xlsx-to-pdf"); addEndpointToGroup("LibreOffice", "xlsx-to-pdf");
addEndpointToGroup("LibreOffice", "pdf-to-word"); addEndpointToGroup("LibreOffice", "pdf-to-word");
addEndpointToGroup("LibreOffice", "pdf-to-presentation"); addEndpointToGroup("LibreOffice", "pdf-to-presentation");
addEndpointToGroup("LibreOffice", "pdf-to-text"); addEndpointToGroup("LibreOffice", "pdf-to-rtf");
addEndpointToGroup("LibreOffice", "pdf-to-html"); addEndpointToGroup("LibreOffice", "pdf-to-html");
addEndpointToGroup("LibreOffice", "pdf-to-xml"); addEndpointToGroup("LibreOffice", "pdf-to-xml");
@ -218,6 +218,7 @@ public class EndpointConfiguration {
addEndpointToGroup("Java", "overlay-pdf"); addEndpointToGroup("Java", "overlay-pdf");
addEndpointToGroup("Java", "split-pdf-by-sections"); addEndpointToGroup("Java", "split-pdf-by-sections");
addEndpointToGroup("Java", REMOVE_BLANKS); addEndpointToGroup("Java", REMOVE_BLANKS);
addEndpointToGroup("Java", "pdf-to-text");
// Javascript // Javascript
addEndpointToGroup("Javascript", "pdf-organizer"); addEndpointToGroup("Javascript", "pdf-organizer");

View file

@ -31,7 +31,8 @@ public class ConvertPDFToPDFA {
summary = "Convert a PDF to a PDF/A", summary = "Convert a PDF to a PDF/A",
description = description =
"This endpoint converts a PDF file to a PDF/A file. PDF/A is a format designed for long-term archiving of digital documents. Input:PDF Output:PDF Type:SISO") "This endpoint converts a PDF file to a PDF/A file. PDF/A is a format designed for long-term archiving of digital documents. Input:PDF Output:PDF Type:SISO")
public ResponseEntity<byte[]> pdfToPdfA(@ModelAttribute PdfToPdfARequest request) throws Exception { public ResponseEntity<byte[]> pdfToPdfA(@ModelAttribute PdfToPdfARequest request)
throws Exception {
MultipartFile inputFile = request.getFileInput(); MultipartFile inputFile = request.getFileInput();
String outputFormat = request.getOutputFormat(); String outputFormat = request.getOutputFormat();

View file

@ -1,27 +1,25 @@
package stirling.software.SPDF.controller.api.misc; package stirling.software.SPDF.controller.api.misc;
import java.awt.AlphaComposite;
import java.awt.Color; import java.awt.Color;
import java.awt.GradientPaint;
import java.awt.Graphics2D;
import java.awt.geom.AffineTransform; import java.awt.geom.AffineTransform;
import java.awt.image.AffineTransformOp; import java.awt.image.AffineTransformOp;
import java.awt.image.BufferedImage; import java.awt.image.BufferedImage;
import java.awt.image.BufferedImageOp; import java.awt.image.BufferedImageOp;
import java.awt.image.ConvolveOp; import java.awt.image.ConvolveOp;
import java.awt.image.Kernel; import java.awt.image.Kernel;
import java.awt.image.RescaleOp;
import java.io.ByteArrayOutputStream; import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.security.SecureRandom; import java.security.SecureRandom;
import java.util.ArrayList;
import java.util.List;
import java.util.Random; import java.util.Random;
import javax.imageio.ImageIO;
import org.apache.pdfbox.Loader; import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.graphics.image.JPEGFactory;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.graphics.image.LosslessFactory;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject; import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.apache.pdfbox.rendering.ImageType; import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer; import org.apache.pdfbox.rendering.PDFRenderer;
@ -29,16 +27,17 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.springframework.http.ResponseEntity; import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.ModelAttribute; import org.springframework.web.bind.annotation.ModelAttribute;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController; import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile; import org.springframework.web.multipart.MultipartFile;
import io.github.pixee.security.Filenames; import io.github.pixee.security.Filenames;
import io.swagger.v3.oas.annotations.Hidden;
import io.swagger.v3.oas.annotations.Operation; import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag; import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.model.api.PDFFile; import stirling.software.SPDF.model.api.PDFFile;
import stirling.software.SPDF.utils.PdfUtils;
import stirling.software.SPDF.utils.WebResponseUtils; import stirling.software.SPDF.utils.WebResponseUtils;
@RestController @RestController
@ -48,98 +47,38 @@ public class FakeScanControllerWIP {
private static final Logger logger = LoggerFactory.getLogger(FakeScanControllerWIP.class); private static final Logger logger = LoggerFactory.getLogger(FakeScanControllerWIP.class);
// TODO //TODO
@Hidden //@PostMapping(consumes = "multipart/form-data", value = "/fake-scan")
// @PostMapping(consumes = "multipart/form-data", value = "/fakeScan") //@Operation(
@Operation( // summary = "Repair a PDF file",
summary = "Repair a PDF file", // description =
description = // "This endpoint repairs a given PDF file by running Ghostscript command. The PDF is first saved to a temporary location, repaired, read back, and then returned as a response.")
"This endpoint repairs a given PDF file by running Ghostscript command. The PDF is first saved to a temporary location, repaired, read back, and then returned as a response.") public ResponseEntity<byte[]> fakeScan(@ModelAttribute PDFFile request) throws IOException {
public ResponseEntity<byte[]> repairPdf(@ModelAttribute PDFFile request) throws IOException {
MultipartFile inputFile = request.getFileInput(); MultipartFile inputFile = request.getFileInput();
// Load the PDF document
PDDocument document = Loader.loadPDF(inputFile.getBytes()); PDDocument document = Loader.loadPDF(inputFile.getBytes());
PDFRenderer pdfRenderer = new PDFRenderer(document); PDFRenderer renderer = new PDFRenderer(document);
pdfRenderer.setSubsamplingAllowed(true); List<BufferedImage> images = new ArrayList<>();
for (int page = 0; page < document.getNumberOfPages(); ++page) { // Convert each page to an image
BufferedImage image = pdfRenderer.renderImageWithDPI(page, 300, ImageType.RGB); for (int i = 0; i < document.getNumberOfPages(); i++) {
ImageIO.write(image, "png", new File("scanned-" + (page + 1) + ".png")); BufferedImage image = renderer.renderImageWithDPI(i, 150, ImageType.GRAY);
images.add(processImage(image));
} }
document.close(); document.close();
// Constants // Create a new PDF document with the processed images
int scannedness = 90; // Value between 0 and 100
int dirtiness = 0; // Value between 0 and 100
// Load the source image
BufferedImage sourceImage = ImageIO.read(new File("scanned-1.png"));
// Create the destination image
BufferedImage destinationImage =
new BufferedImage(
sourceImage.getWidth(), sourceImage.getHeight(), sourceImage.getType());
// Apply a brightness and contrast effect based on the "scanned-ness"
float scaleFactor = 1.0f + (scannedness / 100.0f) * 0.5f; // Between 1.0 and 1.5
float offset = scannedness * 1.5f; // Between 0 and 150
BufferedImageOp op = new RescaleOp(scaleFactor, offset, null);
op.filter(sourceImage, destinationImage);
// Apply a rotation effect
double rotationRequired =
Math.toRadians(
(new SecureRandom().nextInt(3 - 1)
+ 1)); // Random angle between 1 and 3 degrees
double locationX = destinationImage.getWidth() / 2;
double locationY = destinationImage.getHeight() / 2;
AffineTransform tx =
AffineTransform.getRotateInstance(rotationRequired, locationX, locationY);
AffineTransformOp rotateOp = new AffineTransformOp(tx, AffineTransformOp.TYPE_BILINEAR);
destinationImage = rotateOp.filter(destinationImage, null);
// Apply a blur effect based on the "scanned-ness"
float blurIntensity = scannedness / 100.0f * 0.2f; // Between 0.0 and 0.2
float[] matrix = {
blurIntensity, blurIntensity, blurIntensity,
blurIntensity, blurIntensity, blurIntensity,
blurIntensity, blurIntensity, blurIntensity
};
BufferedImageOp blurOp =
new ConvolveOp(new Kernel(3, 3, matrix), ConvolveOp.EDGE_NO_OP, null);
destinationImage = blurOp.filter(destinationImage, null);
// Add noise to the image based on the "dirtiness"
Random random = new SecureRandom();
for (int y = 0; y < destinationImage.getHeight(); y++) {
for (int x = 0; x < destinationImage.getWidth(); x++) {
if (random.nextInt(100) < dirtiness) {
// Change the pixel color to black randomly based on the "dirtiness"
destinationImage.setRGB(x, y, Color.BLACK.getRGB());
}
}
}
// Save the image
ImageIO.write(destinationImage, "PNG", new File("scanned-1.png"));
PDDocument documentOut = new PDDocument();
for (int page = 1; page <= document.getNumberOfPages(); ++page) {
BufferedImage bim = ImageIO.read(new File("scanned-" + page + ".png"));
// Adjust the dimensions of the page
PDPage pdPage = new PDPage(new PDRectangle(bim.getWidth() - 1, bim.getHeight() - 1));
documentOut.addPage(pdPage);
PDImageXObject pdImage = LosslessFactory.createFromImage(documentOut, bim);
PDPageContentStream contentStream = new PDPageContentStream(documentOut, pdPage);
// Draw the image with a slight offset and enlarged dimensions
contentStream.drawImage(pdImage, -1, -1, bim.getWidth() + 2, bim.getHeight() + 2);
contentStream.close();
}
ByteArrayOutputStream baos = new ByteArrayOutputStream(); ByteArrayOutputStream baos = new ByteArrayOutputStream();
documentOut.save(baos); PDDocument newDocument = new PDDocument();
documentOut.close(); for (BufferedImage img : images) {
// PDPageContentStream contentStream = new PDPageContentStream(newDocument, new
// PDPage());
PDImageXObject pdImage = JPEGFactory.createFromImage(newDocument, img);
PdfUtils.addImageToDocument(newDocument, pdImage, "maintainAspectRatio", false);
}
newDocument.save(baos);
newDocument.close();
// Return the optimized PDF as a response // Return the optimized PDF as a response
String outputFilename = String outputFilename =
@ -148,4 +87,164 @@ public class FakeScanControllerWIP {
+ "_scanned.pdf"; + "_scanned.pdf";
return WebResponseUtils.boasToWebResponse(baos, outputFilename); return WebResponseUtils.boasToWebResponse(baos, outputFilename);
} }
/**
 * Runs one rendered page through the fake-scan pipeline: rotation, Gaussian blur (sigma 0.5),
 * Gaussian noise (strength 0.25) and finally a linear contrast stretch.
 *
 * @param image the rendered page
 * @return the processed page image
 */
public BufferedImage processImage(BufferedImage image) {
    // Tilt the page slightly.
    BufferedImage tilted = rotate(image);
    // Edge feathering is currently switched off:
    // tilted = softenEdges(tilted, 5);
    BufferedImage blurred = applyGaussianBlur(tilted, 0.5);
    // Noise is written into the blurred image in place.
    addGaussianNoise(blurred, 0.25);
    return linearStretch(blurred);
}
/**
 * Rotates the image by a fixed one degree around its centre using bicubic interpolation.
 *
 * @param image the image to rotate
 * @return a new image produced by the transform; the input is not modified
 */
private BufferedImage rotate(BufferedImage image) {
    final double angle = Math.toRadians(1.0);
    // Integer division: for odd sizes the pivot is rounded down to a whole pixel.
    final int pivotX = image.getWidth() / 2;
    final int pivotY = image.getHeight() / 2;
    AffineTransformOp rotation =
            new AffineTransformOp(
                    AffineTransform.getRotateInstance(angle, pivotX, pivotY),
                    AffineTransformOp.TYPE_BICUBIC);
    return rotation.filter(image, null);
}
/**
 * Blurs the image by convolving it with a normalised 7x7 Gaussian kernel.
 *
 * @param image the image to blur
 * @param sigma standard deviation of the Gaussian used to weight the kernel
 * @return a new, blurred image; pixels too close to the border for the kernel to fit are
 *     left as in the source (ConvolveOp.EDGE_NO_OP)
 */
private BufferedImage applyGaussianBlur(BufferedImage image, double sigma) {
    final int radius = 3; // Fixed radius size for simplicity
    final int side = 2 * radius + 1;
    final double denominator = 2 * sigma * sigma;

    float[] weights = new float[side * side];
    double total = 0.0;
    for (int row = 0; row < side; row++) {
        int dx = row - radius;
        for (int col = 0; col < side; col++) {
            int dy = col - radius;
            double squaredDistance = (double) (dx * dx) + (double) (dy * dy);
            double weight = Math.exp(-squaredDistance / denominator);
            weights[row * side + col] = (float) weight;
            total += weight;
        }
    }

    // Normalize the kernel so the weights sum to one
    for (int k = 0; k < weights.length; k++) {
        weights[k] = (float) (weights[k] / total);
    }

    BufferedImageOp blur =
            new ConvolveOp(new Kernel(side, side, weights), ConvolveOp.EDGE_NO_OP, null);
    return blur.filter(image, null);
}
/**
 * Returns an ARGB copy of the image whose four borders fade out to full transparency.
 *
 * <p>Each border band is {@code featherRadius} pixels wide. Alpha ramps from 0 at the very
 * edge of the image up to the original alpha at the inner side of the band, so the feathered
 * region joins the untouched interior without a visible seam. Corners are covered by two
 * bands and therefore fade faster.
 *
 * @param image the source image; it is not modified
 * @param featherRadius width of the fade in pixels; values of 0 or less disable the fade
 * @return a new {@code TYPE_INT_ARGB} image with softened edges
 */
public BufferedImage softenEdges(BufferedImage image, int featherRadius) {
    int width = image.getWidth();
    int height = image.getHeight();
    BufferedImage output = new BufferedImage(width, height, BufferedImage.TYPE_INT_ARGB);

    Graphics2D g2 = output.createGraphics();
    try {
        g2.drawImage(image, 0, 0, null);
        if (featherRadius <= 0) {
            // Nothing to feather; also avoids building gradients with identical end points.
            return output;
        }

        // DstIn multiplies the existing alpha by the alpha of whatever is painted next,
        // so only the alpha of these two colours matters.
        g2.setComposite(AlphaComposite.DstIn);
        Color keep = new Color(0f, 0f, 0f, 1f);
        Color erase = new Color(0f, 0f, 0f, 0f);

        // top edge: transparent at y = 0, untouched at y = featherRadius
        g2.setPaint(new GradientPaint(0, 0, erase, 0, featherRadius, keep));
        g2.fillRect(0, 0, width, featherRadius);

        // bottom edge: untouched at the inner side, transparent at y = height
        g2.setPaint(new GradientPaint(0, height - featherRadius, keep, 0, height, erase));
        g2.fillRect(0, height - featherRadius, width, featherRadius);

        // left edge: transparent at x = 0, untouched at x = featherRadius
        g2.setPaint(new GradientPaint(0, 0, erase, featherRadius, 0, keep));
        g2.fillRect(0, 0, featherRadius, height);

        // right edge: untouched at the inner side, transparent at x = width
        g2.setPaint(new GradientPaint(width - featherRadius, 0, keep, width, 0, erase));
        g2.fillRect(width - featherRadius, 0, featherRadius, height);
    } finally {
        g2.dispose();
    }

    return output;
}
/**
 * Adds independent Gaussian noise to the red, green and blue channels of every pixel,
 * modifying the image in place. The alpha channel is carried over unchanged.
 *
 * @param image the image to modify
 * @param strength factor applied to each Gaussian sample before it is added to a channel
 */
private void addGaussianNoise(BufferedImage image, double strength) {
    Random noise = new SecureRandom();
    final int columns = image.getWidth();
    final int rows = image.getHeight();

    for (int x = 0; x < columns; x++) {
        for (int y = 0; y < rows; y++) {
            int argb = image.getRGB(x, y);
            int a = (argb >> 24) & 0xff;

            // One sample per channel, drawn in red, green, blue order; the cast truncates.
            int r = (int) (((argb >> 16) & 0xff) + noise.nextGaussian() * strength);
            int g = (int) (((argb >> 8) & 0xff) + noise.nextGaussian() * strength);
            int b = (int) ((argb & 0xff) + noise.nextGaussian() * strength);

            // Keep every channel inside 0-255 so the packed value stays valid.
            r = Math.max(0, Math.min(255, r));
            g = Math.max(0, Math.min(255, g));
            b = Math.max(0, Math.min(255, b));

            image.setRGB(x, y, (a << 24) | (r << 16) | (g << 8) | b);
        }
    }
}
/**
 * Stretches the contrast of the image in place so that its darkest luminance maps to 0 and
 * its brightest to 255.
 *
 * <p>The range is measured on the luminance of each pixel (0.299 R + 0.587 G + 0.114 B) and
 * the same linear mapping is then applied to every colour channel. Channels that fall
 * outside the measured luminance range are clamped to 0-255. An image with a single
 * luminance value has no range to stretch and is returned unchanged.
 *
 * @param image the image to modify
 * @return the same {@code image} instance, for call chaining
 */
public BufferedImage linearStretch(BufferedImage image) {
    int width = image.getWidth();
    int height = image.getHeight();
    int min = 255;
    int max = 0;

    // First pass: find the min and max grayscale values
    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x++) {
            int rgb = image.getRGB(x, y);
            int gray =
                    (int)
                            (((rgb >> 16) & 0xff) * 0.299
                                    + ((rgb >> 8) & 0xff) * 0.587
                                    + (rgb & 0xff) * 0.114); // Convert to grayscale
            if (gray < min) min = gray;
            if (gray > max) max = gray;
        }
    }

    int range = max - min;
    if (range <= 0) {
        // Uniform image: dividing by a zero range would turn every pixel black.
        return image;
    }

    // Second pass: stretch the histogram
    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x++) {
            int rgb = image.getRGB(x, y);
            int alpha = (rgb >> 24) & 0xff;
            int red = stretchChannel((rgb >> 16) & 0xff, min, range);
            int green = stretchChannel((rgb >> 8) & 0xff, min, range);
            int blue = stretchChannel(rgb & 0xff, min, range);

            // Set new RGB value maintaining the alpha channel
            image.setRGB(x, y, (alpha << 24) | (red << 16) | (green << 8) | blue);
        }
    }

    return image;
}

/** Maps one channel value through the linear stretch and clamps the result to 0-255. */
private static int stretchChannel(int value, int min, int range) {
    int scaled = (int) (((value - min) / (float) range) * 255);
    return Math.max(0, Math.min(255, scaled));
}
} }

View file

@ -0,0 +1,106 @@
package stirling.software.SPDF.controller.api.misc;
import java.awt.Graphics;
import java.awt.Graphics2D;
import java.awt.image.BufferedImage;
import java.awt.print.PageFormat;
import java.awt.print.Printable;
import java.awt.print.PrinterException;
import java.awt.print.PrinterJob;
import java.io.IOException;
import java.util.Arrays;
import javax.imageio.ImageIO;
import javax.print.PrintService;
import javax.print.PrintServiceLookup;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.printing.PDFPageable;
import org.springframework.http.HttpStatus;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.ModelAttribute;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.model.api.misc.PrintFileRequest;
/**
 * Sends an uploaded PDF or image to a locally available printer.
 *
 * <p>The endpoint mapping is still commented out (work in progress), so {@link #printFile} is
 * not reachable over HTTP yet.
 */
@RestController
@RequestMapping("/api/v1/misc")
@Tag(name = "Misc", description = "Miscellaneous APIs")
public class PrintFileController {

    // TODO
    // @PostMapping(value = "/print-file", consumes = "multipart/form-data")
    // @Operation(
    //         summary = "Prints PDF/Image file to a set printer",
    //         description =
    //                 "Input of PDF or Image along with a printer name/URL/IP to match against to send it to (Fire and forget) Input:Any Output:N/A Type:SISO")
    /**
     * Prints the uploaded file on the first print service whose name contains the requested
     * printer name (case-insensitive).
     *
     * @param request the uploaded file and the printer name to match against
     * @return 200 with a confirmation message when the job was submitted; 400 with the error
     *     message when the input is missing, the content type is neither PDF nor an image, no
     *     printer matches, or printing fails
     * @throws IOException declared for interface compatibility; I/O failures while reading the
     *     upload are reported as a 400 response
     */
    public ResponseEntity<String> printFile(@ModelAttribute PrintFileRequest request)
            throws IOException {
        MultipartFile file = request.getFileInput();
        String printerName = request.getPrinterName();
        if (file == null || file.isEmpty()) {
            return new ResponseEntity<>("No file provided", HttpStatus.BAD_REQUEST);
        }
        // An empty name would match every printer and silently pick the first one.
        if (printerName == null || printerName.isBlank()) {
            return new ResponseEntity<>("No printer name provided", HttpStatus.BAD_REQUEST);
        }
        String contentType = file.getContentType();
        try {
            PrintService selectedService = findPrinter(printerName);
            System.out.println("Selected Printer: " + selectedService.getName());

            if ("application/pdf".equals(contentType)) {
                printPdf(file, selectedService);
            } else if (contentType != null && contentType.startsWith("image/")) {
                printImage(file, selectedService);
            } else {
                // Previously this case fell through and reported success without printing.
                throw new IllegalArgumentException("Unsupported content type: " + contentType);
            }
            return new ResponseEntity<>(
                    "File printed successfully to " + selectedService.getName(), HttpStatus.OK);
        } catch (Exception e) {
            System.err.println("Failed to print: " + e.getMessage());
            return new ResponseEntity<>(e.getMessage(), HttpStatus.BAD_REQUEST);
        }
    }

    /** Returns the first print service whose name contains {@code printerName}, ignoring case. */
    private static PrintService findPrinter(String printerName) {
        String wanted = printerName.trim().toLowerCase(java.util.Locale.ROOT);
        PrintService[] services = PrintServiceLookup.lookupPrintServices(null, null);
        return Arrays.stream(services)
                .filter(
                        service ->
                                service.getName()
                                        .toLowerCase(java.util.Locale.ROOT)
                                        .contains(wanted))
                .findFirst()
                .orElseThrow(() -> new IllegalArgumentException("No matching printer found"));
    }

    /** Prints every page of the uploaded PDF; the document is closed even if printing fails. */
    private static void printPdf(MultipartFile file, PrintService service)
            throws IOException, PrinterException {
        try (PDDocument document = Loader.loadPDF(file.getBytes())) {
            PrinterJob job = PrinterJob.getPrinterJob();
            job.setPrintService(service);
            job.setPageable(new PDFPageable(document));
            job.print();
        }
    }

    /** Prints the uploaded image on a single page, scaled to the page's imageable area. */
    private static void printImage(MultipartFile file, PrintService service)
            throws IOException, PrinterException {
        BufferedImage image;
        try (java.io.InputStream in = file.getInputStream()) {
            image = ImageIO.read(in);
        }
        if (image == null) {
            // ImageIO.read returns null when no registered reader understands the data.
            throw new IllegalArgumentException("Unable to decode image");
        }
        PrinterJob job = PrinterJob.getPrinterJob();
        job.setPrintService(service);
        job.setPrintable(
                new Printable() {
                    public int print(Graphics graphics, PageFormat pageFormat, int pageIndex)
                            throws PrinterException {
                        if (pageIndex != 0) {
                            return NO_SUCH_PAGE;
                        }
                        Graphics2D g2d = (Graphics2D) graphics;
                        g2d.translate(pageFormat.getImageableX(), pageFormat.getImageableY());
                        g2d.drawImage(
                                image,
                                0,
                                0,
                                (int) pageFormat.getImageableWidth(),
                                (int) pageFormat.getImageableHeight(),
                                null);
                        return PAGE_EXISTS;
                    }
                });
        job.print();
    }
}

View file

@ -54,6 +54,13 @@ public class OtherWebController {
return "misc/add-page-numbers"; return "misc/add-page-numbers";
} }
/**
 * Serves the fake-scan form page.
 *
 * @param model view model; receives the {@code currentPage} attribute
 * @return the name of the fake-scan view template
 */
@GetMapping("/fake-scan")
@Hidden
public String fakeScanForm(Model model) {
    final String viewName = "misc/fake-scan";
    // Record which page is being rendered before handing off to the view.
    model.addAttribute("currentPage", "fake-scan");
    return viewName;
}
@GetMapping("/extract-images") @GetMapping("/extract-images")
@Hidden @Hidden
public String extractImagesForm(Model model) { public String extractImagesForm(Model model) {
@ -82,6 +89,13 @@ public class OtherWebController {
return "misc/compare"; return "misc/compare";
} }
/**
 * Serves the print-file form page.
 *
 * @param model view model; receives the {@code currentPage} attribute
 * @return the name of the print-file view template
 */
@GetMapping("/print-file")
@Hidden
public String printFileForm(Model model) {
    final String viewName = "misc/print-file";
    // Record which page is being rendered before handing off to the view.
    model.addAttribute("currentPage", "print-file");
    return viewName;
}
public List<String> getAvailableTesseractLanguages() { public List<String> getAvailableTesseractLanguages() {
String tessdataDir = "/usr/share/tessdata"; String tessdataDir = "/usr/share/tessdata";
File[] files = new File(tessdataDir).listFiles(); File[] files = new File(tessdataDir).listFiles();

View file

@ -0,0 +1,15 @@
package stirling.software.SPDF.model.api.misc;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.Data;
import lombok.EqualsAndHashCode;
import stirling.software.SPDF.model.api.PDFFile;
/**
 * Request model that adds a printer name to the fields inherited from {@link PDFFile}.
 *
 * <p>Lombok generates the accessors; equality and hash code include the superclass state
 * ({@code callSuper = true}).
 */
@Data
@EqualsAndHashCode(callSuper = true)
public class PrintFileRequest extends PDFFile {
// Printer name supplied by the client; declared as required in the API schema.
// NOTE(review): presumably matched as a substring of installed printer names - confirm against the controller.
@Schema(description = "Name of printer to match against", required = true)
private String printerName;
}

View file

@ -336,7 +336,7 @@ public class PdfUtils {
} }
} }
private static void addImageToDocument( public static void addImageToDocument(
PDDocument doc, PDImageXObject image, String fitOption, boolean autoRotate) PDDocument doc, PDImageXObject image, String fitOption, boolean autoRotate)
throws IOException { throws IOException {
boolean imageIsLandscape = image.getWidth() > image.getHeight(); boolean imageIsLandscape = image.getWidth() > image.getHeight();

View file

@ -19,14 +19,13 @@
<div class="mb-3"> <div class="mb-3">
<label th:text="#{PDFToText.selectText.1}"></label> <label th:text="#{PDFToText.selectText.1}"></label>
<select class="form-control" name="outputFormat"> <select class="form-control" name="outputFormat">
<option value="rtf">RTF</option> <option th:if="${@endpointConfiguration.isEndpointEnabled('pdf-to-rtf')}" value="rtf">RTF</option>
<option value="txt">TXT</option> <option value="txt">TXT</option>
</select> </select>
</div> </div>
<br>
<button type="submit" id="submitBtn" class="btn btn-primary" th:text="#{PDFToText.submit}"></button> <button type="submit" id="submitBtn" class="btn btn-primary" th:text="#{PDFToText.submit}"></button>
</form> </form>
<p class="mt-3" th:text="#{PDFToText.credit}"></p> <p th:if="${@endpointConfiguration.isEndpointEnabled('pdf-to-rtf')}" class="mt-3" th:text="#{PDFToText.credit}"></p>
</div> </div>
</div> </div>
</div> </div>

View file

@ -0,0 +1,29 @@
<!DOCTYPE html>
<html th:lang="${#locale.language}" th:dir="#{language.direction}" th:data-language="${#locale.toString()}" xmlns:th="http://www.thymeleaf.org">
<!--/* Fake-scan page: a single-file upload form; title and header text come from the fakeScan.* message keys. */-->
<head>
<th:block th:insert="~{fragments/common :: head(title=#{fakeScan.title}, header=#{fakeScan.header})}"></th:block>
</head>
<body>
<th:block th:insert="~{fragments/common :: game}"></th:block>
<div id="page-container">
<div id="content-wrap">
<th:block th:insert="~{fragments/navbar.html :: navbar}"></th:block>
<br><br>
<div class="container">
<div class="row justify-content-center">
<div class="col-md-6">
<h2 th:text="#{fakeScan.header}"></h2>
<!--/* Posts the chosen file as multipart/form-data to api/v1/misc/fake-scan. */-->
<form method="post" enctype="multipart/form-data" th:action="@{api/v1/misc/fake-scan}">
<!--/* Shared file selector fragment: field name 'fileInput', single file, PDF only. */-->
<div th:replace="~{fragments/common :: fileSelector(name='fileInput', multiple=false, accept='application/pdf')}"></div>
<br>
<button type="submit" id="submitBtn" class="btn btn-primary" th:text="#{fakeScan.submit}"></button>
</form>
</div>
</div>
</div>
</div>
<th:block th:insert="~{fragments/footer.html :: footer}"></th:block>
</div>
</body>
</html>

View file

@ -0,0 +1,35 @@
<!DOCTYPE html>
<html th:lang="${#locale.language}" th:dir="#{language.direction}" th:data-language="${#locale.toString()}" xmlns:th="http://www.thymeleaf.org">
  <head>
    <th:block th:insert="~{fragments/common :: head(title=#{printFile.title}, header=#{printFile.header})}"></th:block>
  </head>
  <body>
    <th:block th:insert="~{fragments/common :: game}"></th:block>
    <div id="page-container">
      <div id="content-wrap">
        <th:block th:insert="~{fragments/navbar.html :: navbar}"></th:block>
        <br><br>
        <div class="container">
          <div class="row justify-content-center">
            <div class="col-md-6">
              <h2 th:text="#{printFile.header}"></h2>
              <!--/* Posts the file and the printer name as multipart/form-data to api/v1/misc/print-file. */-->
              <form action="#" th:action="@{api/v1/misc/print-file}" method="post" enctype="multipart/form-data">
                <div th:replace="~{fragments/common :: fileSelector(name='fileInput', multiple=false, accept='application/pdf,image/*')}"></div>
                <div class="card mb-3">
                  <div class="card-body">
                    <h4 th:text="#{printFile.selectText.1}">Select Printer</h4> <!-- Fallback text; replaced at render time by message key printFile.selectText.1 -->
                    <label for="printerName" th:text="#{printFile.selectText.2}">Printer Name:</label> <!-- Fallback text; replaced at render time by message key printFile.selectText.2 -->
                    <!--/* The printer name is mandatory: a blank value cannot identify a printer. */-->
                    <input type="text" name="printerName" id="printerName" class="form-control" required>
                  </div>
                </div>
                <button type="submit" id="submitBtn" class="btn btn-primary" th:text="#{printFile.submit}"></button>
              </form>
            </div>
          </div>
        </div>
      </div>
      <th:block th:insert="~{fragments/footer.html :: footer}"></th:block>
    </div>
  </body>
</html>

View file

@ -78,8 +78,8 @@ main() {
# Building Docker images # Building Docker images
docker build --build-arg VERSION_TAG=alpha -t frooodle/s-pdf:latest -f ./Dockerfile . docker build --no-cache --build-arg VERSION_TAG=alpha -t frooodle/s-pdf:latest -f ./Dockerfile .
docker build --build-arg VERSION_TAG=alpha -t frooodle/s-pdf:latest-ultra-lite -f ./Dockerfile-ultra-lite . docker build --no-cache --build-arg VERSION_TAG=alpha -t frooodle/s-pdf:latest-ultra-lite -f ./Dockerfile-ultra-lite .
# Test each configuration # Test each configuration
run_tests "Stirling-PDF-Ultra-Lite" "./exampleYmlFiles/docker-compose-latest-ultra-lite.yml" run_tests "Stirling-PDF-Ultra-Lite" "./exampleYmlFiles/docker-compose-latest-ultra-lite.yml"
@ -94,8 +94,8 @@ main() {
# Building Docker images with security enabled # Building Docker images with security enabled
docker build --build-arg VERSION_TAG=alpha -t frooodle/s-pdf:latest -f ./Dockerfile . docker build --no-cache --build-arg VERSION_TAG=alpha -t frooodle/s-pdf:latest -f ./Dockerfile .
docker build --build-arg VERSION_TAG=alpha -t frooodle/s-pdf:latest-ultra-lite -f ./Dockerfile-ultra-lite . docker build --no-cache --build-arg VERSION_TAG=alpha -t frooodle/s-pdf:latest-ultra-lite -f ./Dockerfile-ultra-lite .
# Test each configuration with security # Test each configuration with security
run_tests "Stirling-PDF-Ultra-Lite-Security" "./exampleYmlFiles/docker-compose-latest-ultra-lite-security.yml" run_tests "Stirling-PDF-Ultra-Lite-Security" "./exampleYmlFiles/docker-compose-latest-ultra-lite-security.yml"