This commit is contained in:
Anthony Stirling 2023-05-19 23:58:54 +01:00
parent 87cd6dfb54
commit 9d80458250
7 changed files with 145 additions and 83 deletions

View file

@ -1,27 +1,3 @@
# Build jbig2enc in a separate stage
FROM debian:bullseye-slim as jbig2enc_builder
RUN apt-get update && \
apt-get install -y --no-install-recommends \
git \
automake \
autoconf \
libtool \
libleptonica-dev \
pkg-config \
ca-certificates \
zlib1g-dev \
make \
g++
RUN git clone https://github.com/agl/jbig2enc && \
cd jbig2enc && \
./autogen.sh && \
./configure && \
make && \
make install
# Main stage
FROM openjdk:17-jdk-slim AS base
RUN apt-get update && \
@ -59,4 +35,3 @@ RUN apt-get update && \
# Final stage: Copy necessary files from the previous stage
FROM base
COPY --from=python-packages /usr/local /usr/local
COPY --from=jbig2enc_builder /usr/local/bin/jbig2 /usr/local/bin/jbig2

View file

@ -5,8 +5,8 @@ import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import org.apache.commons.io.FileUtils;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
@ -45,55 +45,98 @@ public class CompressController {
@PostMapping(consumes = "multipart/form-data", value = "/compress-pdf")
@Operation(summary = "Optimize PDF file", description = "This endpoint accepts a PDF file and optimizes it based on the provided parameters.")
public ResponseEntity<byte[]> optimizePdf(
@RequestPart(required = true, value = "fileInput") @Parameter(description = "The input PDF file to be optimized.", required = true) MultipartFile inputFile,
@RequestParam("optimizeLevel") @Parameter(description = "The level of optimization to apply to the PDF file. Higher values indicate greater compression but may reduce quality.", schema = @Schema(allowableValues = {
"0", "1", "2", "3" }), example = "1") int optimizeLevel,
@RequestParam("expectedOutputSize") @Parameter(description = "The expected output size in bytes.", required = false) Long expectedOutputSize)
throws IOException, InterruptedException {
@RequestPart(value = "fileInput") @Parameter(description = "The input PDF file to be optimized.", required = true) MultipartFile inputFile,
@RequestParam(required = false, value = "optimizeLevel") @Parameter(description = "The level of optimization to apply to the PDF file. Higher values indicate greater compression but may reduce quality.", schema = @Schema(allowableValues = {
"1", "2", "3", "4", "5" })) Integer optimizeLevel,
@RequestParam(value = "expectedOutputSize", required = false) @Parameter(description = "The expected output size, e.g. '100MB', '25KB', etc.", required = false) String expectedOutputSizeString)
throws Exception {
if(expectedOutputSizeString == null && optimizeLevel == null) {
throw new Exception("Both expected output size and optimize level are not specified");
}
Long expectedOutputSize = 0L;
if (expectedOutputSizeString != null) {
expectedOutputSize = PdfUtils.convertSizeToBytes(expectedOutputSizeString);
}
// Save the uploaded file to a temporary location
Path tempInputFile = Files.createTempFile("input_", ".pdf");
inputFile.transferTo(tempInputFile.toFile());
long inputFileSize = Files.size(tempInputFile);
// Prepare the output file path
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
// Prepare the Ghostscript command
List<String> command = new ArrayList<>();
command.add("gs");
command.add("-sDEVICE=pdfwrite");
command.add("-dCompatibilityLevel=1.4");
switch (optimizeLevel) {
case 0:
command.add("-dPDFSETTINGS=/default");
break;
case 1:
command.add("-dPDFSETTINGS=/ebook");
break;
case 2:
command.add("-dPDFSETTINGS=/printer");
break;
case 3:
command.add("-dPDFSETTINGS=/prepress");
break;
default:
command.add("-dPDFSETTINGS=/default");
// Determine initial optimization level based on expected size reduction, only if optimizeLevel is not provided
if(optimizeLevel == null) {
double sizeReductionRatio = expectedOutputSize / (double) inputFileSize;
if (sizeReductionRatio > 0.7) {
optimizeLevel = 1;
} else if (sizeReductionRatio > 0.5) {
optimizeLevel = 2;
} else if (sizeReductionRatio > 0.35) {
optimizeLevel = 3;
} else {
optimizeLevel = 4;
}
}
boolean sizeMet = expectedOutputSize == 0L;
while (!sizeMet && optimizeLevel <= 5) {
// Prepare the Ghostscript command
List<String> command = new ArrayList<>();
command.add("gs");
command.add("-sDEVICE=pdfwrite");
command.add("-dCompatibilityLevel=1.4");
switch (optimizeLevel) {
case 1:
command.add("-dPDFSETTINGS=/prepress");
break;
case 2:
command.add("-dPDFSETTINGS=/printer");
break;
case 3:
command.add("-dPDFSETTINGS=/default");
break;
case 4:
command.add("-dPDFSETTINGS=/ebook");
break;
case 5:
command.add("-dPDFSETTINGS=/screen");
break;
default:
command.add("-dPDFSETTINGS=/default");
}
command.add("-dNOPAUSE");
command.add("-dQUIET");
command.add("-dBATCH");
command.add("-sOutputFile=" + tempOutputFile.toString());
command.add(tempInputFile.toString());
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT).runCommandWithOutputHandling(command);
// Check if file size is within expected size
long outputFileSize = Files.size(tempOutputFile);
if (outputFileSize <= expectedOutputSize) {
sizeMet = true;
} else {
// Increase optimization level for next iteration
optimizeLevel++;
System.out.println("Increasing ghostscript optimisation level to " + optimizeLevel);
}
}
command.add("-dNOPAUSE");
command.add("-dQUIET");
command.add("-dBATCH");
command.add("-sOutputFile=" + tempOutputFile.toString());
command.add(tempInputFile.toString());
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT).runCommandWithOutputHandling(command);
if (expectedOutputSize != null) {
long outputFileSize = Files.size(tempOutputFile);
if (outputFileSize > expectedOutputSize) {
try (PDDocument doc = PDDocument.load(new File(tempOutputFile.toString()))) {
long previousFileSize = 0;
double scaleFactor = 1.0;
while (true) {
for (PDPage page : doc.getPages()) {
@ -142,14 +185,21 @@ public class CompressController {
// save the document to tempOutputFile again
doc.save(tempOutputFile.toString());
long currentSize = Files.size(tempOutputFile);
// Check if the overall PDF size is still larger than expectedOutputSize
if (Files.size(tempOutputFile) > expectedOutputSize) {
if (currentSize > expectedOutputSize) {
// Log the current file size and scaleFactor
System.out.println("Current file size: " + FileUtils.byteCountToDisplaySize(currentSize));
System.out.println("Current scale factor: " + scaleFactor);
// The file is still too large, reduce scaleFactor and try again
scaleFactor *= 0.9; // reduce scaleFactor by 10%
// Avoid scaleFactor being too small, causing the image to shrink to 0
if(scaleFactor < 0.1){
throw new RuntimeException("Could not reach the desired size without excessively degrading image quality");
if(scaleFactor < 0.2 || previousFileSize == currentSize){
throw new RuntimeException("Could not reach the desired size without excessively degrading image quality, lowest size recommended is " + FileUtils.byteCountToDisplaySize(currentSize) + ", " + currentSize + " bytes");
}
previousFileSize = currentSize;
} else {
// The file is small enough, break the loop
break;

View file

@ -22,6 +22,7 @@ import java.security.cert.X509Certificate;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
@ -287,4 +288,30 @@ public class PdfUtils {
return PdfUtils.boasToWebResponse(baos, docName);
}
public static Long convertSizeToBytes(String sizeStr) {
if (sizeStr == null) {
return null;
}
sizeStr = sizeStr.trim().toUpperCase();
try {
if (sizeStr.endsWith("KB")) {
return Long.parseLong(sizeStr.substring(0, sizeStr.length() - 2)) * 1024;
} else if (sizeStr.endsWith("MB")) {
return Long.parseLong(sizeStr.substring(0, sizeStr.length() - 2)) * 1024 * 1024;
} else if (sizeStr.endsWith("GB")) {
return Long.parseLong(sizeStr.substring(0, sizeStr.length() - 2)) * 1024 * 1024 * 1024;
} else if (sizeStr.endsWith("B")) {
return Long.parseLong(sizeStr.substring(0, sizeStr.length() - 1));
} else {
// Input string does not have a valid format, handle this case
}
} catch (NumberFormatException e) {
// The numeric part of the input string cannot be parsed, handle this case
}
return null;
}
}

View file

@ -224,12 +224,7 @@ compress.title=Compress
compress.header=Compress PDF
compress.credit=This service uses OCRmyPDF for PDF Compress/Optimisation.
compress.selectText.1=Optimization level:
compress.selectText.2=0 (No optimization)
compress.selectText.3=1 (Default, lossless optimization)
compress.selectText.4=2 (Lossy optimization)
compress.selectText.5=3 (Lossy optimization, more aggressive)
compress.selectText.6=Enable fast web view (linearize PDF)
compress.selectText.7=Enable lossy JBIG2 encoding
compress.selectText.2=Expected PDF Size (e.g. 100MB, 25KB, 500B)
compress.submit=Compress

View file

@ -5,6 +5,11 @@ body {
background-color: rgb(var(--body-background-color)) !important;
color: rgb(var(--base-font-color)) !important;
}
.card {
background-color: rgb(var(--body-background-color)) !important;
border: 1px solid #999;
color: rgb(var(--base-font-color)) !important;
}
.dark-card {
background-color: rgb(var(--body-background-color)) !important;

View file

@ -71,6 +71,11 @@ filter: invert(0.2) sepia(2) saturate(50) hue-rotate(190deg);
.favorite-icon img {
filter: brightness(0);
}
.jumbotron {
padding: 3rem 3rem; /* Reduce vertical padding */
}
</style>
<body>

View file

@ -17,24 +17,29 @@
<h2 th:text="#{compress.header}"></h2>
<form action="#" th:action="@{/compress-pdf}" method="post" enctype="multipart/form-data">
<div th:replace="~{fragments/common :: fileSelector(name='fileInput', multiple=false, accept='application/pdf')}"></div>
<div>
<label for="optimizeLevel" th:text="#{compress.selectText.1}"></label>
<select name="optimizeLevel" id="optimizeLevel">
<option value="0" th:text="#{compress.selectText.2}"></option>
<option value="1" selected th:text="#{compress.selectText.3}"></option>
<option value="2" th:text="#{compress.selectText.4}"></option>
<option value="3" th:text="#{compress.selectText.5}"></option>
</select>
<div class="card mb-3">
<div class="card-body">
<h4>Manual Mode - From 1 to 5</h4>
<label for="optimizeLevel" th:text="#{compress.selectText.1}"></label>
<select name="optimizeLevel" id="optimizeLevel" class="form-control">
<option value="1">1</option>
<option value="2" selected>2</option>
<option value="3">3</option>
<option value="4">4</option>
<option value="5">5</option>
</select>
</div>
</div>
<div>
<label for="expectedOutputSize" th:text="#{compress.selectText.8}"></label>
<input type="number" name="expectedOutputSize" id="expectedOutputSize" min="1">
<div class="card mb-3">
<div class="card-body">
<h4>Auto mode - Auto adjusts quality to get PDF to exact size</h4>
<label for="expectedOutputSize" th:text="#{compress.selectText.2}"></label>
<input type="text" name="expectedOutputSize" id="expectedOutputSize" min="1" class="form-control">
</div>
</div>
<button type="submit" id="submitBtn" class="btn btn-primary" th:text="#{compress.submit}"></button>
</form>
<p class="mt-3" th:text="#{compress.credit}"></p>
</div>
</div>
</div>