This commit is contained in:
Anthony Stirling 2023-05-19 20:43:30 +01:00
parent f8c855eab1
commit 87cd6dfb54
3 changed files with 166 additions and 67 deletions

View file

@ -50,13 +50,13 @@ jobs:
id: meta
uses: docker/metadata-action@v4.4.0
with:
images: |
${{ secrets.DOCKER_HUB_USERNAME }}/s-pdf
ghcr.io/${{ github.repository_owner }}/s-pdf
tags: |
${{ steps.versionNumber.outputs.versionNumber }}
type=raw,value=latest,enable=${{ github.ref == 'refs/heads/master' }}
type=raw,value=alpha,enable=${{ github.ref == 'refs/heads/main' }}
images: |
${{ secrets.DOCKER_HUB_USERNAME }}/s-pdf
ghcr.io/${{ github.repository_owner }}/s-pdf
tags: |
${{ steps.versionNumber.outputs.versionNumber }}
type=raw,value=latest,enable=${{ github.ref == 'refs/heads/master' }}
type=raw,value=alpha,enable=${{ github.ref == 'refs/heads/main' }}
- name: Set up QEMU
uses: docker/setup-qemu-action@v2.1.0
@ -76,6 +76,22 @@ jobs:
labels: ${{ steps.meta.outputs.labels }}
platforms: linux/amd64,linux/arm64/v8
# Second docker/metadata-action step (id `meta2`). It targets the same two image
# names as the step with id `meta`, but suffixes the version tag with
# `-ultra-light`. Its outputs are read elsewhere in this workflow as
# `steps.meta2.outputs.tags` / `steps.meta2.outputs.labels`.
# NOTE(review): the raw `latest` / `alpha` tag rules here use the same values as
# the `meta` step; if these tags are ever pushed for the ultra-light image they
# would presumably overwrite the full image's tags — confirm this is intended.
- name: Generate tags
id: meta2
uses: docker/metadata-action@v4.4.0
with:
images: |
${{ secrets.DOCKER_HUB_USERNAME }}/s-pdf
ghcr.io/${{ github.repository_owner }}/s-pdf
tags: |
${{ steps.versionNumber.outputs.versionNumber }}-ultra-light
type=raw,value=latest,enable=${{ github.ref == 'refs/heads/master' }}
type=raw,value=alpha,enable=${{ github.ref == 'refs/heads/main' }}
# Lower-cases the repository owner and exposes it as the step output
# `lowercase` (read as `steps.repoowner.outputs.lowercase`), so it can be used
# in the ghcr.io image reference.
- name: Convert repository owner to lowercase
  id: repoowner
  # The `::set-output` workflow command is deprecated; step outputs are written
  # by appending `name=value` to the file named by $GITHUB_OUTPUT instead.
  run: echo "lowercase=$(echo ${{ github.repository_owner }} | awk '{print tolower($0)}')" >> "$GITHUB_OUTPUT"
- name: Build and push Dockerfile-ultralite
uses: docker/build-push-action@v4.0.0
with:
@ -84,12 +100,15 @@ jobs:
push: true
cache-from: type=gha
cache-to: type=gha,mode=max
tags: ${{ steps.meta2.outputs.tags }}
labels: ${{ steps.meta2.outputs.labels }}
tags: |
${{ secrets.DOCKER_HUB_USERNAME }}/s-pdf:ultra-light-latest
ghcr.io/${{ github.repository_owner }}/s-pdf:ultra-light-latest
ghcr.io/${{ steps.repoowner.outputs.lowercase }}/s-pdf:ultra-light-latest
labels: |
${{ steps.meta.outputs.labels }}
${{ steps.meta2.outputs.labels }}
type=raw,value=ultra-light-latest,enable=${{ github.ref == 'refs/heads/master' }}
type=raw,value=ultra-light-alpha,enable=${{ github.ref == 'refs/heads/main' }}
platforms: linux/amd64,linux/arm64/v8

View file

@ -1,11 +1,27 @@
package stirling.software.SPDF.controller.api.other;
import java.awt.Image;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import javax.imageio.ImageIO;
import javax.imageio.stream.MemoryCacheImageOutputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.http.ResponseEntity;
@ -20,31 +36,20 @@ import io.swagger.v3.oas.annotations.Parameter;
import stirling.software.SPDF.utils.PdfUtils;
import stirling.software.SPDF.utils.ProcessExecutor;
import io.swagger.v3.oas.annotations.media.Schema;
@RestController
public class CompressController {
private static final Logger logger = LoggerFactory.getLogger(CompressController.class);
@PostMapping(consumes = "multipart/form-data", value = "/compress-pdf")
@Operation(
summary = "Optimize PDF file",
description = "This endpoint accepts a PDF file and optimizes it based on the provided parameters."
)
@Operation(summary = "Optimize PDF file", description = "This endpoint accepts a PDF file and optimizes it based on the provided parameters.")
public ResponseEntity<byte[]> optimizePdf(
@RequestPart(required = true, value = "fileInput")
@Parameter(description = "The input PDF file to be optimized.", required = true)
MultipartFile inputFile,
@RequestParam("optimizeLevel")
@Parameter(description = "The level of optimization to apply to the PDF file. Higher values indicate greater compression but may reduce quality.",
schema = @Schema(allowableValues = {"0", "1", "2", "3"}), example = "1")
int optimizeLevel,
@RequestParam(name = "fastWebView", required = false)
@Parameter(description = "If true, optimize the PDF for fast web view. This increases the file size by about 25%.", example = "false")
Boolean fastWebView,
@RequestParam(name = "jbig2Lossy", required = false)
@Parameter(description = "If true, apply lossy JB2 compression to the PDF file.", example = "false")
Boolean jbig2Lossy)
throws IOException, InterruptedException {
@RequestPart(required = true, value = "fileInput") @Parameter(description = "The input PDF file to be optimized.", required = true) MultipartFile inputFile,
@RequestParam("optimizeLevel") @Parameter(description = "The level of optimization to apply to the PDF file. Higher values indicate greater compression but may reduce quality.", schema = @Schema(allowableValues = {
"0", "1", "2", "3" }), example = "1") int optimizeLevel,
@RequestParam("expectedOutputSize") @Parameter(description = "The expected output size in bytes.", required = false) Long expectedOutputSize)
throws IOException, InterruptedException {
// Save the uploaded file to a temporary location
Path tempInputFile = Files.createTempFile("input_", ".pdf");
@ -53,31 +58,109 @@ public class CompressController {
// Prepare the output file path
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
// Prepare the OCRmyPDF command
// Prepare the Ghostscript command
List<String> command = new ArrayList<>();
command.add("ocrmypdf");
command.add("--skip-text");
command.add("--tesseract-timeout=0");
command.add("--optimize");
command.add(String.valueOf(optimizeLevel));
command.add("--output-type");
command.add("pdf");
command.add("gs");
command.add("-sDEVICE=pdfwrite");
command.add("-dCompatibilityLevel=1.4");
if (fastWebView != null && fastWebView) {
long fileSize = inputFile.getSize();
long fastWebViewSize = (long) (fileSize * 1.25); // 25% higher than file size
command.add("--fast-web-view");
command.add(String.valueOf(fastWebViewSize));
}
if (jbig2Lossy != null && jbig2Lossy) {
command.add("--jbig2-lossy");
switch (optimizeLevel) {
case 0:
command.add("-dPDFSETTINGS=/default");
break;
case 1:
command.add("-dPDFSETTINGS=/ebook");
break;
case 2:
command.add("-dPDFSETTINGS=/printer");
break;
case 3:
command.add("-dPDFSETTINGS=/prepress");
break;
default:
command.add("-dPDFSETTINGS=/default");
}
command.add("-dNOPAUSE");
command.add("-dQUIET");
command.add("-dBATCH");
command.add("-sOutputFile=" + tempOutputFile.toString());
command.add(tempInputFile.toString());
command.add(tempOutputFile.toString());
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command);
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT).runCommandWithOutputHandling(command);
if (expectedOutputSize != null) {
long outputFileSize = Files.size(tempOutputFile);
if (outputFileSize > expectedOutputSize) {
try (PDDocument doc = PDDocument.load(new File(tempOutputFile.toString()))) {
double scaleFactor = 1.0;
while (true) {
for (PDPage page : doc.getPages()) {
PDResources res = page.getResources();
for (COSName name : res.getXObjectNames()) {
PDXObject xobj = res.getXObject(name);
if (xobj instanceof PDImageXObject) {
PDImageXObject image = (PDImageXObject) xobj;
// Get the image in BufferedImage format
BufferedImage bufferedImage = image.getImage();
// Calculate the new dimensions
int newWidth = (int)(bufferedImage.getWidth() * scaleFactor);
int newHeight = (int)(bufferedImage.getHeight() * scaleFactor);
// If the new dimensions are zero, skip this iteration
if (newWidth == 0 || newHeight == 0) {
continue;
}
// Otherwise, proceed with the scaling
Image scaledImage = bufferedImage.getScaledInstance(newWidth, newHeight, Image.SCALE_SMOOTH);
// Convert the scaled image back to a BufferedImage
BufferedImage scaledBufferedImage = new BufferedImage(newWidth, newHeight, BufferedImage.TYPE_INT_RGB);
scaledBufferedImage.getGraphics().drawImage(scaledImage, 0, 0, null);
// Compress the scaled image
ByteArrayOutputStream compressedImageStream = new ByteArrayOutputStream();
ImageIO.write(scaledBufferedImage, "jpeg", compressedImageStream);
byte[] imageBytes = compressedImageStream.toByteArray();
compressedImageStream.close();
// Convert compressed image back to PDImageXObject
ByteArrayInputStream bais = new ByteArrayInputStream(imageBytes);
PDImageXObject compressedImage = PDImageXObject.createFromByteArray(doc, imageBytes, image.getCOSObject().toString());
// Replace the image in the resources with the compressed version
res.put(name, compressedImage);
}
}
}
// save the document to tempOutputFile again
doc.save(tempOutputFile.toString());
// Check if the overall PDF size is still larger than expectedOutputSize
if (Files.size(tempOutputFile) > expectedOutputSize) {
// The file is still too large, reduce scaleFactor and try again
scaleFactor *= 0.9; // reduce scaleFactor by 10%
// Avoid scaleFactor being too small, causing the image to shrink to 0
if(scaleFactor < 0.1){
throw new RuntimeException("Could not reach the desired size without excessively degrading image quality");
}
} else {
// The file is small enough, break the loop
break;
}
}
}
}
}
// Read the optimized PDF file
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);

View file

@ -11,31 +11,28 @@
<div id="content-wrap">
<div th:insert="~{fragments/navbar.html :: navbar}"></div>
<br> <br>
<div class="container">
<div class="container">
<div class="row justify-content-center">
<div class="col-md-6">
<h2 th:text="#{compress.header}"></h2>
<form action="#" th:action="@{/compress-pdf}" method="post" enctype="multipart/form-data">
<div th:replace="~{fragments/common :: fileSelector(name='fileInput', multiple=false, accept='application/pdf')}"></div>
<div>
<label for="optimizeLevel" th:text="#{compress.selectText.1}"></label>
<select name="optimizeLevel" id="optimizeLevel">
<option value="0" th:text="#{compress.selectText.2}"></option>
<option value="1" selected th:text="#{compress.selectText.3}"></option>
<option value="2" th:text="#{compress.selectText.4}"></option>
<option value="3" th:text="#{compress.selectText.5}"></option>
</select>
</div>
<div>
<input type="checkbox" name="fastWebView" id="fastWebView">
<label for="fastWebView" th:text="#{compress.selectText.6}"></label>
</div>
<div>
<input type="checkbox" name="jbig2Lossy" id="jbig2Lossy">
<label for="jbig2Lossy" th:text="#{compress.selectText.7}"></label>
</div>
<button type="submit" id="submitBtn" class="btn btn-primary" th:text="#{compress.submit}"></button>
</form>
<div th:replace="~{fragments/common :: fileSelector(name='fileInput', multiple=false, accept='application/pdf')}"></div>
<div>
<label for="optimizeLevel" th:text="#{compress.selectText.1}"></label>
<select name="optimizeLevel" id="optimizeLevel">
<option value="0" th:text="#{compress.selectText.2}"></option>
<option value="1" selected th:text="#{compress.selectText.3}"></option>
<option value="2" th:text="#{compress.selectText.4}"></option>
<option value="3" th:text="#{compress.selectText.5}"></option>
</select>
</div>
<div>
<label for="expectedOutputSize" th:text="#{compress.selectText.8}"></label>
<input type="number" name="expectedOutputSize" id="expectedOutputSize" min="1">
</div>
<button type="submit" id="submitBtn" class="btn btn-primary" th:text="#{compress.submit}"></button>
</form>
<p class="mt-3" th:text="#{compress.credit}"></p>
</div>