From 87cd6dfb54efaa6b842bb71916be937652c713ce Mon Sep 17 00:00:00 2001 From: Anthony Stirling <77850077+Frooodle@users.noreply.github.com> Date: Fri, 19 May 2023 20:43:30 +0100 Subject: [PATCH] compress --- .github/workflows/push-docker.yml | 37 ++++- .../api/other/CompressController.java | 157 +++++++++++++----- .../templates/other/compress-pdf.html | 39 ++--- 3 files changed, 166 insertions(+), 67 deletions(-) diff --git a/.github/workflows/push-docker.yml b/.github/workflows/push-docker.yml index 1ffb470e..f76434a8 100644 --- a/.github/workflows/push-docker.yml +++ b/.github/workflows/push-docker.yml @@ -50,13 +50,13 @@ jobs: id: meta uses: docker/metadata-action@v4.4.0 with: - images: | - ${{ secrets.DOCKER_HUB_USERNAME }}/s-pdf - ghcr.io/${{ github.repository_owner }}/s-pdf - tags: | - ${{ steps.versionNumber.outputs.versionNumber }} - type=raw,value=latest,enable=${{ github.ref == 'refs/heads/master' }} - type=raw,value=alpha,enable=${{ github.ref == 'refs/heads/main' }} + images: | + ${{ secrets.DOCKER_HUB_USERNAME }}/s-pdf + ghcr.io/${{ github.repository_owner }}/s-pdf + tags: | + ${{ steps.versionNumber.outputs.versionNumber }} + type=raw,value=latest,enable=${{ github.ref == 'refs/heads/master' }} + type=raw,value=alpha,enable=${{ github.ref == 'refs/heads/main' }} - name: Set up QEMU uses: docker/setup-qemu-action@v2.1.0 @@ -76,6 +76,22 @@ jobs: labels: ${{ steps.meta.outputs.labels }} platforms: linux/amd64,linux/arm64/v8 + - name: Generate tags + id: meta2 + uses: docker/metadata-action@v4.4.0 + with: + images: | + ${{ secrets.DOCKER_HUB_USERNAME }}/s-pdf + ghcr.io/${{ github.repository_owner }}/s-pdf + tags: | + ${{ steps.versionNumber.outputs.versionNumber }}-ultra-light + type=raw,value=latest,enable=${{ github.ref == 'refs/heads/master' }} + type=raw,value=alpha,enable=${{ github.ref == 'refs/heads/main' }} + + - name: Convert repository owner to lowercase + id: repoowner + run: echo "::set-output name=lowercase::$(echo ${{ github.repository_owner }} | awk '{print tolower($0)}')" + - name: Build and push Dockerfile-ultralite uses: docker/build-push-action@v4.0.0 with: @@ -84,12 +100,15 @@ jobs: push: true cache-from: type=gha cache-to: type=gha,mode=max + tags: ${{ steps.meta2.outputs.tags }} + labels: ${{ steps.meta2.outputs.labels }} tags: | ${{ secrets.DOCKER_HUB_USERNAME }}/s-pdf:ultra-light-latest - ghcr.io/${{ github.repository_owner }}/s-pdf:ultra-light-latest + ghcr.io/${{ steps.repoowner.outputs.lowercase }}/s-pdf:ultra-light-latest labels: | - ${{ steps.meta.outputs.labels }} + ${{ steps.meta2.outputs.labels }} type=raw,value=ultra-light-latest,enable=${{ github.ref == 'refs/heads/master' }} type=raw,value=ultra-light-alpha,enable=${{ github.ref == 'refs/heads/main' }} platforms: linux/amd64,linux/arm64/v8 + diff --git a/src/main/java/stirling/software/SPDF/controller/api/other/CompressController.java b/src/main/java/stirling/software/SPDF/controller/api/other/CompressController.java index 89f00981..009cd6e9 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/other/CompressController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/other/CompressController.java @@ -1,11 +1,27 @@ package stirling.software.SPDF.controller.api.other; +import java.awt.Image; +import java.awt.image.BufferedImage; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; import java.io.IOException; + +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.PDResources; +import org.apache.pdfbox.pdmodel.graphics.PDXObject; +import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject; + import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; import java.util.List; +import javax.imageio.ImageIO; +import javax.imageio.stream.MemoryCacheImageOutputStream; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.http.ResponseEntity; @@ -20,31 +36,20 @@ import io.swagger.v3.oas.annotations.Parameter; import stirling.software.SPDF.utils.PdfUtils; import stirling.software.SPDF.utils.ProcessExecutor; import io.swagger.v3.oas.annotations.media.Schema; + @RestController public class CompressController { private static final Logger logger = LoggerFactory.getLogger(CompressController.class); @PostMapping(consumes = "multipart/form-data", value = "/compress-pdf") - @Operation( - summary = "Optimize PDF file", - description = "This endpoint accepts a PDF file and optimizes it based on the provided parameters." - ) + @Operation(summary = "Optimize PDF file", description = "This endpoint accepts a PDF file and optimizes it based on the provided parameters.") public ResponseEntity optimizePdf( - @RequestPart(required = true, value = "fileInput") - @Parameter(description = "The input PDF file to be optimized.", required = true) - MultipartFile inputFile, - @RequestParam("optimizeLevel") - @Parameter(description = "The level of optimization to apply to the PDF file. Higher values indicate greater compression but may reduce quality.", - schema = @Schema(allowableValues = {"0", "1", "2", "3"}), example = "1") - int optimizeLevel, - @RequestParam(name = "fastWebView", required = false) - @Parameter(description = "If true, optimize the PDF for fast web view. This increases the file size by about 25%.", example = "false") - Boolean fastWebView, - @RequestParam(name = "jbig2Lossy", required = false) - @Parameter(description = "If true, apply lossy JB2 compression to the PDF file.", example = "false") - Boolean jbig2Lossy) - throws IOException, InterruptedException { + @RequestPart(required = true, value = "fileInput") @Parameter(description = "The input PDF file to be optimized.", required = true) MultipartFile inputFile, + @RequestParam("optimizeLevel") @Parameter(description = "The level of optimization to apply to the PDF file. Higher values indicate greater compression but may reduce quality.", schema = @Schema(allowableValues = { + "0", "1", "2", "3" }), example = "1") int optimizeLevel, + @RequestParam("expectedOutputSize") @Parameter(description = "The expected output size in bytes.", required = false) Long expectedOutputSize) + throws IOException, InterruptedException { // Save the uploaded file to a temporary location Path tempInputFile = Files.createTempFile("input_", ".pdf"); @@ -53,31 +58,109 @@ public class CompressController { // Prepare the output file path Path tempOutputFile = Files.createTempFile("output_", ".pdf"); - // Prepare the OCRmyPDF command + // Prepare the Ghostscript command List command = new ArrayList<>(); - command.add("ocrmypdf"); - command.add("--skip-text"); - command.add("--tesseract-timeout=0"); - command.add("--optimize"); - command.add(String.valueOf(optimizeLevel)); - command.add("--output-type"); - command.add("pdf"); + command.add("gs"); + command.add("-sDEVICE=pdfwrite"); + command.add("-dCompatibilityLevel=1.4"); - if (fastWebView != null && fastWebView) { - long fileSize = inputFile.getSize(); - long fastWebViewSize = (long) (fileSize * 1.25); // 25% higher than file size - command.add("--fast-web-view"); - command.add(String.valueOf(fastWebViewSize)); - } - - if (jbig2Lossy != null && jbig2Lossy) { - command.add("--jbig2-lossy"); + switch (optimizeLevel) { + case 0: + command.add("-dPDFSETTINGS=/default"); + break; + case 1: + command.add("-dPDFSETTINGS=/ebook"); + break; + case 2: + command.add("-dPDFSETTINGS=/printer"); + break; + case 3: + command.add("-dPDFSETTINGS=/prepress"); + break; + default: + command.add("-dPDFSETTINGS=/default"); } + command.add("-dNOPAUSE"); + command.add("-dQUIET"); + command.add("-dBATCH"); + command.add("-sOutputFile=" + tempOutputFile.toString()); command.add(tempInputFile.toString()); - command.add(tempOutputFile.toString()); - int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command); + int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT).runCommandWithOutputHandling(command); + + if (expectedOutputSize != null) { + long outputFileSize = Files.size(tempOutputFile); + if (outputFileSize > expectedOutputSize) { + try (PDDocument doc = PDDocument.load(new File(tempOutputFile.toString()))) { + + double scaleFactor = 1.0; + while (true) { + for (PDPage page : doc.getPages()) { + PDResources res = page.getResources(); + + for (COSName name : res.getXObjectNames()) { + PDXObject xobj = res.getXObject(name); + if (xobj instanceof PDImageXObject) { + PDImageXObject image = (PDImageXObject) xobj; + + // Get the image in BufferedImage format + BufferedImage bufferedImage = image.getImage(); + + // Calculate the new dimensions + int newWidth = (int)(bufferedImage.getWidth() * scaleFactor); + int newHeight = (int)(bufferedImage.getHeight() * scaleFactor); + + // If the new dimensions are zero, skip this iteration + if (newWidth == 0 || newHeight == 0) { + continue; + } + + // Otherwise, proceed with the scaling + Image scaledImage = bufferedImage.getScaledInstance(newWidth, newHeight, Image.SCALE_SMOOTH); + + // Convert the scaled image back to a BufferedImage + BufferedImage scaledBufferedImage = new BufferedImage(newWidth, newHeight, BufferedImage.TYPE_INT_RGB); + scaledBufferedImage.getGraphics().drawImage(scaledImage, 0, 0, null); + + // Compress the scaled image + ByteArrayOutputStream compressedImageStream = new ByteArrayOutputStream(); + ImageIO.write(scaledBufferedImage, "jpeg", compressedImageStream); + byte[] imageBytes = compressedImageStream.toByteArray(); + compressedImageStream.close(); + + // Convert compressed image back to PDImageXObject + ByteArrayInputStream bais = new ByteArrayInputStream(imageBytes); + PDImageXObject compressedImage = PDImageXObject.createFromByteArray(doc, imageBytes, image.getCOSObject().toString()); + + // Replace the image in the resources with the compressed version + res.put(name, compressedImage); + } + } + } + + // save the document to tempOutputFile again + doc.save(tempOutputFile.toString()); + + // Check if the overall PDF size is still larger than expectedOutputSize + if (Files.size(tempOutputFile) > expectedOutputSize) { + // The file is still too large, reduce scaleFactor and try again + scaleFactor *= 0.9; // reduce scaleFactor by 10% + // Avoid scaleFactor being too small, causing the image to shrink to 0 + if(scaleFactor < 0.1){ + throw new RuntimeException("Could not reach the desired size without excessively degrading image quality"); + } + } else { + // The file is small enough, break the loop + break; + } + } + + } + + + } + } // Read the optimized PDF file byte[] pdfBytes = Files.readAllBytes(tempOutputFile); diff --git a/src/main/resources/templates/other/compress-pdf.html b/src/main/resources/templates/other/compress-pdf.html index f173dba4..a082e622 100644 --- a/src/main/resources/templates/other/compress-pdf.html +++ b/src/main/resources/templates/other/compress-pdf.html @@ -11,31 +11,28 @@


-
+
R

-
-
- - -
-
- - -
-
- - -
- -
+
+
+ + +
+
+ + +
+ + +