From 77411e94a48bc35a35a316c334ac1a1b664d1424 Mon Sep 17 00:00:00 2001 From: Anthony Stirling <77850077+Frooodle@users.noreply.github.com> Date: Tue, 1 Aug 2023 00:03:13 +0100 Subject: [PATCH] new features --- build.gradle | 3 +- .../api/ToSinglePageController.java | 86 +++++++++++++++++ .../api/converters/ConvertHtmlToPDF.java | 81 ++-------------- .../api/converters/ConvertMarkdownToPdf.java | 52 ++++++++++ .../web/ConverterWebController.java | 9 +- .../controller/web/GeneralWebController.java | 14 +++ .../software/SPDF/utils/FileToPdf.java | 95 +++++++++++++++++++ .../software/SPDF/utils/WebResponseUtils.java | 17 ++++ src/main/resources/messages_en_GB.properties | 19 ++++ src/main/resources/static/images/extract.svg | 3 + src/main/resources/static/images/info.svg | 4 + src/main/resources/static/images/markdown.svg | 3 + .../resources/static/images/single-page.svg | 4 + .../resources/templates/auto-split-pdf.html | 2 +- .../templates/convert/markdown-to-pdf.html | 30 ++++++ .../resources/templates/extract-page.html | 33 +++++++ src/main/resources/templates/home.html | 7 ++ .../templates/pdf-to-single-page.html | 29 ++++++ 18 files changed, 413 insertions(+), 78 deletions(-) create mode 100644 src/main/java/stirling/software/SPDF/controller/api/ToSinglePageController.java create mode 100644 src/main/java/stirling/software/SPDF/controller/api/converters/ConvertMarkdownToPdf.java create mode 100644 src/main/java/stirling/software/SPDF/utils/FileToPdf.java create mode 100644 src/main/resources/static/images/extract.svg create mode 100644 src/main/resources/static/images/info.svg create mode 100644 src/main/resources/static/images/markdown.svg create mode 100644 src/main/resources/static/images/single-page.svg create mode 100644 src/main/resources/templates/convert/markdown-to-pdf.html create mode 100644 src/main/resources/templates/extract-page.html create mode 100644 src/main/resources/templates/pdf-to-single-page.html diff --git a/build.gradle b/build.gradle index 
688bee00..e851d9a5 100644 --- a/build.gradle +++ b/build.gradle @@ -61,8 +61,9 @@ dependencies { implementation 'com.itextpdf:itext7-core:7.2.5' implementation 'org.springframework.boot:spring-boot-starter-actuator' implementation 'io.micrometer:micrometer-core' - implementation group: 'com.google.zxing', name: 'core', version: '3.5.1' + // https://mvnrepository.com/artifact/org.commonmark/commonmark + implementation 'org.commonmark:commonmark:0.21.0' developmentOnly("org.springframework.boot:spring-boot-devtools") diff --git a/src/main/java/stirling/software/SPDF/controller/api/ToSinglePageController.java b/src/main/java/stirling/software/SPDF/controller/api/ToSinglePageController.java new file mode 100644 index 00000000..2c249b85 --- /dev/null +++ b/src/main/java/stirling/software/SPDF/controller/api/ToSinglePageController.java @@ -0,0 +1,86 @@ +package stirling.software.SPDF.controller.api; + +import java.io.IOException; +import java.io.ByteArrayOutputStream; +import com.itextpdf.kernel.pdf.*; +import com.itextpdf.kernel.pdf.xobject.PdfFormXObject; +import com.itextpdf.kernel.geom.PageSize; +import com.itextpdf.kernel.geom.Rectangle; +import com.itextpdf.layout.Document; +import com.itextpdf.layout.element.Image; +import java.util.ArrayList; +import java.util.List; + +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.PDPageTree; +import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestPart; +import org.springframework.web.bind.annotation.RestController; +import org.springframework.web.multipart.MultipartFile; + +import io.swagger.v3.oas.annotations.Operation; +import io.swagger.v3.oas.annotations.Parameter; 
+import io.swagger.v3.oas.annotations.tags.Tag; +import stirling.software.SPDF.utils.WebResponseUtils; +import org.apache.pdfbox.pdmodel.*; +import org.apache.pdfbox.multipdf.PDFMergerUtility; +@RestController +@Tag(name = "General", description = "General APIs") +public class ToSinglePageController { + + private static final Logger logger = LoggerFactory.getLogger(ToSinglePageController.class); + + + @PostMapping(consumes = "multipart/form-data", value = "/pdf-to-single-page") + @Operation( + summary = "Convert a multi-page PDF into a single long page PDF", + description = "This endpoint converts a multi-page PDF document into a single paged PDF document. The width of the single page will be same as the input's width, but the height will be the sum of all the pages' heights. Input:PDF Output:PDF Type:SISO" + ) + public ResponseEntity pdfToSinglePage( + @RequestPart(required = true, value = "fileInput") + @Parameter(description = "The input multi-page PDF file to be converted into a single page", required = true) + MultipartFile file) throws IOException { + + PdfReader reader = new PdfReader(file.getInputStream()); + PdfDocument sourceDocument = new PdfDocument(reader); + + float totalHeight = 0; + float width = 0; + + for (int i = 1; i <= sourceDocument.getNumberOfPages(); i++) { + Rectangle pageSize = sourceDocument.getPage(i).getPageSize(); + totalHeight += pageSize.getHeight(); + if(width < pageSize.getWidth()) + width = pageSize.getWidth(); + } + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + PdfWriter writer = new PdfWriter(baos); + PdfDocument newDocument = new PdfDocument(writer); + PageSize newPageSize = new PageSize(width, totalHeight); + newDocument.addNewPage(newPageSize); + + Document layoutDoc = new Document(newDocument); + float yOffset = totalHeight; + + for (int i = 1; i <= sourceDocument.getNumberOfPages(); i++) { + PdfFormXObject pageCopy = sourceDocument.getPage(i).copyAsFormXObject(newDocument); + Image copiedPage = new 
Image(pageCopy); + copiedPage.setFixedPosition(0, yOffset - sourceDocument.getPage(i).getPageSize().getHeight()); + yOffset -= sourceDocument.getPage(i).getPageSize().getHeight(); + layoutDoc.add(copiedPage); + } + + layoutDoc.close(); + sourceDocument.close(); + + byte[] result = baos.toByteArray(); + return WebResponseUtils.bytesToWebResponse(result, file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_singlePage.pdf"); + } +} \ No newline at end of file diff --git a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertHtmlToPDF.java b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertHtmlToPDF.java index a5878b04..e054d7f0 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertHtmlToPDF.java +++ b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertHtmlToPDF.java @@ -19,6 +19,7 @@ import org.springframework.web.multipart.MultipartFile; import io.swagger.v3.oas.annotations.Operation; import io.swagger.v3.oas.annotations.tags.Tag; +import stirling.software.SPDF.utils.FileToPdf; import stirling.software.SPDF.utils.GeneralUtils; import stirling.software.SPDF.utils.ProcessExecutor; import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult; @@ -44,87 +45,17 @@ public class ConvertHtmlToPDF { String originalFilename = fileInput.getOriginalFilename(); if (originalFilename == null || (!originalFilename.endsWith(".html") && !originalFilename.endsWith(".zip"))) { throw new IllegalArgumentException("File must be either .html or .zip format."); - } - Path tempOutputFile = Files.createTempFile("output_", ".pdf"); - Path tempInputFile = null; - byte[] pdfBytes; - try { - if (originalFilename.endsWith(".html")) { - tempInputFile = Files.createTempFile("input_", ".html"); - Files.write(tempInputFile, fileInput.getBytes()); - } else { - tempInputFile = unzipAndGetMainHtml(fileInput); - } - - List command = new ArrayList<>(); - command.add("weasyprint"); - 
command.add(tempInputFile.toString()); - command.add(tempOutputFile.toString()); - ProcessExecutorResult returnCode; - if (originalFilename.endsWith(".zip")) { - returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT) - .runCommandWithOutputHandling(command, tempInputFile.getParent().toFile()); - } else { - - returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT) - .runCommandWithOutputHandling(command); - } - - pdfBytes = Files.readAllBytes(tempOutputFile); - } finally { - // Clean up temporary files - Files.delete(tempOutputFile); - Files.delete(tempInputFile); - - if (originalFilename.endsWith(".zip")) { - GeneralUtils.deleteDirectory(tempInputFile.getParent()); - } - } + }byte[] pdfBytes = FileToPdf.convertHtmlToPdf( fileInput.getBytes(), originalFilename); + String outputFilename = originalFilename.replaceFirst("[.][^.]+$", "") + ".pdf"; // Remove file extension and append .pdf + return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename); } - - - private Path unzipAndGetMainHtml(MultipartFile zipFile) throws IOException { - Path tempDirectory = Files.createTempDirectory("unzipped_"); - try (ZipInputStream zipIn = new ZipInputStream(new ByteArrayInputStream(zipFile.getBytes()))) { - ZipEntry entry = zipIn.getNextEntry(); - while (entry != null) { - Path filePath = tempDirectory.resolve(entry.getName()); - if (entry.isDirectory()) { - Files.createDirectories(filePath); // Explicitly create the directory structure - } else { - Files.createDirectories(filePath.getParent()); // Create parent directories if they don't exist - Files.copy(zipIn, filePath); - } - zipIn.closeEntry(); - entry = zipIn.getNextEntry(); - } - } - - //search for the main HTML file. 
- try (Stream walk = Files.walk(tempDirectory)) { - List htmlFiles = walk.filter(file -> file.toString().endsWith(".html")) - .collect(Collectors.toList()); - - if (htmlFiles.isEmpty()) { - throw new IOException("No HTML files found in the unzipped directory."); - } - - // Prioritize 'index.html' if it exists, otherwise use the first .html file - for (Path htmlFile : htmlFiles) { - if (htmlFile.getFileName().toString().equals("index.html")) { - return htmlFile; - } - } - - return htmlFiles.get(0); - } - } + - + } diff --git a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertMarkdownToPdf.java b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertMarkdownToPdf.java new file mode 100644 index 00000000..c1bc1b73 --- /dev/null +++ b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertMarkdownToPdf.java @@ -0,0 +1,52 @@ +package stirling.software.SPDF.controller.api.converters; + +import java.io.IOException; + +import org.commonmark.node.Node; +import org.commonmark.parser.Parser; +import org.commonmark.renderer.html.HtmlRenderer; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestPart; +import org.springframework.web.bind.annotation.RestController; +import org.springframework.web.multipart.MultipartFile; + +import io.swagger.v3.oas.annotations.Operation; +import io.swagger.v3.oas.annotations.tags.Tag; +import stirling.software.SPDF.utils.FileToPdf; +import stirling.software.SPDF.utils.WebResponseUtils; + +@RestController +@Tag(name = "Convert", description = "Convert APIs") +public class ConvertMarkdownToPdf { + + @PostMapping(consumes = "multipart/form-data", value = "/markdown-to-pdf") + @Operation( + summary = "Convert a Markdown file to PDF", + description = "This endpoint takes a Markdown file input, converts it to HTML, and then to PDF format." 
+ ) + public ResponseEntity markdownToPdf( + @RequestPart(required = true, value = "fileInput") MultipartFile fileInput) + throws IOException, InterruptedException { + + if (fileInput == null) { + throw new IllegalArgumentException("Please provide a Markdown file for conversion."); + } + + String originalFilename = fileInput.getOriginalFilename(); + if (originalFilename == null || !originalFilename.endsWith(".md")) { + throw new IllegalArgumentException("File must be in .md format."); + } + + // Convert Markdown to HTML using CommonMark + Parser parser = Parser.builder().build(); + Node document = parser.parse(new String(fileInput.getBytes())); + HtmlRenderer renderer = HtmlRenderer.builder().build(); + String htmlContent = renderer.render(document); + + byte[] pdfBytes = FileToPdf.convertHtmlToPdf(htmlContent.getBytes(), "converted.html"); + + String outputFilename = originalFilename.replaceFirst("[.][^.]+$", "") + ".pdf"; // Remove file extension and append .pdf + return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename); + } +} diff --git a/src/main/java/stirling/software/SPDF/controller/web/ConverterWebController.java b/src/main/java/stirling/software/SPDF/controller/web/ConverterWebController.java index 90429f1a..76e7be8f 100644 --- a/src/main/java/stirling/software/SPDF/controller/web/ConverterWebController.java +++ b/src/main/java/stirling/software/SPDF/controller/web/ConverterWebController.java @@ -25,7 +25,14 @@ public class ConverterWebController { model.addAttribute("currentPage", "html-to-pdf"); return "convert/html-to-pdf"; } - + @GetMapping("/markdown-to-pdf") + @Hidden + public String convertMarkdownToPdfForm(Model model) { + model.addAttribute("currentPage", "markdown-to-pdf"); + return "convert/markdown-to-pdf"; + } + + @GetMapping("/url-to-pdf") @Hidden public String convertURLToPdfForm(Model model) { diff --git a/src/main/java/stirling/software/SPDF/controller/web/GeneralWebController.java 
b/src/main/java/stirling/software/SPDF/controller/web/GeneralWebController.java index 75d67401..4d6e991a 100644 --- a/src/main/java/stirling/software/SPDF/controller/web/GeneralWebController.java +++ b/src/main/java/stirling/software/SPDF/controller/web/GeneralWebController.java @@ -97,6 +97,20 @@ public class GeneralWebController { return "pdf-organizer"; } + @GetMapping("/extract-page") + @Hidden + public String extractPages(Model model) { + model.addAttribute("currentPage", "extract-page"); + return "extract-page"; + } + + @GetMapping("/pdf-to-single-page") + @Hidden + public String pdfToSinglePage(Model model) { + model.addAttribute("currentPage", "pdf-to-single-page"); + return "pdf-to-single-page"; + } + @GetMapping("/rotate-pdf") @Hidden public String rotatePdfForm(Model model) { diff --git a/src/main/java/stirling/software/SPDF/utils/FileToPdf.java b/src/main/java/stirling/software/SPDF/utils/FileToPdf.java new file mode 100644 index 00000000..9515a3ac --- /dev/null +++ b/src/main/java/stirling/software/SPDF/utils/FileToPdf.java @@ -0,0 +1,95 @@ +package stirling.software.SPDF.utils; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import java.util.zip.ZipEntry; +import java.util.zip.ZipInputStream; + +import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult; + +public class FileToPdf { + public static byte[] convertHtmlToPdf(byte[] fileBytes, String fileName) throws IOException, InterruptedException { + + Path tempOutputFile = Files.createTempFile("output_", ".pdf"); + Path tempInputFile = null; + byte[] pdfBytes; + try { + if (fileName.endsWith(".html")) { + tempInputFile = Files.createTempFile("input_", ".html"); + Files.write(tempInputFile, fileBytes); + } else { + tempInputFile = unzipAndGetMainHtml(fileBytes); + } + + List command = new 
ArrayList<>(); + command.add("weasyprint"); + command.add(tempInputFile.toString()); + command.add(tempOutputFile.toString()); + ProcessExecutorResult returnCode; + if (fileName.endsWith(".zip")) { + returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT) + .runCommandWithOutputHandling(command, tempInputFile.getParent().toFile()); + } else { + + returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT) + .runCommandWithOutputHandling(command); + } + + pdfBytes = Files.readAllBytes(tempOutputFile); + } finally { + // Clean up temporary files + Files.delete(tempOutputFile); + Files.delete(tempInputFile); + + if (fileName.endsWith(".zip")) { + GeneralUtils.deleteDirectory(tempInputFile.getParent()); + } + } + + return pdfBytes; + } + + + private static Path unzipAndGetMainHtml(byte[] fileBytes) throws IOException { + Path tempDirectory = Files.createTempDirectory("unzipped_"); + try (ZipInputStream zipIn = new ZipInputStream(new ByteArrayInputStream(fileBytes))) { + ZipEntry entry = zipIn.getNextEntry(); + while (entry != null) { + Path filePath = tempDirectory.resolve(entry.getName()); + if (entry.isDirectory()) { + Files.createDirectories(filePath); // Explicitly create the directory structure + } else { + Files.createDirectories(filePath.getParent()); // Create parent directories if they don't exist + Files.copy(zipIn, filePath); + } + zipIn.closeEntry(); + entry = zipIn.getNextEntry(); + } + } + + //search for the main HTML file. 
+ try (Stream walk = Files.walk(tempDirectory)) { + List htmlFiles = walk.filter(file -> file.toString().endsWith(".html")) + .collect(Collectors.toList()); + + if (htmlFiles.isEmpty()) { + throw new IOException("No HTML files found in the unzipped directory."); + } + + // Prioritize 'index.html' if it exists, otherwise use the first .html file + for (Path htmlFile : htmlFiles) { + if (htmlFile.getFileName().toString().equals("index.html")) { + return htmlFile; + } + } + + return htmlFiles.get(0); + } + } +} diff --git a/src/main/java/stirling/software/SPDF/utils/WebResponseUtils.java b/src/main/java/stirling/software/SPDF/utils/WebResponseUtils.java index 59c0b056..09a395ba 100644 --- a/src/main/java/stirling/software/SPDF/utils/WebResponseUtils.java +++ b/src/main/java/stirling/software/SPDF/utils/WebResponseUtils.java @@ -12,6 +12,9 @@ import org.springframework.http.MediaType; import org.springframework.http.ResponseEntity; import org.springframework.web.multipart.MultipartFile; +import com.itextpdf.kernel.pdf.PdfDocument; +import com.itextpdf.kernel.pdf.PdfWriter; + public class WebResponseUtils { public static ResponseEntity boasToWebResponse(ByteArrayOutputStream baos, String docName) throws IOException { @@ -57,5 +60,19 @@ public class WebResponseUtils { return boasToWebResponse(baos, docName); } + + public static ResponseEntity pdfDocToWebResponse(PdfDocument document, String docName) throws IOException { + + // Open Byte Array and save document to it + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + PdfWriter writer = new PdfWriter(baos); + PdfDocument newDocument = new PdfDocument(writer); + + document.copyPagesTo(1, document.getNumberOfPages(), newDocument); + newDocument.close(); + + return boasToWebResponse(baos, docName); + } + } diff --git a/src/main/resources/messages_en_GB.properties b/src/main/resources/messages_en_GB.properties index 0d881b65..807e8581 100644 --- a/src/main/resources/messages_en_GB.properties +++ 
b/src/main/resources/messages_en_GB.properties @@ -236,6 +236,25 @@ home.HTMLToPDF.desc=Converts any HTML file or zip to PDF HTMLToPDF.tags=markup,web-content,transformation,convert +home.MarkdownToPDF.title=Markdown to PDF +home.MarkdownToPDF.desc=Converts any Markdown file to PDF +MarkdownToPDF.tags=markup,web-content,transformation,convert + + +home.getPdfInfo.title=Get ALL Info on PDF +home.getPdfInfo.desc=Grabs any and all information possible on PDFs +getPdfInfo.tags=information,data,stats,statistics + + +home.extractPage.title=Extract page(s) +home.extractPage.desc=Extracts select pages from PDF +extractPage.tags=extract + + +home.PdfToSinglePage.title=PDF to Single Large Page +home.PdfToSinglePage.desc=Merges all PDF pages into one large single page +PdfToSinglePage.tags=single page + ########################### # # # WEB PAGES # diff --git a/src/main/resources/static/images/extract.svg b/src/main/resources/static/images/extract.svg new file mode 100644 index 00000000..d21f03eb --- /dev/null +++ b/src/main/resources/static/images/extract.svg @@ -0,0 +1,3 @@ + + + \ No newline at end of file diff --git a/src/main/resources/static/images/info.svg b/src/main/resources/static/images/info.svg new file mode 100644 index 00000000..8f48f86c --- /dev/null +++ b/src/main/resources/static/images/info.svg @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/src/main/resources/static/images/markdown.svg b/src/main/resources/static/images/markdown.svg new file mode 100644 index 00000000..ca5cd597 --- /dev/null +++ b/src/main/resources/static/images/markdown.svg @@ -0,0 +1,3 @@ + + + \ No newline at end of file diff --git a/src/main/resources/static/images/single-page.svg b/src/main/resources/static/images/single-page.svg new file mode 100644 index 00000000..4f57d79b --- /dev/null +++ b/src/main/resources/static/images/single-page.svg @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/src/main/resources/templates/auto-split-pdf.html 
b/src/main/resources/templates/auto-split-pdf.html index 4f3045e0..d6f68fe1 100644 --- a/src/main/resources/templates/auto-split-pdf.html +++ b/src/main/resources/templates/auto-split-pdf.html @@ -22,7 +22,7 @@
  • -
    +

    diff --git a/src/main/resources/templates/convert/markdown-to-pdf.html b/src/main/resources/templates/convert/markdown-to-pdf.html new file mode 100644 index 00000000..4637d129 --- /dev/null +++ b/src/main/resources/templates/convert/markdown-to-pdf.html @@ -0,0 +1,30 @@ + + + + + + +
    +
    +
    +

    +
    +
    +
    +

    + +
    +
    + + + +

    +

    +
    +
    +
    +
    +
    +
    + + diff --git a/src/main/resources/templates/extract-page.html b/src/main/resources/templates/extract-page.html new file mode 100644 index 00000000..0a5fb158 --- /dev/null +++ b/src/main/resources/templates/extract-page.html @@ -0,0 +1,33 @@ + + + + + + + +
    +
    +
    +

    +
    +
    +
    +

    +
    +
    + +
    + + +
    + + +
    +
    +
    +
    +
    +
    +
    + + \ No newline at end of file diff --git a/src/main/resources/templates/home.html b/src/main/resources/templates/home.html index 8cbb6ca7..c5c8a0da 100644 --- a/src/main/resources/templates/home.html +++ b/src/main/resources/templates/home.html @@ -84,6 +84,13 @@
    +
    +
    +
    +
    + + +
    diff --git a/src/main/resources/templates/pdf-to-single-page.html b/src/main/resources/templates/pdf-to-single-page.html new file mode 100644 index 00000000..1f5b64c6 --- /dev/null +++ b/src/main/resources/templates/pdf-to-single-page.html @@ -0,0 +1,29 @@ + + + + + + + +
    +
    +
    +

    +
    +
    +
    +

    +
    +

    +
    + +
    +
    +
    +
    + +
    +
    +
    + + \ No newline at end of file