diff --git a/Dockerfile b/Dockerfile index b1d91a48..c98a21c8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # Build jbig2enc in a separate stage -FROM frooodle/stirling-pdf-base:latest +FROM frooodle/stirling-pdf-base:beta4 # Create scripts folder and copy local scripts RUN mkdir /scripts diff --git a/src/main/java/stirling/software/SPDF/config/EndpointConfiguration.java b/src/main/java/stirling/software/SPDF/config/EndpointConfiguration.java index 24f2822d..322de0e2 100644 --- a/src/main/java/stirling/software/SPDF/config/EndpointConfiguration.java +++ b/src/main/java/stirling/software/SPDF/config/EndpointConfiguration.java @@ -83,7 +83,9 @@ public class EndpointConfiguration { addEndpointToGroup("Convert", "pdf-to-text"); addEndpointToGroup("Convert", "pdf-to-html"); addEndpointToGroup("Convert", "pdf-to-xml"); - + addEndpointToGroup("Convert", "html-to-pdf"); + addEndpointToGroup("Convert", "url-to-pdf"); + // Adding endpoints to "Security" group addEndpointToGroup("Security", "add-password"); addEndpointToGroup("Security", "remove-password"); @@ -125,12 +127,15 @@ public class EndpointConfiguration { addEndpointToGroup("CLI", "pdf-to-html"); addEndpointToGroup("CLI", "pdf-to-xml"); addEndpointToGroup("CLI", "ocr-pdf"); + addEndpointToGroup("CLI", "html-to-pdf"); + addEndpointToGroup("CLI", "url-to-pdf"); + //python addEndpointToGroup("Python", "extract-image-scans"); addEndpointToGroup("Python", "remove-blanks"); - - + addEndpointToGroup("Python", "html-to-pdf"); + addEndpointToGroup("Python", "url-to-pdf"); //openCV addEndpointToGroup("OpenCV", "extract-image-scans"); diff --git a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertHtmlToPDF.java b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertHtmlToPDF.java index d30d01dd..2d792ee3 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertHtmlToPDF.java +++ b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertHtmlToPDF.java @@ -4,9 +4,13 @@ import java.io.ByteArrayInputStream; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; import java.util.zip.ZipEntry; import java.util.zip.ZipInputStream; -import java.util.*; + import org.springframework.http.ResponseEntity; import org.springframework.web.bind.annotation.PostMapping; import org.springframework.web.bind.annotation.RequestPart; @@ -40,61 +44,83 @@ public class ConvertHtmlToPDF { if (originalFilename == null || (!originalFilename.endsWith(".html") && !originalFilename.endsWith(".zip"))) { throw new IllegalArgumentException("File must be either .html or .zip format."); } - Path tempOutputFile = Files.createTempFile("output_", ".pdf"); - Path tempInputFile; - - if (originalFilename.endsWith(".html")) { - tempInputFile = Files.createTempFile("input_", ".html"); - Files.write(tempInputFile, fileInput.getBytes()); - } else { - tempInputFile = unzipAndGetMainHtml(fileInput); + Path tempInputFile = null; + byte[] pdfBytes; + try { + if (originalFilename.endsWith(".html")) { + tempInputFile = Files.createTempFile("input_", ".html"); + Files.write(tempInputFile, fileInput.getBytes()); + } else { + tempInputFile = unzipAndGetMainHtml(fileInput); + } + + List command = new ArrayList<>(); + command.add("weasyprint"); + command.add(tempInputFile.toString()); + command.add(tempOutputFile.toString()); + int returnCode = 0; + if (originalFilename.endsWith(".zip")) { + returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT) + .runCommandWithOutputHandling(command, tempInputFile.getParent().toFile()); + } else { + + returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT) + .runCommandWithOutputHandling(command); + } + + pdfBytes = Files.readAllBytes(tempOutputFile); + } finally { + // Clean up temporary files + Files.delete(tempOutputFile); + Files.delete(tempInputFile); + + if (originalFilename.endsWith(".zip")) { + GeneralUtils.deleteDirectory(tempInputFile.getParent()); + } } - - List command = new ArrayList<>(); - command.add("weasyprint"); - command.add(tempInputFile.toString()); - command.add(tempOutputFile.toString()); - int returnCode = 0; - if (originalFilename.endsWith(".zip")) { - returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT) - .runCommandWithOutputHandling(command, tempInputFile.getParent().toFile()); - } else { - - returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT) - .runCommandWithOutputHandling(command); - } - - byte[] pdfBytes = Files.readAllBytes(tempOutputFile); - - // Clean up temporary files - Files.delete(tempOutputFile); - Files.delete(tempInputFile); - if (originalFilename.endsWith(".zip")) { - GeneralUtils.deleteDirectory(tempInputFile.getParent()); - } - String outputFilename = originalFilename.replaceFirst("[.][^.]+$", "") + ".pdf"; // Remove file extension and append .pdf return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename); } - private Path unzipAndGetMainHtml(MultipartFile zipFile) throws IOException { - Path tempDirectory = Files.createTempDirectory("unzipped_"); - try (ZipInputStream zipIn = new ZipInputStream(new ByteArrayInputStream(zipFile.getBytes()))) { - ZipEntry entry = zipIn.getNextEntry(); - while (entry != null) { - Path filePath = tempDirectory.resolve(entry.getName()); - if (!entry.isDirectory()) { - Files.copy(zipIn, filePath); - } - zipIn.closeEntry(); - entry = zipIn.getNextEntry(); - } - } - return tempDirectory.resolve("index.html"); - } + private Path unzipAndGetMainHtml(MultipartFile zipFile) throws IOException { + Path tempDirectory = Files.createTempDirectory("unzipped_"); + try (ZipInputStream zipIn = new ZipInputStream(new ByteArrayInputStream(zipFile.getBytes()))) { + ZipEntry entry = zipIn.getNextEntry(); + while (entry != null) { + Path filePath = tempDirectory.resolve(entry.getName()); + if (entry.isDirectory()) { + Files.createDirectories(filePath); // Explicitly create the directory structure + } else { + Files.createDirectories(filePath.getParent()); // Create parent directories if they don't exist + Files.copy(zipIn, filePath); + } + zipIn.closeEntry(); + entry = zipIn.getNextEntry(); + } + } + + //search for the main HTML file. + try (Stream walk = Files.walk(tempDirectory)) { + List htmlFiles = walk.filter(file -> file.toString().endsWith(".html")) + .collect(Collectors.toList()); + + if (htmlFiles.isEmpty()) { + throw new IOException("No HTML files found in the unzipped directory."); + } + + // Prioritize 'index.html' if it exists, otherwise use the first .html file + for (Path htmlFile : htmlFiles) { + if (htmlFile.getFileName().toString().equals("index.html")) { + return htmlFile; + } + } + + return htmlFiles.get(0); + } + } diff --git a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertWebsiteToPDF.java b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertWebsiteToPDF.java index f69adbd1..9167a6e4 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertWebsiteToPDF.java +++ b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertWebsiteToPDF.java @@ -34,27 +34,30 @@ public class ConvertWebsiteToPDF { String URL) throws IOException, InterruptedException { // Validate the URL format - if(!URL.matches("^https?://.*") && GeneralUtils.isValidURL(URL)) { + if(!URL.matches("^https?://.*") || !GeneralUtils.isValidURL(URL)) { throw new IllegalArgumentException("Invalid URL format provided."); } - - // Prepare the output file path - Path tempOutputFile = Files.createTempFile("output_", ".pdf"); - - // Prepare the OCRmyPDF command - List command = new ArrayList<>(); - command.add("weasyprint"); - command.add(URL); - command.add(tempOutputFile.toString()); - - int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT).runCommandWithOutputHandling(command); - - // Read the optimized PDF file - byte[] pdfBytes = Files.readAllBytes(tempOutputFile); - - // Clean up the temporary files - Files.delete(tempOutputFile); - + Path tempOutputFile = null; + byte[] pdfBytes; + try { + // Prepare the output file path + tempOutputFile = Files.createTempFile("output_", ".pdf"); + + // Prepare the OCRmyPDF command + List command = new ArrayList<>(); + command.add("weasyprint"); + command.add(URL); + command.add(tempOutputFile.toString()); + + int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT).runCommandWithOutputHandling(command); + + // Read the optimized PDF file + pdfBytes = Files.readAllBytes(tempOutputFile); + } + finally { + // Clean up the temporary files + Files.delete(tempOutputFile); + } // Convert URL to a safe filename String outputFilename = convertURLToFileName(URL); diff --git a/src/main/resources/messages_en_GB.properties b/src/main/resources/messages_en_GB.properties index 98ab5d03..9f99a0b6 100644 --- a/src/main/resources/messages_en_GB.properties +++ b/src/main/resources/messages_en_GB.properties @@ -183,6 +183,12 @@ home.autoSplitPDF.desc=Auto Split Scanned PDF with physical scanned page splitte home.sanitizePdf.title=Sanitize home.sanitizePdf.desc=Remove scripts and other elements from PDF files +home.URLToPDF.title=URL/Website To PDF +home.URLToPDF.desc=Converts any http(s)URL to PDF + +home.HTMLToPDF.title=HTML to PDF +home.HTMLToPDF.desc=Converts any HTML file or zip to PDF + ########################### # # diff --git a/src/main/resources/static/images/html.svg b/src/main/resources/static/images/html.svg new file mode 100644 index 00000000..35d72185 --- /dev/null +++ b/src/main/resources/static/images/html.svg @@ -0,0 +1,3 @@ + + + \ No newline at end of file diff --git a/src/main/resources/static/images/url.svg b/src/main/resources/static/images/url.svg new file mode 100644 index 00000000..df35bc8a --- /dev/null +++ b/src/main/resources/static/images/url.svg @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/src/main/resources/templates/fragments/navbar.html b/src/main/resources/templates/fragments/navbar.html index 804a442e..024b7bc3 100644 --- a/src/main/resources/templates/fragments/navbar.html +++ b/src/main/resources/templates/fragments/navbar.html @@ -71,6 +71,9 @@
+
+
+
diff --git a/src/main/resources/templates/home.html b/src/main/resources/templates/home.html index 6d0d2fcf..8cbb6ca7 100644 --- a/src/main/resources/templates/home.html +++ b/src/main/resources/templates/home.html @@ -34,6 +34,10 @@
+
+
+ +
@@ -73,12 +77,13 @@
-
+
-
-
+ +
+