html to pdf
This commit is contained in:
parent
2fd8c643af
commit
116d103119
9 changed files with 129 additions and 74 deletions
|
@ -1,5 +1,5 @@
|
||||||
# Build jbig2enc in a separate stage
|
# Build jbig2enc in a separate stage
|
||||||
FROM frooodle/stirling-pdf-base:latest
|
FROM frooodle/stirling-pdf-base:beta4
|
||||||
|
|
||||||
# Create scripts folder and copy local scripts
|
# Create scripts folder and copy local scripts
|
||||||
RUN mkdir /scripts
|
RUN mkdir /scripts
|
||||||
|
|
|
@ -83,6 +83,8 @@ public class EndpointConfiguration {
|
||||||
addEndpointToGroup("Convert", "pdf-to-text");
|
addEndpointToGroup("Convert", "pdf-to-text");
|
||||||
addEndpointToGroup("Convert", "pdf-to-html");
|
addEndpointToGroup("Convert", "pdf-to-html");
|
||||||
addEndpointToGroup("Convert", "pdf-to-xml");
|
addEndpointToGroup("Convert", "pdf-to-xml");
|
||||||
|
addEndpointToGroup("Convert", "html-to-pdf");
|
||||||
|
addEndpointToGroup("Convert", "url-to-pdf");
|
||||||
|
|
||||||
// Adding endpoints to "Security" group
|
// Adding endpoints to "Security" group
|
||||||
addEndpointToGroup("Security", "add-password");
|
addEndpointToGroup("Security", "add-password");
|
||||||
|
@ -125,12 +127,15 @@ public class EndpointConfiguration {
|
||||||
addEndpointToGroup("CLI", "pdf-to-html");
|
addEndpointToGroup("CLI", "pdf-to-html");
|
||||||
addEndpointToGroup("CLI", "pdf-to-xml");
|
addEndpointToGroup("CLI", "pdf-to-xml");
|
||||||
addEndpointToGroup("CLI", "ocr-pdf");
|
addEndpointToGroup("CLI", "ocr-pdf");
|
||||||
|
addEndpointToGroup("CLI", "html-to-pdf");
|
||||||
|
addEndpointToGroup("CLI", "url-to-pdf");
|
||||||
|
|
||||||
|
|
||||||
//python
|
//python
|
||||||
addEndpointToGroup("Python", "extract-image-scans");
|
addEndpointToGroup("Python", "extract-image-scans");
|
||||||
addEndpointToGroup("Python", "remove-blanks");
|
addEndpointToGroup("Python", "remove-blanks");
|
||||||
|
addEndpointToGroup("Python", "html-to-pdf");
|
||||||
|
addEndpointToGroup("Python", "url-to-pdf");
|
||||||
|
|
||||||
//openCV
|
//openCV
|
||||||
addEndpointToGroup("OpenCV", "extract-image-scans");
|
addEndpointToGroup("OpenCV", "extract-image-scans");
|
||||||
|
|
|
@ -4,9 +4,13 @@ import java.io.ByteArrayInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
import java.util.stream.Stream;
|
||||||
import java.util.zip.ZipEntry;
|
import java.util.zip.ZipEntry;
|
||||||
import java.util.zip.ZipInputStream;
|
import java.util.zip.ZipInputStream;
|
||||||
import java.util.*;
|
|
||||||
import org.springframework.http.ResponseEntity;
|
import org.springframework.http.ResponseEntity;
|
||||||
import org.springframework.web.bind.annotation.PostMapping;
|
import org.springframework.web.bind.annotation.PostMapping;
|
||||||
import org.springframework.web.bind.annotation.RequestPart;
|
import org.springframework.web.bind.annotation.RequestPart;
|
||||||
|
@ -40,10 +44,10 @@ public class ConvertHtmlToPDF {
|
||||||
if (originalFilename == null || (!originalFilename.endsWith(".html") && !originalFilename.endsWith(".zip"))) {
|
if (originalFilename == null || (!originalFilename.endsWith(".html") && !originalFilename.endsWith(".zip"))) {
|
||||||
throw new IllegalArgumentException("File must be either .html or .zip format.");
|
throw new IllegalArgumentException("File must be either .html or .zip format.");
|
||||||
}
|
}
|
||||||
|
|
||||||
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
|
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
|
||||||
Path tempInputFile;
|
Path tempInputFile = null;
|
||||||
|
byte[] pdfBytes;
|
||||||
|
try {
|
||||||
if (originalFilename.endsWith(".html")) {
|
if (originalFilename.endsWith(".html")) {
|
||||||
tempInputFile = Files.createTempFile("input_", ".html");
|
tempInputFile = Files.createTempFile("input_", ".html");
|
||||||
Files.write(tempInputFile, fileInput.getBytes());
|
Files.write(tempInputFile, fileInput.getBytes());
|
||||||
|
@ -65,15 +69,16 @@ public class ConvertHtmlToPDF {
|
||||||
.runCommandWithOutputHandling(command);
|
.runCommandWithOutputHandling(command);
|
||||||
}
|
}
|
||||||
|
|
||||||
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
|
pdfBytes = Files.readAllBytes(tempOutputFile);
|
||||||
|
} finally {
|
||||||
// Clean up temporary files
|
// Clean up temporary files
|
||||||
Files.delete(tempOutputFile);
|
Files.delete(tempOutputFile);
|
||||||
Files.delete(tempInputFile);
|
Files.delete(tempInputFile);
|
||||||
|
|
||||||
if (originalFilename.endsWith(".zip")) {
|
if (originalFilename.endsWith(".zip")) {
|
||||||
GeneralUtils.deleteDirectory(tempInputFile.getParent());
|
GeneralUtils.deleteDirectory(tempInputFile.getParent());
|
||||||
}
|
}
|
||||||
|
}
|
||||||
String outputFilename = originalFilename.replaceFirst("[.][^.]+$", "") + ".pdf"; // Remove file extension and append .pdf
|
String outputFilename = originalFilename.replaceFirst("[.][^.]+$", "") + ".pdf"; // Remove file extension and append .pdf
|
||||||
return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename);
|
return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename);
|
||||||
}
|
}
|
||||||
|
@ -86,14 +91,35 @@ public class ConvertHtmlToPDF {
|
||||||
ZipEntry entry = zipIn.getNextEntry();
|
ZipEntry entry = zipIn.getNextEntry();
|
||||||
while (entry != null) {
|
while (entry != null) {
|
||||||
Path filePath = tempDirectory.resolve(entry.getName());
|
Path filePath = tempDirectory.resolve(entry.getName());
|
||||||
if (!entry.isDirectory()) {
|
if (entry.isDirectory()) {
|
||||||
|
Files.createDirectories(filePath); // Explicitly create the directory structure
|
||||||
|
} else {
|
||||||
|
Files.createDirectories(filePath.getParent()); // Create parent directories if they don't exist
|
||||||
Files.copy(zipIn, filePath);
|
Files.copy(zipIn, filePath);
|
||||||
}
|
}
|
||||||
zipIn.closeEntry();
|
zipIn.closeEntry();
|
||||||
entry = zipIn.getNextEntry();
|
entry = zipIn.getNextEntry();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return tempDirectory.resolve("index.html");
|
|
||||||
|
//search for the main HTML file.
|
||||||
|
try (Stream<Path> walk = Files.walk(tempDirectory)) {
|
||||||
|
List<Path> htmlFiles = walk.filter(file -> file.toString().endsWith(".html"))
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
|
||||||
|
if (htmlFiles.isEmpty()) {
|
||||||
|
throw new IOException("No HTML files found in the unzipped directory.");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prioritize 'index.html' if it exists, otherwise use the first .html file
|
||||||
|
for (Path htmlFile : htmlFiles) {
|
||||||
|
if (htmlFile.getFileName().toString().equals("index.html")) {
|
||||||
|
return htmlFile;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return htmlFiles.get(0);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -34,12 +34,14 @@ public class ConvertWebsiteToPDF {
|
||||||
String URL) throws IOException, InterruptedException {
|
String URL) throws IOException, InterruptedException {
|
||||||
|
|
||||||
// Validate the URL format
|
// Validate the URL format
|
||||||
if(!URL.matches("^https?://.*") && GeneralUtils.isValidURL(URL)) {
|
if(!URL.matches("^https?://.*") || !GeneralUtils.isValidURL(URL)) {
|
||||||
throw new IllegalArgumentException("Invalid URL format provided.");
|
throw new IllegalArgumentException("Invalid URL format provided.");
|
||||||
}
|
}
|
||||||
|
Path tempOutputFile = null;
|
||||||
|
byte[] pdfBytes;
|
||||||
|
try {
|
||||||
// Prepare the output file path
|
// Prepare the output file path
|
||||||
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
|
tempOutputFile = Files.createTempFile("output_", ".pdf");
|
||||||
|
|
||||||
// Prepare the WeasyPrint command
|
// Prepare the WeasyPrint command
|
||||||
List<String> command = new ArrayList<>();
|
List<String> command = new ArrayList<>();
|
||||||
|
@ -50,11 +52,12 @@ public class ConvertWebsiteToPDF {
|
||||||
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT).runCommandWithOutputHandling(command);
|
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT).runCommandWithOutputHandling(command);
|
||||||
|
|
||||||
// Read the generated PDF file
|
// Read the generated PDF file
|
||||||
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
|
pdfBytes = Files.readAllBytes(tempOutputFile);
|
||||||
|
}
|
||||||
|
finally {
|
||||||
// Clean up the temporary files
|
// Clean up the temporary files
|
||||||
Files.delete(tempOutputFile);
|
Files.delete(tempOutputFile);
|
||||||
|
}
|
||||||
// Convert URL to a safe filename
|
// Convert URL to a safe filename
|
||||||
String outputFilename = convertURLToFileName(URL);
|
String outputFilename = convertURLToFileName(URL);
|
||||||
|
|
||||||
|
|
|
@ -183,6 +183,12 @@ home.autoSplitPDF.desc=Auto Split Scanned PDF with physical scanned page splitte
|
||||||
home.sanitizePdf.title=Sanitize
|
home.sanitizePdf.title=Sanitize
|
||||||
home.sanitizePdf.desc=Remove scripts and other elements from PDF files
|
home.sanitizePdf.desc=Remove scripts and other elements from PDF files
|
||||||
|
|
||||||
|
home.URLToPDF.title=URL/Website To PDF
|
||||||
|
home.URLToPDF.desc=Converts any http(s) URL to PDF
|
||||||
|
|
||||||
|
home.HTMLToPDF.title=HTML to PDF
|
||||||
|
home.HTMLToPDF.desc=Converts any HTML file or zip to PDF
|
||||||
|
|
||||||
|
|
||||||
###########################
|
###########################
|
||||||
# #
|
# #
|
||||||
|
|
3
src/main/resources/static/images/html.svg
Normal file
3
src/main/resources/static/images/html.svg
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-filetype-html" viewBox="0 0 16 16">
|
||||||
|
<path fill-rule="evenodd" d="M14 4.5V11h-1V4.5h-2A1.5 1.5 0 0 1 9.5 3V1H4a1 1 0 0 0-1 1v9H2V2a2 2 0 0 1 2-2h5.5L14 4.5Zm-9.736 7.35v3.999h-.791v-1.714H1.79v1.714H1V11.85h.791v1.626h1.682V11.85h.79Zm2.251.662v3.337h-.794v-3.337H4.588v-.662h3.064v.662H6.515Zm2.176 3.337v-2.66h.038l.952 2.159h.516l.946-2.16h.038v2.661h.715V11.85h-.8l-1.14 2.596H9.93L8.79 11.85h-.805v3.999h.706Zm4.71-.674h1.696v.674H12.61V11.85h.79v3.325Z"/>
|
||||||
|
</svg>
|
After Width: | Height: | Size: 565 B |
4
src/main/resources/static/images/url.svg
Normal file
4
src/main/resources/static/images/url.svg
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-link" viewBox="0 0 16 16">
|
||||||
|
<path d="M6.354 5.5H4a3 3 0 0 0 0 6h3a3 3 0 0 0 2.83-4H9c-.086 0-.17.01-.25.031A2 2 0 0 1 7 10.5H4a2 2 0 1 1 0-4h1.535c.218-.376.495-.714.82-1z"/>
|
||||||
|
<path d="M9 5.5a3 3 0 0 0-2.83 4h1.098A2 2 0 0 1 9 6.5h3a2 2 0 1 1 0 4h-1.535a4.02 4.02 0 0 1-.82 1H12a3 3 0 1 0 0-6H9z"/>
|
||||||
|
</svg>
|
After Width: | Height: | Size: 403 B |
|
@ -71,6 +71,9 @@
|
||||||
<!-- Existing menu items -->
|
<!-- Existing menu items -->
|
||||||
<div th:replace="~{fragments/navbarEntry :: navbarEntry ('img-to-pdf', 'images/image.svg', 'home.imageToPdf.title', 'home.imageToPdf.desc')}"></div>
|
<div th:replace="~{fragments/navbarEntry :: navbarEntry ('img-to-pdf', 'images/image.svg', 'home.imageToPdf.title', 'home.imageToPdf.desc')}"></div>
|
||||||
<div th:replace="~{fragments/navbarEntry :: navbarEntry ('file-to-pdf', 'images/file.svg', 'home.fileToPDF.title', 'home.fileToPDF.desc')}"></div>
|
<div th:replace="~{fragments/navbarEntry :: navbarEntry ('file-to-pdf', 'images/file.svg', 'home.fileToPDF.title', 'home.fileToPDF.desc')}"></div>
|
||||||
|
<div th:replace="~{fragments/navbarEntry :: navbarEntry ('html-to-pdf', 'images/html.svg', 'home.HTMLToPDF.title', 'home.HTMLToPDF.desc')}"></div>
|
||||||
|
<div th:replace="~{fragments/navbarEntry :: navbarEntry ('url-to-pdf', 'images/url.svg', 'home.URLToPDF.title', 'home.URLToPDF.desc')}"></div>
|
||||||
|
|
||||||
<hr class="dropdown-divider">
|
<hr class="dropdown-divider">
|
||||||
<div th:replace="~{fragments/navbarEntry :: navbarEntry ('pdf-to-img', 'images/image.svg', 'home.pdfToImage.title', 'home.pdfToImage.desc')}"></div>
|
<div th:replace="~{fragments/navbarEntry :: navbarEntry ('pdf-to-img', 'images/image.svg', 'home.pdfToImage.title', 'home.pdfToImage.desc')}"></div>
|
||||||
<div th:replace="~{fragments/navbarEntry :: navbarEntry ('pdf-to-word', 'images/file-earmark-word.svg', 'home.PDFToWord.title', 'home.PDFToWord.desc')}"></div>
|
<div th:replace="~{fragments/navbarEntry :: navbarEntry ('pdf-to-word', 'images/file-earmark-word.svg', 'home.PDFToWord.title', 'home.PDFToWord.desc')}"></div>
|
||||||
|
|
|
@ -34,6 +34,10 @@
|
||||||
<div th:replace="~{fragments/card :: card(id='split-pdfs', cardTitle=#{home.split.title}, cardText=#{home.split.desc}, cardLink='split-pdfs', svgPath='images/layout-split.svg')}"></div>
|
<div th:replace="~{fragments/card :: card(id='split-pdfs', cardTitle=#{home.split.title}, cardText=#{home.split.desc}, cardLink='split-pdfs', svgPath='images/layout-split.svg')}"></div>
|
||||||
|
|
||||||
<div th:replace="~{fragments/card :: card(id='rotate-pdf', cardTitle=#{home.rotate.title}, cardText=#{home.rotate.desc}, cardLink='rotate-pdf', svgPath='images/arrow-clockwise.svg')}"></div>
|
<div th:replace="~{fragments/card :: card(id='rotate-pdf', cardTitle=#{home.rotate.title}, cardText=#{home.rotate.desc}, cardLink='rotate-pdf', svgPath='images/arrow-clockwise.svg')}"></div>
|
||||||
|
<div th:replace="~{fragments/card :: card(id='crop', cardTitle=#{home.crop.title}, cardText=#{home.crop.desc}, cardLink='crop', svgPath='images/crop.svg')}"></div>
|
||||||
|
<div th:replace="~{fragments/card :: card(id='add-page-numbers', cardTitle=#{home.add-page-numbers.title}, cardText=#{home.add-page-numbers.desc}, cardLink='add-page-numbers', svgPath='images/add-page-numbers.svg')}"></div>
|
||||||
|
|
||||||
|
<div th:replace="~{fragments/card :: card(id='adjust-contrast', cardTitle=#{home.adjust-contrast.title}, cardText=#{home.adjust-contrast.desc}, cardLink='adjust-contrast', svgPath='images/adjust-contrast.svg')}"></div>
|
||||||
<div th:replace="~{fragments/card :: card(id='img-to-pdf', cardTitle=#{home.imageToPdf.title}, cardText=#{home.imageToPdf.desc}, cardLink='img-to-pdf', svgPath='images/image.svg')}"></div>
|
<div th:replace="~{fragments/card :: card(id='img-to-pdf', cardTitle=#{home.imageToPdf.title}, cardText=#{home.imageToPdf.desc}, cardLink='img-to-pdf', svgPath='images/image.svg')}"></div>
|
||||||
<div th:replace="~{fragments/card :: card(id='pdf-to-img', cardTitle=#{home.pdfToImage.title}, cardText=#{home.pdfToImage.desc}, cardLink='pdf-to-img', svgPath='images/image.svg')}"></div>
|
<div th:replace="~{fragments/card :: card(id='pdf-to-img', cardTitle=#{home.pdfToImage.title}, cardText=#{home.pdfToImage.desc}, cardLink='pdf-to-img', svgPath='images/image.svg')}"></div>
|
||||||
|
|
||||||
|
@ -73,13 +77,14 @@
|
||||||
<div th:replace="~{fragments/card :: card(id='multi-page-layout', cardTitle=#{home.pageLayout.title}, cardText=#{home.pageLayout.desc}, cardLink='multi-page-layout', svgPath='images/page-layout.svg')}"></div>
|
<div th:replace="~{fragments/card :: card(id='multi-page-layout', cardTitle=#{home.pageLayout.title}, cardText=#{home.pageLayout.desc}, cardLink='multi-page-layout', svgPath='images/page-layout.svg')}"></div>
|
||||||
<div th:replace="~{fragments/card :: card(id='scale-pages', cardTitle=#{home.scalePages.title}, cardText=#{home.scalePages.desc}, cardLink='scale-pages', svgPath='images/scale-pages.svg')}"></div>
|
<div th:replace="~{fragments/card :: card(id='scale-pages', cardTitle=#{home.scalePages.title}, cardText=#{home.scalePages.desc}, cardLink='scale-pages', svgPath='images/scale-pages.svg')}"></div>
|
||||||
|
|
||||||
<div th:replace="~{fragments/card :: card(id='add-page-numbers', cardTitle=#{home.add-page-numbers.title}, cardText=#{home.add-page-numbers.desc}, cardLink='add-page-numbers', svgPath='images/add-page-numbers.svg')}"></div>
|
|
||||||
<div th:replace="~{fragments/card :: card(id='auto-rename', cardTitle=#{home.auto-rename.title}, cardText=#{home.auto-rename.desc}, cardLink='auto-rename', svgPath='images/fonts.svg')}"></div>
|
<div th:replace="~{fragments/card :: card(id='auto-rename', cardTitle=#{home.auto-rename.title}, cardText=#{home.auto-rename.desc}, cardLink='auto-rename', svgPath='images/fonts.svg')}"></div>
|
||||||
<div th:replace="~{fragments/card :: card(id='adjust-contrast', cardTitle=#{home.adjust-contrast.title}, cardText=#{home.adjust-contrast.desc}, cardLink='adjust-contrast', svgPath='images/adjust-contrast.svg')}"></div>
|
|
||||||
<div th:replace="~{fragments/card :: card(id='crop', cardTitle=#{home.crop.title}, cardText=#{home.crop.desc}, cardLink='crop', svgPath='images/crop.svg')}"></div>
|
|
||||||
<div th:replace="~{fragments/card :: card(id='auto-split-pdf', cardTitle=#{home.autoSplitPDF.title}, cardText=#{home.autoSplitPDF.desc}, cardLink='auto-split-pdf', svgPath='images/layout-split.svg')}"></div>
|
<div th:replace="~{fragments/card :: card(id='auto-split-pdf', cardTitle=#{home.autoSplitPDF.title}, cardText=#{home.autoSplitPDF.desc}, cardLink='auto-split-pdf', svgPath='images/layout-split.svg')}"></div>
|
||||||
<div th:replace="~{fragments/card :: card(id='sanitize-pdf', cardTitle=#{home.sanitizePdf.title}, cardText=#{home.sanitizePdf.desc}, cardLink='sanitize-pdf', svgPath='images/sanitize.svg')}"></div>
|
<div th:replace="~{fragments/card :: card(id='sanitize-pdf', cardTitle=#{home.sanitizePdf.title}, cardText=#{home.sanitizePdf.desc}, cardLink='sanitize-pdf', svgPath='images/sanitize.svg')}"></div>
|
||||||
|
|
||||||
|
<div th:replace="~{fragments/card :: card(id='url-to-pdf', cardTitle=#{home.URLToPDF.title}, cardText=#{home.URLToPDF.desc}, cardLink='url-to-pdf', svgPath='images/url.svg')}"></div>
|
||||||
|
<div th:replace="~{fragments/card :: card(id='html-to-pdf', cardTitle=#{home.HTMLToPDF.title}, cardText=#{home.HTMLToPDF.desc}, cardLink='html-to-pdf', svgPath='images/html.svg')}"></div>
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
</div> </div>
|
</div> </div>
|
||||||
<div th:insert="~{fragments/footer.html :: footer}"></div>
|
<div th:insert="~{fragments/footer.html :: footer}"></div>
|
||||||
|
|
Loading…
Reference in a new issue