This commit is contained in:
Anthony Stirling 2023-08-05 23:03:49 +01:00
parent b07437dbfa
commit 1e35556034
6 changed files with 251 additions and 72 deletions

View file

@ -0,0 +1,139 @@
package stirling.software.SPDF.controller.api.converters;
import java.io.IOException;
import org.commonmark.node.Node;
import org.commonmark.parser.Parser;
import org.commonmark.renderer.html.HtmlRenderer;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestPart;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import io.swagger.v3.oas.annotations.Hidden;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.utils.FileToPdf;
import stirling.software.SPDF.utils.WebResponseUtils;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestPart;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import org.springframework.http.ResponseEntity;
import io.swagger.v3.oas.annotations.Operation;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.DocumentBuilder;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
@RestController
@Tag(name = "Convert", description = "Convert APIs")
public class ConvertEpubToPdf {
//TODO
@PostMapping(consumes = "multipart/form-data", value = "/epub-to-single-pdf")
@Hidden
@Operation(
summary = "Convert an EPUB file to a single PDF",
description = "This endpoint takes an EPUB file input and converts it to a single PDF."
)
public ResponseEntity<byte[]> epubToSinglePdf(
@RequestPart(required = true, value = "fileInput") MultipartFile fileInput)
throws Exception {
if (fileInput == null) {
throw new IllegalArgumentException("Please provide an EPUB file for conversion.");
}
String originalFilename = fileInput.getOriginalFilename();
if (originalFilename == null || !originalFilename.endsWith(".epub")) {
throw new IllegalArgumentException("File must be in .epub format.");
}
Map<String, byte[]> epubContents = extractEpubContent(fileInput);
List<String> htmlFilesOrder = getHtmlFilesOrderFromOpf(epubContents);
List<byte[]> individualPdfs = new ArrayList<>();
for (String htmlFile : htmlFilesOrder) {
byte[] htmlContent = epubContents.get(htmlFile);
byte[] pdfBytes = FileToPdf.convertHtmlToPdf(htmlContent, htmlFile.replace(".html", ".pdf"));
individualPdfs.add(pdfBytes);
}
// Pseudo-code to merge individual PDFs into one.
byte[] mergedPdfBytes = mergeMultiplePdfsIntoOne(individualPdfs);
return WebResponseUtils.bytesToWebResponse(mergedPdfBytes, originalFilename.replace(".epub", ".pdf"));
}
// Assuming a pseudo-code function that merges multiple PDFs into one.
private byte[] mergeMultiplePdfsIntoOne(List<byte[]> individualPdfs) {
// You can use a library such as iText or PDFBox to perform the merging here.
// Return the byte[] of the merged PDF.
return null;
}
private Map<String, byte[]> extractEpubContent(MultipartFile fileInput) throws IOException {
Map<String, byte[]> contentMap = new HashMap<>();
try (ZipInputStream zis = new ZipInputStream(fileInput.getInputStream())) {
ZipEntry zipEntry = zis.getNextEntry();
while (zipEntry != null) {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
byte[] buffer = new byte[1024];
int read = 0;
while ((read = zis.read(buffer)) != -1) {
baos.write(buffer, 0, read);
}
contentMap.put(zipEntry.getName(), baos.toByteArray());
zipEntry = zis.getNextEntry();
}
}
return contentMap;
}
private List<String> getHtmlFilesOrderFromOpf(Map<String, byte[]> epubContents) throws Exception {
String opfContent = new String(epubContents.get("OEBPS/content.opf")); // Adjusting for given path
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
InputSource is = new InputSource(new StringReader(opfContent));
Document doc = dBuilder.parse(is);
NodeList itemRefs = doc.getElementsByTagName("itemref");
List<String> htmlFilesOrder = new ArrayList<>();
for (int i = 0; i < itemRefs.getLength(); i++) {
Element itemRef = (Element) itemRefs.item(i);
String idref = itemRef.getAttribute("idref");
NodeList items = doc.getElementsByTagName("item");
for (int j = 0; j < items.getLength(); j++) {
Element item = (Element) items.item(j);
if (idref.equals(item.getAttribute("id"))) {
htmlFilesOrder.add(item.getAttribute("href")); // Fetching the actual href
break;
}
}
}
return htmlFilesOrder;
}
}

View file

@ -260,6 +260,37 @@ PdfToSinglePage.tags=single page
# WEB PAGES #
# #
###########################
#pdfToSinglePage
pdfToSinglePage.title=PDF To Single Page
pdfToSinglePage.header=PDF To Single Page
pdfToSinglePage.submit=Convert To Single Page
#pageExtracter
pageExtracter.title=Extract Pages
pageExtracter.header=Extract Pages
pageExtracter.submit=Extract
#getPdfInfo
getPdfInfo.title=Get Info on PDF
getPdfInfo.header=Get Info on PDF
getPdfInfo.submit=Get Info
getPdfInfo.downloadJson=Download JSON
#markdown-to-pdf
MarkdownToPDF.title=Markdown To PDF
MarkdownToPDF.header=Markdown To PDF
MarkdownToPDF.submit=Convert
MarkdownToPDF.help=Work in progress
MarkdownToPDF.credit=Uses WeasyPrint
#url-to-pdf
URLToPDF.title=URL To PDF
URLToPDF.header=URL To PDF

View file

@ -17,7 +17,7 @@
<div th:replace="~{fragments/common :: fileSelector(name='fileInput', multiple=false, accept='application/pdf')}"></div>
<input type="hidden" id="customMode" name="customMode" value="">
<div class="form-group">
<label for="pageOrder" th:text="#{pageSelection}"></label>
<label for="pageOrder" th:text="#{pageOrderPrompt}"></label>
<input type="text" class="form-control" id="pageOrder" name="pageOrder" placeholder="(e.g. 1,2,8 or 4,7,12-16 or 2n-1)" required>
</div>

View file

@ -57,6 +57,8 @@
<div th:replace="~{fragments/navbarEntry :: navbarEntry ( 'auto-split-pdf', 'images/layout-split.svg', 'home.autoSplitPDF.title', 'home.autoSplitPDF.desc', 'autoSplitPDF.tags')}"></div>
<div th:replace="~{fragments/navbarEntry :: navbarEntry ('adjust-contrast', 'images/adjust-contrast.svg', 'home.adjust-contrast.title', 'home.adjust-contrast.desc', 'adjust-contrast.tags')}"></div>
<div th:replace="~{fragments/navbarEntry :: navbarEntry ('crop', 'images/crop.svg', 'home.crop.title', 'home.crop.desc', 'crop.tags')}"></div>
<div th:replace="~{fragments/navbarEntry :: navbarEntry ('extract-page', 'images/extract.svg', 'home.extractPage.title', 'home.extractPage.desc', 'extractPage.tags')}"></div>
<div th:replace="~{fragments/navbarEntry :: navbarEntry ('pdf-to-single-page', 'images/single-page.svg', 'home.PdfToSinglePage.title', 'home.PdfToSinglePage.desc', 'PdfToSinglePage.tags')}"></div>
</div>
@ -73,7 +75,7 @@
<div th:replace="~{fragments/navbarEntry :: navbarEntry ('file-to-pdf', 'images/file.svg', 'home.fileToPDF.title', 'home.fileToPDF.desc', 'fileToPDF.tags')}"></div>
<div th:replace="~{fragments/navbarEntry :: navbarEntry ('html-to-pdf', 'images/html.svg', 'home.HTMLToPDF.title', 'home.HTMLToPDF.desc', 'HTMLToPDF.tags')}"></div>
<div th:replace="~{fragments/navbarEntry :: navbarEntry ('url-to-pdf', 'images/url.svg', 'home.URLToPDF.title', 'home.URLToPDF.desc', 'URLToPDF.tags')}"></div>
<div th:replace="~{fragments/navbarEntry :: navbarEntry ('markdown-to-pdf', 'images/markdown.svg', 'home.MarkdownToPDF.title', 'home.MarkdownToPDF.desc', 'MarkdownToPDF.tags')}"></div>
<hr class="dropdown-divider">
<div th:replace="~{fragments/navbarEntry :: navbarEntry ('pdf-to-img', 'images/image.svg', 'home.pdfToImage.title', 'home.pdfToImage.desc', 'pdfToImage.tags')}"></div>
<div th:replace="~{fragments/navbarEntry :: navbarEntry ('pdf-to-word', 'images/file-earmark-word.svg', 'home.PDFToWord.title', 'home.PDFToWord.desc', 'PDFToWord.tags')}"></div>
@ -102,6 +104,7 @@
<div th:replace="~{fragments/navbarEntry :: navbarEntry ('add-watermark', 'images/droplet.svg', 'home.watermark.title', 'home.watermark.desc', 'watermark.tags')}"></div>
<div th:replace="~{fragments/navbarEntry :: navbarEntry ('cert-sign', 'images/award.svg', 'home.certSign.title', 'home.certSign.desc', 'certSign.tags')}"></div>
<div th:replace="~{fragments/navbarEntry :: navbarEntry ('sanitize-pdf', 'images/sanitize.svg', 'home.sanitizePdf.title', 'home.sanitizePdf.desc', 'sanitizePdf.tags')}"></div>
<div th:replace="~{fragments/navbarEntry :: navbarEntry ('get-info-on-pdf', 'images/info.svg', 'home.getPdfInfo.title', 'home.getPdfInfo.desc', 'getPdfInfo.tags')}"></div>
</div>
</li>

View file

@ -14,7 +14,6 @@
<div class="col-md-6">
<h2 th:text="#{pdfToSinglePage.header}"></h2>
<form method="post" enctype="multipart/form-data" th:action="@{pdf-to-single-page}">
<p th:text="#{pdfToSinglePage.formPrompt}"></p>
<div th:replace="~{fragments/common :: fileSelector(name='fileInput', multiple=false)}"></div>
<button type="submit" id="submitBtn" class="btn btn-primary" th:text="#{pdfToSinglePage.submit}"></button>
</form>

View file

@ -1,9 +1,9 @@
<!DOCTYPE html>
<html th:lang="${#locale.toString()}" th:lang-direction="#{language.direction}" xmlns:th="http://www.thymeleaf.org">
<th:block th:insert="~{fragments/common :: head(title=#{getPdfInfo.title})}"></th:block>
<html th:lang="${#locale.toString()}"
th:lang-direction="#{language.direction}"
xmlns:th="http://www.thymeleaf.org">
<th:block
th:insert="~{fragments/common :: head(title=#{getPdfInfo.title})}"></th:block>
<body>
<div id="page-container">
<div id="content-wrap">
@ -13,11 +13,13 @@
<div class="row justify-content-center">
<div class="col-md-6">
<h2 th:text="#{getPdfInfo.header}"></h2>
<p th:text="#{processTimeWarning}">
<form id="pdfInfoForm" method="post" enctype="multipart/form-data" th:action="@{get-info-on-pdf}">
<div th:replace="~{fragments/common :: fileSelector(name='fileInput', multiple=false, remoteCall='false')}"></div>
<form id="pdfInfoForm" method="post" enctype="multipart/form-data"
th:action="@{get-info-on-pdf}">
<div
th:replace="~{fragments/common :: fileSelector(name='fileInput', multiple=false, remoteCall='false')}"></div>
<br>
<button type="submit" id="submitBtn" class="btn btn-primary" th:text="#{getPdfInfo.submit}"></button>
<button type="submit" id="submitBtn" class="btn btn-primary"
th:text="#{getPdfInfo.submit}"></button>
</form>
<div class="container mt-5">
@ -27,7 +29,9 @@
</div>
<!-- Button to download the JSON -->
<a href="#" id="downloadJson" class="btn btn-primary mt-3">Download JSON</a>
<a href="#" id="downloadJson" class="btn btn-primary mt-3"
style="display: none;" th:text="#{getPdfInfo.downloadJson}">Download
JSON</a>
</div>
<script>
@ -47,6 +51,7 @@
.then(data => {
displayJsonData(data); // Display the data
setDownloadLink(data); // Set download link
document.getElementById("downloadJson").style.display = "block";
})
.catch(error => console.error('Error:', error));
});
@ -67,8 +72,6 @@
}
function renderJsonSection(key, value, depth = 0) {
// Replace spaces and other non-alphanumeric characters with underscores for valid IDs
let safeKey = (typeof key === "string") ? key.replace(/[^a-zA-Z0-9]/g, '_') : key;
@ -78,35 +81,50 @@
<h5 class="mb-0">`;
// Check if the value is an object and has children
if (value && typeof value === 'object' && (Object.keys(value).length || Array.isArray(value))) {
if (value && typeof value === 'object') {
// For arrays and non-array objects
if (Array.isArray(value) && value.length === 0) {
output += `${key}: Empty array`;
} else if (!Array.isArray(value) && Object.keys(value).length === 0) {
output += `${key}: Empty object`;
} else {
output += `
<button class="btn btn-link" type="button" data-toggle="collapse" data-target="#${safeKey}-content-${depth}" aria-expanded="true" aria-controls="${safeKey}-content-${depth}">
${key}
</button>`;
}
} else {
// Display both key and value for simple entries
// For simple key-value pairs
output += `${key}: ${value}`;
}
output += `
</h5>
</div>
<div id="${safeKey}-content-${depth}" class="collapse" aria-labelledby="${safeKey}-heading-${depth}">`;
// Check if the value is a nested object
if (typeof value === 'object' && !Array.isArray(value)) {
if (value && typeof value === 'object' && !Array.isArray(value)) {
output += '<div class="card-body">';
if (Object.keys(value).length) {
for (const subKey in value) {
output += renderJsonSection(subKey, value[subKey], depth + 1);
}
} else {
output += '<p class="text-muted">Empty object</p>';
}
output += '</div>';
} else if (typeof value === 'object' && Array.isArray(value) && value.length) { // Array values
} else if (value && typeof value === 'object' && Array.isArray(value)) {
output += '<div class="card-body">';
if (value.length) {
value.forEach((val, index) => {
// For arrays, we're going to make the displayed key more descriptive.
const arrayKey = `${key}[${index}]`;
output += renderJsonSection(arrayKey, val, depth + 1);
});
} else {
output += '<p class="text-muted">Empty array</p>';
}
output += '</div>';
}
@ -114,17 +132,6 @@
return output;
}
</script>
</div>
</div>