itext removal fixes

This commit is contained in:
Anthony Stirling 2023-09-02 19:12:08 +01:00
parent a7cd6bfd2e
commit 862086eae5
4 changed files with 91 additions and 61 deletions

View file

@ -1,12 +1,15 @@
package stirling.software.SPDF.controller.api; package stirling.software.SPDF.controller.api;
import java.awt.geom.AffineTransform; import java.awt.geom.AffineTransform;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream; import java.io.ByteArrayOutputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream;
import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.common.PDStream;
import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject; import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject; import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.slf4j.Logger; import org.slf4j.Logger;
@ -49,28 +52,37 @@ public class ToSinglePageController {
@Parameter(description = "The input multi-page PDF file to be converted into a single page", required = true) @Parameter(description = "The input multi-page PDF file to be converted into a single page", required = true)
MultipartFile file) throws IOException { MultipartFile file) throws IOException {
// Load the source document
PDDocument sourceDocument = PDDocument.load(file.getInputStream()); PDDocument sourceDocument = PDDocument.load(file.getInputStream());
float totalHeight = 0;
float width = 0;
// Calculate total height and max width
float totalHeight = 0;
float maxWidth = 0;
for (PDPage page : sourceDocument.getPages()) { for (PDPage page : sourceDocument.getPages()) {
PDRectangle pageSize = page.getMediaBox(); PDRectangle pageSize = page.getMediaBox();
totalHeight += pageSize.getHeight(); totalHeight += pageSize.getHeight();
if(width < pageSize.getWidth()) maxWidth = Math.max(maxWidth, pageSize.getWidth());
width = pageSize.getWidth();
} }
// Create new document and page with calculated dimensions
PDDocument newDocument = new PDDocument(); PDDocument newDocument = new PDDocument();
PDPage newPage = new PDPage(new PDRectangle(width, totalHeight)); PDPage newPage = new PDPage(new PDRectangle(maxWidth, totalHeight));
newDocument.addPage(newPage); newDocument.addPage(newPage);
// Initialize the content stream of the new page
PDPageContentStream contentStream = new PDPageContentStream(newDocument, newPage);
contentStream.close();
LayerUtility layerUtility = new LayerUtility(newDocument); LayerUtility layerUtility = new LayerUtility(newDocument);
float yOffset = totalHeight; float yOffset = totalHeight;
// For each page, copy its content to the new page at the correct offset
for (PDPage page : sourceDocument.getPages()) { for (PDPage page : sourceDocument.getPages()) {
PDFormXObject form = layerUtility.importPageAsForm(sourceDocument, sourceDocument.getPages().indexOf(page)); PDFormXObject form = layerUtility.importPageAsForm(sourceDocument, sourceDocument.getPages().indexOf(page));
AffineTransform af = AffineTransform.getTranslateInstance(0, yOffset - page.getMediaBox().getHeight()); AffineTransform af = AffineTransform.getTranslateInstance(0, yOffset - page.getMediaBox().getHeight());
layerUtility.appendFormAsLayer(newDocument.getPage(0), form, af, page.getResources().getCOSObject().toString()); layerUtility.wrapInSaveRestore(newPage);
String defaultLayerName = "Layer" + sourceDocument.getPages().indexOf(page);
layerUtility.appendFormAsLayer(newPage, form, af, defaultLayerName);
yOffset -= page.getMediaBox().getHeight(); yOffset -= page.getMediaBox().getHeight();
} }
@ -81,5 +93,9 @@ public class ToSinglePageController {
byte[] result = baos.toByteArray(); byte[] result = baos.toByteArray();
return WebResponseUtils.bytesToWebResponse(result, file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_singlePage.pdf"); return WebResponseUtils.bytesToWebResponse(result, file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_singlePage.pdf");
} }
} }

View file

@ -60,6 +60,11 @@ public class PageNumbersController {
case "large": case "large":
marginFactor = 0.05f; marginFactor = 0.05f;
break; break;
case "x-large":
marginFactor = 0.075f;
break;
default: default:
marginFactor = 0.035f; marginFactor = 0.035f;
break; break;
@ -67,7 +72,12 @@ public class PageNumbersController {
float fontSize = 12.0f; float fontSize = 12.0f;
PDType1Font font = PDType1Font.HELVETICA; PDType1Font font = PDType1Font.HELVETICA;
if(pagesToNumber == null || pagesToNumber.length() == 0) {
pagesToNumber = "all";
}
if(customText == null || customText.length() == 0) {
customText = "{n}";
}
List<Integer> pagesToNumberList = GeneralUtils.parsePageList(pagesToNumber.split(","), document.getNumberOfPages()); List<Integer> pagesToNumberList = GeneralUtils.parsePageList(pagesToNumber.split(","), document.getNumberOfPages());
for (int i : pagesToNumberList) { for (int i : pagesToNumberList) {
@ -120,7 +130,7 @@ public class PageNumbersController {
document.save(baos); document.save(baos);
document.close(); document.close();
return WebResponseUtils.bytesToWebResponse(baos.toByteArray(), URLEncoder.encode(file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_numbersAdded.pdf", "UTF-8"), MediaType.APPLICATION_PDF); return WebResponseUtils.bytesToWebResponse(baos.toByteArray(), file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_numbersAdded.pdf", MediaType.APPLICATION_PDF);
} }

View file

@ -29,6 +29,7 @@ public class ShowJavascript {
try (PDDocument document = PDDocument.load(inputFile.getInputStream())) { try (PDDocument document = PDDocument.load(inputFile.getInputStream())) {
if(document.getDocumentCatalog() != null && document.getDocumentCatalog().getNames() != null) {
PDNameTreeNode<PDActionJavaScript> jsTree = document.getDocumentCatalog().getNames().getJavaScript(); PDNameTreeNode<PDActionJavaScript> jsTree = document.getDocumentCatalog().getNames().getJavaScript();
if (jsTree != null) { if (jsTree != null) {
@ -42,6 +43,7 @@ public class ShowJavascript {
script += "// File: " + inputFile.getOriginalFilename() + ", Script: " + name + "\n" + jsCodeStr + "\n"; script += "// File: " + inputFile.getOriginalFilename() + ", Script: " + name + "\n" + jsCodeStr + "\n";
} }
} }
}
if (script.isEmpty()) { if (script.isEmpty()) {
script = "PDF '" + inputFile.getOriginalFilename() + "' does not contain Javascript"; script = "PDF '" + inputFile.getOriginalFilename() + "' does not contain Javascript";

View file

@ -185,6 +185,7 @@ public class GetInfoOnPDF {
//embeed files TODO size //embeed files TODO size
if(catalog.getNames() != null) {
PDEmbeddedFilesNameTreeNode efTree = catalog.getNames().getEmbeddedFiles(); PDEmbeddedFilesNameTreeNode efTree = catalog.getNames().getEmbeddedFiles();
ArrayNode embeddedFilesArray = objectMapper.createArrayNode(); ArrayNode embeddedFilesArray = objectMapper.createArrayNode();
@ -203,6 +204,7 @@ public class GetInfoOnPDF {
} }
} }
other.set("EmbeddedFiles", embeddedFilesArray); other.set("EmbeddedFiles", embeddedFilesArray);
}
@ -374,7 +376,7 @@ public class GetInfoOnPDF {
ObjectNode pageInfoParent = objectMapper.createObjectNode(); ObjectNode pageInfoParent = objectMapper.createObjectNode();
for (int pageNum = 1; pageNum <= pdfBoxDoc.getNumberOfPages(); pageNum++) { for (int pageNum = 0; pageNum < pdfBoxDoc.getNumberOfPages(); pageNum++) {
ObjectNode pageInfo = objectMapper.createObjectNode(); ObjectNode pageInfo = objectMapper.createObjectNode();
// Retrieve the page // Retrieve the page
@ -411,8 +413,8 @@ public class GetInfoOnPDF {
// Content Extraction // Content Extraction
PDFTextStripper textStripper = new PDFTextStripper(); PDFTextStripper textStripper = new PDFTextStripper();
textStripper.setStartPage(pageNum -1); textStripper.setStartPage(pageNum + 1);
textStripper.setEndPage(pageNum - 1); textStripper.setEndPage(pageNum +1);
String pageText = textStripper.getText(pdfBoxDoc); String pageText = textStripper.getText(pdfBoxDoc);
pageInfo.put("Text Characters Count", pageText.length()); // pageInfo.put("Text Characters Count", pageText.length()); //