This commit is contained in:
Anthony Stirling 2023-07-16 00:36:58 +01:00
parent 9af1b0cfdc
commit 29aabdfba8
5 changed files with 302 additions and 249 deletions

View file

@ -22,66 +22,77 @@ import stirling.software.SPDF.utils.PdfUtils;
import stirling.software.SPDF.utils.ProcessExecutor;
import stirling.software.SPDF.utils.WebResponseUtils;
import io.swagger.v3.oas.annotations.media.Schema;
@RestController
@Tag(name = "Filter", description = "Filter APIs")
public class FilterController {
@PostMapping(consumes = "multipart/form-data", value = "/contains-text")
@PostMapping(consumes = "multipart/form-data", value = "/filter-contains-text")
@Operation(summary = "Checks if a PDF contains set text, returns true if does", description = "Input:PDF Output:Boolean Type:SISO")
public Boolean containsText(
public ResponseEntity<byte[]> containsText(
@RequestPart(required = true, value = "fileInput") @Parameter(description = "The input PDF file to be converted to a PDF/A file", required = true) MultipartFile inputFile,
@Parameter(description = "The text to check for", required = true) String text,
@Parameter(description = "The page number to check for text on accepts 'All', ranges like '1-4'", required = false) String pageNumber)
throws IOException, InterruptedException {
PDDocument pdfDocument = PDDocument.load(inputFile.getInputStream());
return PdfUtils.hasText(pdfDocument, pageNumber);
if (PdfUtils.hasText(pdfDocument, pageNumber, text))
return WebResponseUtils.pdfDocToWebResponse(pdfDocument, inputFile.getOriginalFilename());
return null;
}
// TODO
@PostMapping(consumes = "multipart/form-data", value = "/contains-image")
@PostMapping(consumes = "multipart/form-data", value = "/filter-contains-image")
@Operation(summary = "Checks if a PDF contains an image", description = "Input:PDF Output:Boolean Type:SISO")
public Boolean containsImage(
public ResponseEntity<byte[]> containsImage(
@RequestPart(required = true, value = "fileInput") @Parameter(description = "The input PDF file to be converted to a PDF/A file", required = true) MultipartFile inputFile,
@Parameter(description = "The page number to check for image on accepts 'All', ranges like '1-4'", required = false) String pageNumber)
throws IOException, InterruptedException {
PDDocument pdfDocument = PDDocument.load(inputFile.getInputStream());
return PdfUtils.hasImagesOnPage(null);
if (PdfUtils.hasImages(pdfDocument, pageNumber))
return WebResponseUtils.pdfDocToWebResponse(pdfDocument, inputFile.getOriginalFilename());
return null;
}
@PostMapping(consumes = "multipart/form-data", value = "/page-count")
@PostMapping(consumes = "multipart/form-data", value = "/filter-page-count")
@Operation(summary = "Checks if a PDF is greater, less or equal to a setPageCount", description = "Input:PDF Output:Boolean Type:SISO")
public Boolean pageCount(
public ResponseEntity<byte[]> pageCount(
@RequestPart(required = true, value = "fileInput") @Parameter(description = "The input PDF file", required = true) MultipartFile inputFile,
@Parameter(description = "Page Count", required = true) String pageCount,
@Parameter(description = "Comparison type",
schema = @Schema(description = "The comparison type, accepts Greater, Equal, Less than",
allowableValues = {"Greater", "Equal", "Less"})) String comparator)
@Parameter(description = "Comparison type", schema = @Schema(description = "The comparison type, accepts Greater, Equal, Less than", allowableValues = {
"Greater", "Equal", "Less" })) String comparator)
throws IOException, InterruptedException {
// Load the PDF
PDDocument document = PDDocument.load(inputFile.getInputStream());
int actualPageCount = document.getNumberOfPages();
boolean valid = false;
// Perform the comparison
switch (comparator) {
case "Greater":
return actualPageCount > Integer.parseInt(pageCount);
valid = actualPageCount > Integer.parseInt(pageCount);
break;
case "Equal":
return actualPageCount == Integer.parseInt(pageCount);
valid = actualPageCount == Integer.parseInt(pageCount);
break;
case "Less":
return actualPageCount < Integer.parseInt(pageCount);
valid = actualPageCount < Integer.parseInt(pageCount);
break;
default:
throw new IllegalArgumentException("Invalid comparator: " + comparator);
}
if (valid)
return WebResponseUtils.multiPartFileToWebResponse(inputFile);
return null;
}
@PostMapping(consumes = "multipart/form-data", value = "/page-size")
@PostMapping(consumes = "multipart/form-data", value = "/filter-page-size")
@Operation(summary = "Checks if a PDF is of a certain size", description = "Input:PDF Output:Boolean Type:SISO")
public Boolean pageSize(
public ResponseEntity<byte[]> pageSize(
@RequestPart(required = true, value = "fileInput") @Parameter(description = "The input PDF file", required = true) MultipartFile inputFile,
@Parameter(description = "Standard Page Size", required = true) String standardPageSize,
@Parameter(description = "Comparison type",
schema = @Schema(description = "The comparison type, accepts Greater, Equal, Less than",
allowableValues = {"Greater", "Equal", "Less"})) String comparator)
@Parameter(description = "Comparison type", schema = @Schema(description = "The comparison type, accepts Greater, Equal, Less than", allowableValues = {
"Greater", "Equal", "Less" })) String comparator)
throws IOException, InterruptedException {
// Load the PDF
@ -97,55 +108,67 @@ public class FilterController {
PDRectangle standardSize = PdfUtils.textToPageSize(standardPageSize);
float standardArea = standardSize.getWidth() * standardSize.getHeight();
boolean valid = false;
// Perform the comparison
switch (comparator) {
case "Greater":
return actualArea > standardArea;
valid = actualArea > standardArea;
break;
case "Equal":
return actualArea == standardArea;
valid = actualArea == standardArea;
break;
case "Less":
return actualArea < standardArea;
valid = actualArea < standardArea;
break;
default:
throw new IllegalArgumentException("Invalid comparator: " + comparator);
}
if (valid)
return WebResponseUtils.multiPartFileToWebResponse(inputFile);
return null;
}
@PostMapping(consumes = "multipart/form-data", value = "/file-size")
@PostMapping(consumes = "multipart/form-data", value = "/filter-file-size")
@Operation(summary = "Checks if a PDF is a set file size", description = "Input:PDF Output:Boolean Type:SISO")
public Boolean fileSize(
public ResponseEntity<byte[]> fileSize(
@RequestPart(required = true, value = "fileInput") @Parameter(description = "The input PDF file", required = true) MultipartFile inputFile,
@Parameter(description = "File Size", required = true) String fileSize,
@Parameter(description = "Comparison type",
schema = @Schema(description = "The comparison type, accepts Greater, Equal, Less than",
allowableValues = {"Greater", "Equal", "Less"})) String comparator)
@Parameter(description = "Comparison type", schema = @Schema(description = "The comparison type, accepts Greater, Equal, Less than", allowableValues = {
"Greater", "Equal", "Less" })) String comparator)
throws IOException, InterruptedException {
// Get the file size
long actualFileSize = inputFile.getSize();
boolean valid = false;
// Perform the comparison
switch (comparator) {
case "Greater":
return actualFileSize > Long.parseLong(fileSize);
valid = actualFileSize > Long.parseLong(fileSize);
break;
case "Equal":
return actualFileSize == Long.parseLong(fileSize);
valid = actualFileSize == Long.parseLong(fileSize);
break;
case "Less":
return actualFileSize < Long.parseLong(fileSize);
valid = actualFileSize < Long.parseLong(fileSize);
break;
default:
throw new IllegalArgumentException("Invalid comparator: " + comparator);
}
if (valid)
return WebResponseUtils.multiPartFileToWebResponse(inputFile);
return null;
}
@PostMapping(consumes = "multipart/form-data", value = "/page-rotation")
@PostMapping(consumes = "multipart/form-data", value = "/filter-page-rotation")
@Operation(summary = "Checks if a PDF is of a certain rotation", description = "Input:PDF Output:Boolean Type:SISO")
public Boolean pageRotation(
public ResponseEntity<byte[]> pageRotation(
@RequestPart(required = true, value = "fileInput") @Parameter(description = "The input PDF file", required = true) MultipartFile inputFile,
@Parameter(description = "Rotation in degrees", required = true) int rotation,
@Parameter(description = "Comparison type",
schema = @Schema(description = "The comparison type, accepts Greater, Equal, Less than",
allowableValues = {"Greater", "Equal", "Less"})) String comparator)
@Parameter(description = "Comparison type", schema = @Schema(description = "The comparison type, accepts Greater, Equal, Less than", allowableValues = {
"Greater", "Equal", "Less" })) String comparator)
throws IOException, InterruptedException {
// Load the PDF
@ -154,18 +177,26 @@ public class FilterController {
// Get the rotation of the first page
PDPage firstPage = document.getPage(0);
int actualRotation = firstPage.getRotation();
boolean valid = false;
// Perform the comparison
switch (comparator) {
case "Greater":
return actualRotation > rotation;
valid = actualRotation > rotation;
break;
case "Equal":
return actualRotation == rotation;
valid = actualRotation == rotation;
break;
case "Less":
return actualRotation < rotation;
valid = actualRotation < rotation;
break;
default:
throw new IllegalArgumentException("Invalid comparator: " + comparator);
}
if (valid)
return WebResponseUtils.multiPartFileToWebResponse(inputFile);
return null;
}
}

View file

@ -292,6 +292,12 @@ public class PipelineController {
ResponseEntity<byte[]> response = restTemplate.exchange(url, HttpMethod.POST, entity, byte[].class);
// If the operation is filter and the response body is null or empty, skip this file
if (operation.startsWith("filter-") && (response.getBody() == null || response.getBody().length == 0)) {
logger.info("Skipping file due to failing {}", operation);
continue;
}
if (!response.getStatusCode().equals(HttpStatus.OK)) {
logPrintStream.println("Error: " + response.getBody());
hasErrors = true;

View file

@ -39,8 +39,14 @@ public class GeneralUtils {
// loop through the page order array
for (String element : pageOrderArr) {
// check if the element contains a range of pages
if (element.matches("\\d*n\\+?-?\\d*|\\d*\\+?n")) {
if (element.equalsIgnoreCase("all")) {
for (int i = 0; i < totalPages; i++) {
newPageOrder.add(i);
}
// As all pages are already added, no need to check further
break;
}
else if (element.matches("\\d*n\\+?-?\\d*|\\d*\\+?n")) {
// Handle page order as a function
int coefficient = 0;
int constant = 0;

View file

@ -68,44 +68,38 @@ public class PdfUtils {
}
}
public boolean hasImageInFile(PDDocument pdfDocument, String text, String pagesToCheck) throws IOException {
PDFTextStripper textStripper = new PDFTextStripper();
String pdfText = "";
if(pagesToCheck == null || pagesToCheck.equals("all")) {
pdfText = textStripper.getText(pdfDocument);
} else {
// remove whitespaces
pagesToCheck = pagesToCheck.replaceAll("\\s+", "");
String[] splitPoints = pagesToCheck.split(",");
for (String splitPoint : splitPoints) {
if (splitPoint.contains("-")) {
// Handle page ranges
String[] range = splitPoint.split("-");
int startPage = Integer.parseInt(range[0]);
int endPage = Integer.parseInt(range[1]);
for (int i = startPage; i <= endPage; i++) {
textStripper.setStartPage(i);
textStripper.setEndPage(i);
pdfText += textStripper.getText(pdfDocument);
}
} else {
// Handle individual page
int page = Integer.parseInt(splitPoint);
textStripper.setStartPage(page);
textStripper.setEndPage(page);
pdfText += textStripper.getText(pdfDocument);
}
public static boolean hasImages(PDDocument document, String pagesToCheck) throws IOException {
String[] pageOrderArr = pagesToCheck.split(",");
List<Integer> pageList = GeneralUtils.parsePageList(pageOrderArr, document.getNumberOfPages());
for (int pageNumber : pageList) {
PDPage page = document.getPage(pageNumber);
if (hasImagesOnPage(page)) {
return true;
}
}
pdfDocument.close();
return pdfText.contains(text);
return false;
}
public static boolean hasText(PDDocument document, String pageNumbersToCheck, String phrase) throws IOException {
String[] pageOrderArr = pageNumbersToCheck.split(",");
List<Integer> pageList = GeneralUtils.parsePageList(pageOrderArr, document.getNumberOfPages());
for (int pageNumber : pageList) {
PDPage page = document.getPage(pageNumber);
if (hasTextOnPage(page, phrase)) {
return true;
}
}
return false;
}
public static boolean hasImagesOnPage(PDPage page) throws IOException {
ImageFinder imageFinder = new ImageFinder(page);
imageFinder.processPage(page);
@ -113,10 +107,15 @@ public class PdfUtils {
}
public static boolean hasText(PDDocument document, String phrase) throws IOException {
PDFTextStripper pdfStripper = new PDFTextStripper();
String text = pdfStripper.getText(document);
return text.contains(phrase);
public static boolean hasTextOnPage(PDPage page, String phrase) throws IOException {
PDFTextStripper textStripper = new PDFTextStripper();
PDDocument tempDoc = new PDDocument();
tempDoc.addPage(page);
String pageText = textStripper.getText(tempDoc);
tempDoc.close();
return pageText.contains(phrase);
}

View file

@ -10,6 +10,7 @@ import org.springframework.http.HttpHeaders;
import org.springframework.http.HttpStatus;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.multipart.MultipartFile;
public class WebResponseUtils {
@ -21,6 +22,16 @@ public class WebResponseUtils {
return WebResponseUtils.bytesToWebResponse(baos.toByteArray(), docName, mediaType);
}
public static ResponseEntity<byte[]> multiPartFileToWebResponse(MultipartFile file) throws IOException {
String fileName = file.getOriginalFilename();
MediaType mediaType = MediaType.parseMediaType(file.getContentType());
byte[] bytes = file.getBytes();
return bytesToWebResponse(bytes, fileName, mediaType);
}
public static ResponseEntity<byte[]> bytesToWebResponse(byte[] bytes, String docName, MediaType mediaType) throws IOException {
// Return the PDF as a response