This commit is contained in:
Anthony Stirling 2023-07-16 00:36:58 +01:00
parent 9af1b0cfdc
commit 29aabdfba8
5 changed files with 302 additions and 249 deletions

View file

@ -22,66 +22,77 @@ import stirling.software.SPDF.utils.PdfUtils;
import stirling.software.SPDF.utils.ProcessExecutor; import stirling.software.SPDF.utils.ProcessExecutor;
import stirling.software.SPDF.utils.WebResponseUtils; import stirling.software.SPDF.utils.WebResponseUtils;
import io.swagger.v3.oas.annotations.media.Schema; import io.swagger.v3.oas.annotations.media.Schema;
@RestController @RestController
@Tag(name = "Filter", description = "Filter APIs") @Tag(name = "Filter", description = "Filter APIs")
public class FilterController { public class FilterController {
@PostMapping(consumes = "multipart/form-data", value = "/contains-text") @PostMapping(consumes = "multipart/form-data", value = "/filter-contains-text")
@Operation(summary = "Checks if a PDF contains set text, returns true if does", description = "Input:PDF Output:Boolean Type:SISO") @Operation(summary = "Checks if a PDF contains set text, returns true if does", description = "Input:PDF Output:Boolean Type:SISO")
public Boolean containsText( public ResponseEntity<byte[]> containsText(
@RequestPart(required = true, value = "fileInput") @Parameter(description = "The input PDF file to be converted to a PDF/A file", required = true) MultipartFile inputFile, @RequestPart(required = true, value = "fileInput") @Parameter(description = "The input PDF file to be converted to a PDF/A file", required = true) MultipartFile inputFile,
@Parameter(description = "The text to check for", required = true) String text, @Parameter(description = "The text to check for", required = true) String text,
@Parameter(description = "The page number to check for text on accepts 'All', ranges like '1-4'", required = false) String pageNumber) @Parameter(description = "The page number to check for text on accepts 'All', ranges like '1-4'", required = false) String pageNumber)
throws IOException, InterruptedException { throws IOException, InterruptedException {
PDDocument pdfDocument = PDDocument.load(inputFile.getInputStream()); PDDocument pdfDocument = PDDocument.load(inputFile.getInputStream());
return PdfUtils.hasText(pdfDocument, pageNumber); if (PdfUtils.hasText(pdfDocument, pageNumber, text))
return WebResponseUtils.pdfDocToWebResponse(pdfDocument, inputFile.getOriginalFilename());
return null;
} }
// TODO // TODO
@PostMapping(consumes = "multipart/form-data", value = "/contains-image") @PostMapping(consumes = "multipart/form-data", value = "/filter-contains-image")
@Operation(summary = "Checks if a PDF contains an image", description = "Input:PDF Output:Boolean Type:SISO") @Operation(summary = "Checks if a PDF contains an image", description = "Input:PDF Output:Boolean Type:SISO")
public Boolean containsImage( public ResponseEntity<byte[]> containsImage(
@RequestPart(required = true, value = "fileInput") @Parameter(description = "The input PDF file to be converted to a PDF/A file", required = true) MultipartFile inputFile, @RequestPart(required = true, value = "fileInput") @Parameter(description = "The input PDF file to be converted to a PDF/A file", required = true) MultipartFile inputFile,
@Parameter(description = "The page number to check for image on accepts 'All', ranges like '1-4'", required = false) String pageNumber) @Parameter(description = "The page number to check for image on accepts 'All', ranges like '1-4'", required = false) String pageNumber)
throws IOException, InterruptedException { throws IOException, InterruptedException {
PDDocument pdfDocument = PDDocument.load(inputFile.getInputStream()); PDDocument pdfDocument = PDDocument.load(inputFile.getInputStream());
return PdfUtils.hasImagesOnPage(null); if (PdfUtils.hasImages(pdfDocument, pageNumber))
return WebResponseUtils.pdfDocToWebResponse(pdfDocument, inputFile.getOriginalFilename());
return null;
} }
@PostMapping(consumes = "multipart/form-data", value = "/page-count") @PostMapping(consumes = "multipart/form-data", value = "/filter-page-count")
@Operation(summary = "Checks if a PDF is greater, less or equal to a setPageCount", description = "Input:PDF Output:Boolean Type:SISO") @Operation(summary = "Checks if a PDF is greater, less or equal to a setPageCount", description = "Input:PDF Output:Boolean Type:SISO")
public Boolean pageCount( public ResponseEntity<byte[]> pageCount(
@RequestPart(required = true, value = "fileInput") @Parameter(description = "The input PDF file", required = true) MultipartFile inputFile, @RequestPart(required = true, value = "fileInput") @Parameter(description = "The input PDF file", required = true) MultipartFile inputFile,
@Parameter(description = "Page Count", required = true) String pageCount, @Parameter(description = "Page Count", required = true) String pageCount,
@Parameter(description = "Comparison type", @Parameter(description = "Comparison type", schema = @Schema(description = "The comparison type, accepts Greater, Equal, Less than", allowableValues = {
schema = @Schema(description = "The comparison type, accepts Greater, Equal, Less than", "Greater", "Equal", "Less" })) String comparator)
allowableValues = {"Greater", "Equal", "Less"})) String comparator)
throws IOException, InterruptedException { throws IOException, InterruptedException {
// Load the PDF // Load the PDF
PDDocument document = PDDocument.load(inputFile.getInputStream()); PDDocument document = PDDocument.load(inputFile.getInputStream());
int actualPageCount = document.getNumberOfPages(); int actualPageCount = document.getNumberOfPages();
boolean valid = false;
// Perform the comparison // Perform the comparison
switch (comparator) { switch (comparator) {
case "Greater": case "Greater":
return actualPageCount > Integer.parseInt(pageCount); valid = actualPageCount > Integer.parseInt(pageCount);
break;
case "Equal": case "Equal":
return actualPageCount == Integer.parseInt(pageCount); valid = actualPageCount == Integer.parseInt(pageCount);
break;
case "Less": case "Less":
return actualPageCount < Integer.parseInt(pageCount); valid = actualPageCount < Integer.parseInt(pageCount);
break;
default: default:
throw new IllegalArgumentException("Invalid comparator: " + comparator); throw new IllegalArgumentException("Invalid comparator: " + comparator);
} }
if (valid)
return WebResponseUtils.multiPartFileToWebResponse(inputFile);
return null;
} }
@PostMapping(consumes = "multipart/form-data", value = "/page-size") @PostMapping(consumes = "multipart/form-data", value = "/filter-page-size")
@Operation(summary = "Checks if a PDF is of a certain size", description = "Input:PDF Output:Boolean Type:SISO") @Operation(summary = "Checks if a PDF is of a certain size", description = "Input:PDF Output:Boolean Type:SISO")
public Boolean pageSize( public ResponseEntity<byte[]> pageSize(
@RequestPart(required = true, value = "fileInput") @Parameter(description = "The input PDF file", required = true) MultipartFile inputFile, @RequestPart(required = true, value = "fileInput") @Parameter(description = "The input PDF file", required = true) MultipartFile inputFile,
@Parameter(description = "Standard Page Size", required = true) String standardPageSize, @Parameter(description = "Standard Page Size", required = true) String standardPageSize,
@Parameter(description = "Comparison type", @Parameter(description = "Comparison type", schema = @Schema(description = "The comparison type, accepts Greater, Equal, Less than", allowableValues = {
schema = @Schema(description = "The comparison type, accepts Greater, Equal, Less than", "Greater", "Equal", "Less" })) String comparator)
allowableValues = {"Greater", "Equal", "Less"})) String comparator)
throws IOException, InterruptedException { throws IOException, InterruptedException {
// Load the PDF // Load the PDF
@ -97,55 +108,67 @@ public class FilterController {
PDRectangle standardSize = PdfUtils.textToPageSize(standardPageSize); PDRectangle standardSize = PdfUtils.textToPageSize(standardPageSize);
float standardArea = standardSize.getWidth() * standardSize.getHeight(); float standardArea = standardSize.getWidth() * standardSize.getHeight();
boolean valid = false;
// Perform the comparison // Perform the comparison
switch (comparator) { switch (comparator) {
case "Greater": case "Greater":
return actualArea > standardArea; valid = actualArea > standardArea;
break;
case "Equal": case "Equal":
return actualArea == standardArea; valid = actualArea == standardArea;
break;
case "Less": case "Less":
return actualArea < standardArea; valid = actualArea < standardArea;
break;
default: default:
throw new IllegalArgumentException("Invalid comparator: " + comparator); throw new IllegalArgumentException("Invalid comparator: " + comparator);
} }
if (valid)
return WebResponseUtils.multiPartFileToWebResponse(inputFile);
return null;
} }
@PostMapping(consumes = "multipart/form-data", value = "/filter-file-size")
@PostMapping(consumes = "multipart/form-data", value = "/file-size")
@Operation(summary = "Checks if a PDF is a set file size", description = "Input:PDF Output:Boolean Type:SISO") @Operation(summary = "Checks if a PDF is a set file size", description = "Input:PDF Output:Boolean Type:SISO")
public Boolean fileSize( public ResponseEntity<byte[]> fileSize(
@RequestPart(required = true, value = "fileInput") @Parameter(description = "The input PDF file", required = true) MultipartFile inputFile, @RequestPart(required = true, value = "fileInput") @Parameter(description = "The input PDF file", required = true) MultipartFile inputFile,
@Parameter(description = "File Size", required = true) String fileSize, @Parameter(description = "File Size", required = true) String fileSize,
@Parameter(description = "Comparison type", @Parameter(description = "Comparison type", schema = @Schema(description = "The comparison type, accepts Greater, Equal, Less than", allowableValues = {
schema = @Schema(description = "The comparison type, accepts Greater, Equal, Less than", "Greater", "Equal", "Less" })) String comparator)
allowableValues = {"Greater", "Equal", "Less"})) String comparator)
throws IOException, InterruptedException { throws IOException, InterruptedException {
// Get the file size // Get the file size
long actualFileSize = inputFile.getSize(); long actualFileSize = inputFile.getSize();
boolean valid = false;
// Perform the comparison // Perform the comparison
switch (comparator) { switch (comparator) {
case "Greater": case "Greater":
return actualFileSize > Long.parseLong(fileSize); valid = actualFileSize > Long.parseLong(fileSize);
break;
case "Equal": case "Equal":
return actualFileSize == Long.parseLong(fileSize); valid = actualFileSize == Long.parseLong(fileSize);
break;
case "Less": case "Less":
return actualFileSize < Long.parseLong(fileSize); valid = actualFileSize < Long.parseLong(fileSize);
break;
default: default:
throw new IllegalArgumentException("Invalid comparator: " + comparator); throw new IllegalArgumentException("Invalid comparator: " + comparator);
} }
if (valid)
return WebResponseUtils.multiPartFileToWebResponse(inputFile);
return null;
} }
@PostMapping(consumes = "multipart/form-data", value = "/filter-page-rotation")
@PostMapping(consumes = "multipart/form-data", value = "/page-rotation")
@Operation(summary = "Checks if a PDF is of a certain rotation", description = "Input:PDF Output:Boolean Type:SISO") @Operation(summary = "Checks if a PDF is of a certain rotation", description = "Input:PDF Output:Boolean Type:SISO")
public Boolean pageRotation( public ResponseEntity<byte[]> pageRotation(
@RequestPart(required = true, value = "fileInput") @Parameter(description = "The input PDF file", required = true) MultipartFile inputFile, @RequestPart(required = true, value = "fileInput") @Parameter(description = "The input PDF file", required = true) MultipartFile inputFile,
@Parameter(description = "Rotation in degrees", required = true) int rotation, @Parameter(description = "Rotation in degrees", required = true) int rotation,
@Parameter(description = "Comparison type", @Parameter(description = "Comparison type", schema = @Schema(description = "The comparison type, accepts Greater, Equal, Less than", allowableValues = {
schema = @Schema(description = "The comparison type, accepts Greater, Equal, Less than", "Greater", "Equal", "Less" })) String comparator)
allowableValues = {"Greater", "Equal", "Less"})) String comparator)
throws IOException, InterruptedException { throws IOException, InterruptedException {
// Load the PDF // Load the PDF
@ -154,18 +177,26 @@ public class FilterController {
// Get the rotation of the first page // Get the rotation of the first page
PDPage firstPage = document.getPage(0); PDPage firstPage = document.getPage(0);
int actualRotation = firstPage.getRotation(); int actualRotation = firstPage.getRotation();
boolean valid = false;
// Perform the comparison // Perform the comparison
switch (comparator) { switch (comparator) {
case "Greater": case "Greater":
return actualRotation > rotation; valid = actualRotation > rotation;
break;
case "Equal": case "Equal":
return actualRotation == rotation; valid = actualRotation == rotation;
break;
case "Less": case "Less":
return actualRotation < rotation; valid = actualRotation < rotation;
break;
default: default:
throw new IllegalArgumentException("Invalid comparator: " + comparator); throw new IllegalArgumentException("Invalid comparator: " + comparator);
} }
if (valid)
return WebResponseUtils.multiPartFileToWebResponse(inputFile);
return null;
} }
} }

View file

@ -292,6 +292,12 @@ public class PipelineController {
ResponseEntity<byte[]> response = restTemplate.exchange(url, HttpMethod.POST, entity, byte[].class); ResponseEntity<byte[]> response = restTemplate.exchange(url, HttpMethod.POST, entity, byte[].class);
// If the operation is filter and the response body is null or empty, skip this file
if (operation.startsWith("filter-") && (response.getBody() == null || response.getBody().length == 0)) {
logger.info("Skipping file due to failing {}", operation);
continue;
}
if (!response.getStatusCode().equals(HttpStatus.OK)) { if (!response.getStatusCode().equals(HttpStatus.OK)) {
logPrintStream.println("Error: " + response.getBody()); logPrintStream.println("Error: " + response.getBody());
hasErrors = true; hasErrors = true;

View file

@ -39,8 +39,14 @@ public class GeneralUtils {
// loop through the page order array // loop through the page order array
for (String element : pageOrderArr) { for (String element : pageOrderArr) {
// check if the element contains a range of pages if (element.equalsIgnoreCase("all")) {
if (element.matches("\\d*n\\+?-?\\d*|\\d*\\+?n")) { for (int i = 0; i < totalPages; i++) {
newPageOrder.add(i);
}
// As all pages are already added, no need to check further
break;
}
else if (element.matches("\\d*n\\+?-?\\d*|\\d*\\+?n")) {
// Handle page order as a function // Handle page order as a function
int coefficient = 0; int coefficient = 0;
int constant = 0; int constant = 0;

View file

@ -68,44 +68,38 @@ public class PdfUtils {
} }
} }
public boolean hasImageInFile(PDDocument pdfDocument, String text, String pagesToCheck) throws IOException {
PDFTextStripper textStripper = new PDFTextStripper();
String pdfText = "";
if(pagesToCheck == null || pagesToCheck.equals("all")) {
pdfText = textStripper.getText(pdfDocument);
} else {
// remove whitespaces
pagesToCheck = pagesToCheck.replaceAll("\\s+", "");
String[] splitPoints = pagesToCheck.split(",");
for (String splitPoint : splitPoints) {
if (splitPoint.contains("-")) {
// Handle page ranges
String[] range = splitPoint.split("-");
int startPage = Integer.parseInt(range[0]);
int endPage = Integer.parseInt(range[1]);
for (int i = startPage; i <= endPage; i++) { public static boolean hasImages(PDDocument document, String pagesToCheck) throws IOException {
textStripper.setStartPage(i); String[] pageOrderArr = pagesToCheck.split(",");
textStripper.setEndPage(i); List<Integer> pageList = GeneralUtils.parsePageList(pageOrderArr, document.getNumberOfPages());
pdfText += textStripper.getText(pdfDocument);
} for (int pageNumber : pageList) {
} else { PDPage page = document.getPage(pageNumber);
// Handle individual page if (hasImagesOnPage(page)) {
int page = Integer.parseInt(splitPoint); return true;
textStripper.setStartPage(page);
textStripper.setEndPage(page);
pdfText += textStripper.getText(pdfDocument);
}
} }
} }
pdfDocument.close(); return false;
return pdfText.contains(text);
} }
/**
 * Reports whether any of the selected pages of {@code document} contains {@code phrase}.
 *
 * <p>The selection is a comma-separated list handed to {@code GeneralUtils.parsePageList}.
 * A {@code null} or blank selection is treated as "all", and whitespace inside the
 * selection is ignored.
 *
 * @param document           the PDF to inspect; it is not closed by this method
 * @param pageNumbersToCheck comma-separated page selection, or {@code null} for every page
 * @param phrase             the text to look for
 * @return {@code true} as soon as one selected page is found to contain the phrase
 * @throws IOException if text extraction fails
 */
public static boolean hasText(PDDocument document, String pageNumbersToCheck, String phrase) throws IOException {
    // Strip whitespace so "1, 3" is split into clean tokens, and map "nothing given" to "all".
    String selection = (pageNumbersToCheck == null) ? "" : pageNumbersToCheck.replaceAll("\\s+", "");
    if (selection.isEmpty()) {
        selection = "all";
    }

    String[] pageOrderArr = selection.split(",");
    // NOTE(review): assumes parsePageList returns zero-based page indexes, as required by
    // PDDocument.getPage - confirm against GeneralUtils.
    List<Integer> pageList = GeneralUtils.parsePageList(pageOrderArr, document.getNumberOfPages());

    for (int pageNumber : pageList) {
        PDPage page = document.getPage(pageNumber);
        if (hasTextOnPage(page, phrase)) {
            return true;
        }
    }

    return false;
}
public static boolean hasImagesOnPage(PDPage page) throws IOException { public static boolean hasImagesOnPage(PDPage page) throws IOException {
ImageFinder imageFinder = new ImageFinder(page); ImageFinder imageFinder = new ImageFinder(page);
imageFinder.processPage(page); imageFinder.processPage(page);
@ -113,10 +107,15 @@ public class PdfUtils {
} }
public static boolean hasText(PDDocument document, String phrase) throws IOException {
PDFTextStripper pdfStripper = new PDFTextStripper();
String text = pdfStripper.getText(document); public static boolean hasTextOnPage(PDPage page, String phrase) throws IOException {
return text.contains(phrase); PDFTextStripper textStripper = new PDFTextStripper();
PDDocument tempDoc = new PDDocument();
tempDoc.addPage(page);
String pageText = textStripper.getText(tempDoc);
tempDoc.close();
return pageText.contains(phrase);
} }

View file

@ -10,6 +10,7 @@ import org.springframework.http.HttpHeaders;
import org.springframework.http.HttpStatus; import org.springframework.http.HttpStatus;
import org.springframework.http.MediaType; import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity; import org.springframework.http.ResponseEntity;
import org.springframework.web.multipart.MultipartFile;
public class WebResponseUtils { public class WebResponseUtils {
@ -21,6 +22,16 @@ public class WebResponseUtils {
return WebResponseUtils.bytesToWebResponse(baos.toByteArray(), docName, mediaType); return WebResponseUtils.bytesToWebResponse(baos.toByteArray(), docName, mediaType);
} }
/**
 * Wraps an uploaded file, unchanged, in a web response via {@code bytesToWebResponse}.
 *
 * @param file the uploaded file to echo back
 * @return a response carrying the file's bytes, original filename and content type
 * @throws IOException if the file's bytes cannot be read
 */
public static ResponseEntity<byte[]> multiPartFileToWebResponse(MultipartFile file) throws IOException {
    String fileName = file.getOriginalFilename();

    // getContentType() may be null when the client sent no Content-Type for the part;
    // parseMediaType rejects null/empty input, so fall back to a generic binary type.
    String declaredType = file.getContentType();
    MediaType mediaType = (declaredType == null || declaredType.trim().isEmpty())
            ? MediaType.APPLICATION_OCTET_STREAM
            : MediaType.parseMediaType(declaredType);

    byte[] bytes = file.getBytes();
    return bytesToWebResponse(bytes, fileName, mediaType);
}
public static ResponseEntity<byte[]> bytesToWebResponse(byte[] bytes, String docName, MediaType mediaType) throws IOException { public static ResponseEntity<byte[]> bytesToWebResponse(byte[] bytes, String docName, MediaType mediaType) throws IOException {
// Return the PDF as a response // Return the PDF as a response