This commit is contained in:
Anthony Stirling 2023-07-16 00:36:58 +01:00
parent 9af1b0cfdc
commit 29aabdfba8
5 changed files with 302 additions and 249 deletions

View file

@ -22,68 +22,79 @@ import stirling.software.SPDF.utils.PdfUtils;
import stirling.software.SPDF.utils.ProcessExecutor;
import stirling.software.SPDF.utils.WebResponseUtils;
import io.swagger.v3.oas.annotations.media.Schema;
@RestController
@Tag(name = "Filter", description = "Filter APIs")
public class FilterController {
@PostMapping(consumes = "multipart/form-data", value = "/contains-text")
@PostMapping(consumes = "multipart/form-data", value = "/filter-contains-text")
@Operation(summary = "Checks if a PDF contains set text, returns true if does", description = "Input:PDF Output:Boolean Type:SISO")
public Boolean containsText(
public ResponseEntity<byte[]> containsText(
@RequestPart(required = true, value = "fileInput") @Parameter(description = "The input PDF file to be converted to a PDF/A file", required = true) MultipartFile inputFile,
@Parameter(description = "The text to check for", required = true) String text,
@Parameter(description = "The page number to check for text on accepts 'All', ranges like '1-4'", required = false) String pageNumber)
throws IOException, InterruptedException {
PDDocument pdfDocument = PDDocument.load(inputFile.getInputStream());
return PdfUtils.hasText(pdfDocument, pageNumber);
if (PdfUtils.hasText(pdfDocument, pageNumber, text))
return WebResponseUtils.pdfDocToWebResponse(pdfDocument, inputFile.getOriginalFilename());
return null;
}
//TODO
@PostMapping(consumes = "multipart/form-data", value = "/contains-image")
// TODO
@PostMapping(consumes = "multipart/form-data", value = "/filter-contains-image")
@Operation(summary = "Checks if a PDF contains an image", description = "Input:PDF Output:Boolean Type:SISO")
public Boolean containsImage(
public ResponseEntity<byte[]> containsImage(
@RequestPart(required = true, value = "fileInput") @Parameter(description = "The input PDF file to be converted to a PDF/A file", required = true) MultipartFile inputFile,
@Parameter(description = "The page number to check for image on accepts 'All', ranges like '1-4'", required = false) String pageNumber)
throws IOException, InterruptedException {
PDDocument pdfDocument = PDDocument.load(inputFile.getInputStream());
return PdfUtils.hasImagesOnPage(null);
if (PdfUtils.hasImages(pdfDocument, pageNumber))
return WebResponseUtils.pdfDocToWebResponse(pdfDocument, inputFile.getOriginalFilename());
return null;
}
@PostMapping(consumes = "multipart/form-data", value = "/page-count")
@PostMapping(consumes = "multipart/form-data", value = "/filter-page-count")
@Operation(summary = "Checks if a PDF is greater, less or equal to a setPageCount", description = "Input:PDF Output:Boolean Type:SISO")
public Boolean pageCount(
public ResponseEntity<byte[]> pageCount(
@RequestPart(required = true, value = "fileInput") @Parameter(description = "The input PDF file", required = true) MultipartFile inputFile,
@Parameter(description = "Page Count", required = true) String pageCount,
@Parameter(description = "Comparison type",
schema = @Schema(description = "The comparison type, accepts Greater, Equal, Less than",
allowableValues = {"Greater", "Equal", "Less"})) String comparator)
@Parameter(description = "Comparison type", schema = @Schema(description = "The comparison type, accepts Greater, Equal, Less than", allowableValues = {
"Greater", "Equal", "Less" })) String comparator)
throws IOException, InterruptedException {
// Load the PDF
PDDocument document = PDDocument.load(inputFile.getInputStream());
int actualPageCount = document.getNumberOfPages();
boolean valid = false;
// Perform the comparison
switch (comparator) {
case "Greater":
return actualPageCount > Integer.parseInt(pageCount);
valid = actualPageCount > Integer.parseInt(pageCount);
break;
case "Equal":
return actualPageCount == Integer.parseInt(pageCount);
valid = actualPageCount == Integer.parseInt(pageCount);
break;
case "Less":
return actualPageCount < Integer.parseInt(pageCount);
valid = actualPageCount < Integer.parseInt(pageCount);
break;
default:
throw new IllegalArgumentException("Invalid comparator: " + comparator);
}
if (valid)
return WebResponseUtils.multiPartFileToWebResponse(inputFile);
return null;
}
@PostMapping(consumes = "multipart/form-data", value = "/page-size")
@PostMapping(consumes = "multipart/form-data", value = "/filter-page-size")
@Operation(summary = "Checks if a PDF is of a certain size", description = "Input:PDF Output:Boolean Type:SISO")
public Boolean pageSize(
@RequestPart(required = true, value = "fileInput") @Parameter(description = "The input PDF file", required = true) MultipartFile inputFile,
@Parameter(description = "Standard Page Size", required = true) String standardPageSize,
@Parameter(description = "Comparison type",
schema = @Schema(description = "The comparison type, accepts Greater, Equal, Less than",
allowableValues = {"Greater", "Equal", "Less"})) String comparator)
throws IOException, InterruptedException {
public ResponseEntity<byte[]> pageSize(
@RequestPart(required = true, value = "fileInput") @Parameter(description = "The input PDF file", required = true) MultipartFile inputFile,
@Parameter(description = "Standard Page Size", required = true) String standardPageSize,
@Parameter(description = "Comparison type", schema = @Schema(description = "The comparison type, accepts Greater, Equal, Less than", allowableValues = {
"Greater", "Equal", "Less" })) String comparator)
throws IOException, InterruptedException {
// Load the PDF
PDDocument document = PDDocument.load(inputFile.getInputStream());
@ -97,75 +108,95 @@ public class FilterController {
PDRectangle standardSize = PdfUtils.textToPageSize(standardPageSize);
float standardArea = standardSize.getWidth() * standardSize.getHeight();
boolean valid = false;
// Perform the comparison
switch (comparator) {
case "Greater":
return actualArea > standardArea;
valid = actualArea > standardArea;
break;
case "Equal":
return actualArea == standardArea;
valid = actualArea == standardArea;
break;
case "Less":
return actualArea < standardArea;
valid = actualArea < standardArea;
break;
default:
throw new IllegalArgumentException("Invalid comparator: " + comparator);
}
}
@PostMapping(consumes = "multipart/form-data", value = "/file-size")
if (valid)
return WebResponseUtils.multiPartFileToWebResponse(inputFile);
return null;
}
@PostMapping(consumes = "multipart/form-data", value = "/filter-file-size")
@Operation(summary = "Checks if a PDF is a set file size", description = "Input:PDF Output:Boolean Type:SISO")
public Boolean fileSize(
@RequestPart(required = true, value = "fileInput") @Parameter(description = "The input PDF file", required = true) MultipartFile inputFile,
@Parameter(description = "File Size", required = true) String fileSize,
@Parameter(description = "Comparison type",
schema = @Schema(description = "The comparison type, accepts Greater, Equal, Less than",
allowableValues = {"Greater", "Equal", "Less"})) String comparator)
throws IOException, InterruptedException {
public ResponseEntity<byte[]> fileSize(
@RequestPart(required = true, value = "fileInput") @Parameter(description = "The input PDF file", required = true) MultipartFile inputFile,
@Parameter(description = "File Size", required = true) String fileSize,
@Parameter(description = "Comparison type", schema = @Schema(description = "The comparison type, accepts Greater, Equal, Less than", allowableValues = {
"Greater", "Equal", "Less" })) String comparator)
throws IOException, InterruptedException {
// Get the file size
long actualFileSize = inputFile.getSize();
boolean valid = false;
// Perform the comparison
switch (comparator) {
case "Greater":
return actualFileSize > Long.parseLong(fileSize);
valid = actualFileSize > Long.parseLong(fileSize);
break;
case "Equal":
return actualFileSize == Long.parseLong(fileSize);
valid = actualFileSize == Long.parseLong(fileSize);
break;
case "Less":
return actualFileSize < Long.parseLong(fileSize);
valid = actualFileSize < Long.parseLong(fileSize);
break;
default:
throw new IllegalArgumentException("Invalid comparator: " + comparator);
}
if (valid)
return WebResponseUtils.multiPartFileToWebResponse(inputFile);
return null;
}
@PostMapping(consumes = "multipart/form-data", value = "/page-rotation")
@PostMapping(consumes = "multipart/form-data", value = "/filter-page-rotation")
@Operation(summary = "Checks if a PDF is of a certain rotation", description = "Input:PDF Output:Boolean Type:SISO")
public Boolean pageRotation(
@RequestPart(required = true, value = "fileInput") @Parameter(description = "The input PDF file", required = true) MultipartFile inputFile,
@Parameter(description = "Rotation in degrees", required = true) int rotation,
@Parameter(description = "Comparison type",
schema = @Schema(description = "The comparison type, accepts Greater, Equal, Less than",
allowableValues = {"Greater", "Equal", "Less"})) String comparator)
throws IOException, InterruptedException {
public ResponseEntity<byte[]> pageRotation(
@RequestPart(required = true, value = "fileInput") @Parameter(description = "The input PDF file", required = true) MultipartFile inputFile,
@Parameter(description = "Rotation in degrees", required = true) int rotation,
@Parameter(description = "Comparison type", schema = @Schema(description = "The comparison type, accepts Greater, Equal, Less than", allowableValues = {
"Greater", "Equal", "Less" })) String comparator)
throws IOException, InterruptedException {
// Load the PDF
PDDocument document = PDDocument.load(inputFile.getInputStream());
// Get the rotation of the first page
PDPage firstPage = document.getPage(0);
int actualRotation = firstPage.getRotation();
boolean valid = false;
// Perform the comparison
switch (comparator) {
case "Greater":
return actualRotation > rotation;
valid = actualRotation > rotation;
break;
case "Equal":
return actualRotation == rotation;
valid = actualRotation == rotation;
break;
case "Less":
return actualRotation < rotation;
valid = actualRotation < rotation;
break;
default:
throw new IllegalArgumentException("Invalid comparator: " + comparator);
}
if (valid)
return WebResponseUtils.multiPartFileToWebResponse(inputFile);
return null;
}
}

View file

@ -292,12 +292,18 @@ public class PipelineController {
ResponseEntity<byte[]> response = restTemplate.exchange(url, HttpMethod.POST, entity, byte[].class);
// If the operation is filter and the response body is null or empty, skip this file
if (operation.startsWith("filter-") && (response.getBody() == null || response.getBody().length == 0)) {
logger.info("Skipping file due to failing {}", operation);
continue;
}
if (!response.getStatusCode().equals(HttpStatus.OK)) {
logPrintStream.println("Error: " + response.getBody());
hasErrors = true;
continue;
}
// Define filename
String filename;

View file

@ -1,107 +1,113 @@
package stirling.software.SPDF.utils;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
public class GeneralUtils {
public static Long convertSizeToBytes(String sizeStr) {
if (sizeStr == null) {
return null;
}
sizeStr = sizeStr.trim().toUpperCase();
try {
if (sizeStr.endsWith("KB")) {
return (long) (Double.parseDouble(sizeStr.substring(0, sizeStr.length() - 2)) * 1024);
} else if (sizeStr.endsWith("MB")) {
return (long) (Double.parseDouble(sizeStr.substring(0, sizeStr.length() - 2)) * 1024 * 1024);
} else if (sizeStr.endsWith("GB")) {
return (long) (Double.parseDouble(sizeStr.substring(0, sizeStr.length() - 2)) * 1024 * 1024 * 1024);
} else if (sizeStr.endsWith("B")) {
return Long.parseLong(sizeStr.substring(0, sizeStr.length() - 1));
} else {
// Input string does not have a valid format, handle this case
}
} catch (NumberFormatException e) {
// The numeric part of the input string cannot be parsed, handle this case
}
return null;
}
public static List<Integer> parsePageList(String[] pageOrderArr, int totalPages) {
List<Integer> newPageOrder = new ArrayList<>();
// loop through the page order array
for (String element : pageOrderArr) {
// check if the element contains a range of pages
if (element.matches("\\d*n\\+?-?\\d*|\\d*\\+?n")) {
// Handle page order as a function
int coefficient = 0;
int constant = 0;
boolean coefficientExists = false;
boolean constantExists = false;
if (element.contains("n")) {
String[] parts = element.split("n");
if (!parts[0].equals("") && parts[0] != null) {
coefficient = Integer.parseInt(parts[0]);
coefficientExists = true;
}
if (parts.length > 1 && !parts[1].equals("") && parts[1] != null) {
constant = Integer.parseInt(parts[1]);
constantExists = true;
}
} else if (element.contains("+")) {
constant = Integer.parseInt(element.replace("+", ""));
constantExists = true;
}
for (int i = 1; i <= totalPages; i++) {
int pageNum = coefficientExists ? coefficient * i : i;
pageNum += constantExists ? constant : 0;
if (pageNum <= totalPages && pageNum > 0) {
newPageOrder.add(pageNum - 1);
}
}
} else if (element.contains("-")) {
// split the range into start and end page
String[] range = element.split("-");
int start = Integer.parseInt(range[0]);
int end = Integer.parseInt(range[1]);
// check if the end page is greater than total pages
if (end > totalPages) {
end = totalPages;
}
// loop through the range of pages
for (int j = start; j <= end; j++) {
// print the current index
newPageOrder.add(j - 1);
}
} else {
// if the element is a single page
newPageOrder.add(Integer.parseInt(element) - 1);
}
}
return newPageOrder;
}
public static boolean createDir(String path) {
Path folder = Paths.get(path);
if (!Files.exists(folder)) {
try {
Files.createDirectories(folder);
} catch (IOException e) {
e.printStackTrace();
return false;
}
}
return true;
}
}
package stirling.software.SPDF.utils;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
public class GeneralUtils {
public static Long convertSizeToBytes(String sizeStr) {
if (sizeStr == null) {
return null;
}
sizeStr = sizeStr.trim().toUpperCase();
try {
if (sizeStr.endsWith("KB")) {
return (long) (Double.parseDouble(sizeStr.substring(0, sizeStr.length() - 2)) * 1024);
} else if (sizeStr.endsWith("MB")) {
return (long) (Double.parseDouble(sizeStr.substring(0, sizeStr.length() - 2)) * 1024 * 1024);
} else if (sizeStr.endsWith("GB")) {
return (long) (Double.parseDouble(sizeStr.substring(0, sizeStr.length() - 2)) * 1024 * 1024 * 1024);
} else if (sizeStr.endsWith("B")) {
return Long.parseLong(sizeStr.substring(0, sizeStr.length() - 1));
} else {
// Input string does not have a valid format, handle this case
}
} catch (NumberFormatException e) {
// The numeric part of the input string cannot be parsed, handle this case
}
return null;
}
public static List<Integer> parsePageList(String[] pageOrderArr, int totalPages) {
List<Integer> newPageOrder = new ArrayList<>();
// loop through the page order array
for (String element : pageOrderArr) {
if (element.equalsIgnoreCase("all")) {
for (int i = 0; i < totalPages; i++) {
newPageOrder.add(i);
}
// As all pages are already added, no need to check further
break;
}
else if (element.matches("\\d*n\\+?-?\\d*|\\d*\\+?n")) {
// Handle page order as a function
int coefficient = 0;
int constant = 0;
boolean coefficientExists = false;
boolean constantExists = false;
if (element.contains("n")) {
String[] parts = element.split("n");
if (!parts[0].equals("") && parts[0] != null) {
coefficient = Integer.parseInt(parts[0]);
coefficientExists = true;
}
if (parts.length > 1 && !parts[1].equals("") && parts[1] != null) {
constant = Integer.parseInt(parts[1]);
constantExists = true;
}
} else if (element.contains("+")) {
constant = Integer.parseInt(element.replace("+", ""));
constantExists = true;
}
for (int i = 1; i <= totalPages; i++) {
int pageNum = coefficientExists ? coefficient * i : i;
pageNum += constantExists ? constant : 0;
if (pageNum <= totalPages && pageNum > 0) {
newPageOrder.add(pageNum - 1);
}
}
} else if (element.contains("-")) {
// split the range into start and end page
String[] range = element.split("-");
int start = Integer.parseInt(range[0]);
int end = Integer.parseInt(range[1]);
// check if the end page is greater than total pages
if (end > totalPages) {
end = totalPages;
}
// loop through the range of pages
for (int j = start; j <= end; j++) {
// print the current index
newPageOrder.add(j - 1);
}
} else {
// if the element is a single page
newPageOrder.add(Integer.parseInt(element) - 1);
}
}
return newPageOrder;
}
public static boolean createDir(String path) {
Path folder = Paths.get(path);
if (!Files.exists(folder)) {
try {
Files.createDirectories(folder);
} catch (IOException e) {
e.printStackTrace();
return false;
}
}
return true;
}
}

View file

@ -68,43 +68,37 @@ public class PdfUtils {
}
}
public boolean hasImageInFile(PDDocument pdfDocument, String text, String pagesToCheck) throws IOException {
PDFTextStripper textStripper = new PDFTextStripper();
String pdfText = "";
if(pagesToCheck == null || pagesToCheck.equals("all")) {
pdfText = textStripper.getText(pdfDocument);
} else {
// remove whitespaces
pagesToCheck = pagesToCheck.replaceAll("\\s+", "");
public static boolean hasImages(PDDocument document, String pagesToCheck) throws IOException {
String[] pageOrderArr = pagesToCheck.split(",");
List<Integer> pageList = GeneralUtils.parsePageList(pageOrderArr, document.getNumberOfPages());
String[] splitPoints = pagesToCheck.split(",");
for (String splitPoint : splitPoints) {
if (splitPoint.contains("-")) {
// Handle page ranges
String[] range = splitPoint.split("-");
int startPage = Integer.parseInt(range[0]);
int endPage = Integer.parseInt(range[1]);
for (int i = startPage; i <= endPage; i++) {
textStripper.setStartPage(i);
textStripper.setEndPage(i);
pdfText += textStripper.getText(pdfDocument);
}
} else {
// Handle individual page
int page = Integer.parseInt(splitPoint);
textStripper.setStartPage(page);
textStripper.setEndPage(page);
pdfText += textStripper.getText(pdfDocument);
}
for (int pageNumber : pageList) {
PDPage page = document.getPage(pageNumber);
if (hasImagesOnPage(page)) {
return true;
}
}
pdfDocument.close();
return pdfText.contains(text);
return false;
}
public static boolean hasText(PDDocument document, String pageNumbersToCheck, String phrase) throws IOException {
String[] pageOrderArr = pageNumbersToCheck.split(",");
List<Integer> pageList = GeneralUtils.parsePageList(pageOrderArr, document.getNumberOfPages());
for (int pageNumber : pageList) {
PDPage page = document.getPage(pageNumber);
if (hasTextOnPage(page, phrase)) {
return true;
}
}
return false;
}
public static boolean hasImagesOnPage(PDPage page) throws IOException {
ImageFinder imageFinder = new ImageFinder(page);
@ -113,12 +107,17 @@ public class PdfUtils {
}
public static boolean hasText(PDDocument document, String phrase) throws IOException {
PDFTextStripper pdfStripper = new PDFTextStripper();
String text = pdfStripper.getText(document);
return text.contains(phrase);
}
public static boolean hasTextOnPage(PDPage page, String phrase) throws IOException {
PDFTextStripper textStripper = new PDFTextStripper();
PDDocument tempDoc = new PDDocument();
tempDoc.addPage(page);
String pageText = textStripper.getText(tempDoc);
tempDoc.close();
return pageText.contains(phrase);
}
public boolean containsTextInFile(PDDocument pdfDocument, String text, String pagesToCheck) throws IOException {
PDFTextStripper textStripper = new PDFTextStripper();

View file

@ -1,50 +1,61 @@
package stirling.software.SPDF.utils;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.springframework.http.HttpHeaders;
import org.springframework.http.HttpStatus;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
public class WebResponseUtils {
public static ResponseEntity<byte[]> boasToWebResponse(ByteArrayOutputStream baos, String docName) throws IOException {
return WebResponseUtils.bytesToWebResponse(baos.toByteArray(), docName);
}
public static ResponseEntity<byte[]> boasToWebResponse(ByteArrayOutputStream baos, String docName, MediaType mediaType) throws IOException {
return WebResponseUtils.bytesToWebResponse(baos.toByteArray(), docName, mediaType);
}
public static ResponseEntity<byte[]> bytesToWebResponse(byte[] bytes, String docName, MediaType mediaType) throws IOException {
// Return the PDF as a response
HttpHeaders headers = new HttpHeaders();
headers.setContentType(mediaType);
headers.setContentLength(bytes.length);
String encodedDocName = URLEncoder.encode(docName, StandardCharsets.UTF_8.toString()).replaceAll("\\+", "%20");
headers.setContentDispositionFormData("attachment", encodedDocName);
return new ResponseEntity<>(bytes, headers, HttpStatus.OK);
}
public static ResponseEntity<byte[]> bytesToWebResponse(byte[] bytes, String docName) throws IOException {
return bytesToWebResponse(bytes, docName, MediaType.APPLICATION_PDF);
}
public static ResponseEntity<byte[]> pdfDocToWebResponse(PDDocument document, String docName) throws IOException {
// Open Byte Array and save document to it
ByteArrayOutputStream baos = new ByteArrayOutputStream();
document.save(baos);
// Close the document
document.close();
return boasToWebResponse(baos, docName);
}
}
package stirling.software.SPDF.utils;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.springframework.http.HttpHeaders;
import org.springframework.http.HttpStatus;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.multipart.MultipartFile;
public class WebResponseUtils {
public static ResponseEntity<byte[]> boasToWebResponse(ByteArrayOutputStream baos, String docName) throws IOException {
return WebResponseUtils.bytesToWebResponse(baos.toByteArray(), docName);
}
public static ResponseEntity<byte[]> boasToWebResponse(ByteArrayOutputStream baos, String docName, MediaType mediaType) throws IOException {
return WebResponseUtils.bytesToWebResponse(baos.toByteArray(), docName, mediaType);
}
public static ResponseEntity<byte[]> multiPartFileToWebResponse(MultipartFile file) throws IOException {
String fileName = file.getOriginalFilename();
MediaType mediaType = MediaType.parseMediaType(file.getContentType());
byte[] bytes = file.getBytes();
return bytesToWebResponse(bytes, fileName, mediaType);
}
public static ResponseEntity<byte[]> bytesToWebResponse(byte[] bytes, String docName, MediaType mediaType) throws IOException {
// Return the PDF as a response
HttpHeaders headers = new HttpHeaders();
headers.setContentType(mediaType);
headers.setContentLength(bytes.length);
String encodedDocName = URLEncoder.encode(docName, StandardCharsets.UTF_8.toString()).replaceAll("\\+", "%20");
headers.setContentDispositionFormData("attachment", encodedDocName);
return new ResponseEntity<>(bytes, headers, HttpStatus.OK);
}
public static ResponseEntity<byte[]> bytesToWebResponse(byte[] bytes, String docName) throws IOException {
return bytesToWebResponse(bytes, docName, MediaType.APPLICATION_PDF);
}
public static ResponseEntity<byte[]> pdfDocToWebResponse(PDDocument document, String docName) throws IOException {
// Open Byte Array and save document to it
ByteArrayOutputStream baos = new ByteArrayOutputStream();
document.save(baos);
// Close the document
document.close();
return boasToWebResponse(baos, docName);
}
}