Merge pull request #102 from Frooodle/cleanups

format and move everything, other in own folder
This commit is contained in:
Anthony Stirling 2023-04-23 09:45:01 +01:00 committed by GitHub
commit 93fb571725
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
33 changed files with 702 additions and 763 deletions

View file

@ -1,4 +1,5 @@
package stirling.software.SPDF;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.Socket;
@ -7,21 +8,35 @@ import java.util.concurrent.Executors;
public class LibreOfficeListener {
private static final LibreOfficeListener INSTANCE = new LibreOfficeListener();
private static final long ACTIVITY_TIMEOUT = 20 * 60 * 1000; // 20 minutes
private static final LibreOfficeListener INSTANCE = new LibreOfficeListener();
private static final int LISTENER_PORT = 2002;
private ExecutorService executorService;
private Process process;
private long lastActivityTime;
private LibreOfficeListener() {}
public static LibreOfficeListener getInstance() {
return INSTANCE;
}
private ExecutorService executorService;
private long lastActivityTime;
private Process process;
private LibreOfficeListener() {
}
private boolean isListenerRunning() {
try {
System.out.println("waiting for listener to start");
Socket socket = new Socket();
socket.connect(new InetSocketAddress("localhost", 2002), 1000); // Timeout after 1 second
socket.close();
return true;
} catch (IOException e) {
return false;
}
}
public void start() throws IOException {
// Check if the listener is already running
if (process != null && process.isAlive()) {
@ -50,7 +65,6 @@ public class LibreOfficeListener {
}
});
// Wait for the listener to start up
long startTime = System.currentTimeMillis();
long timeout = 30000; // Timeout after 30 seconds
@ -69,18 +83,6 @@ public class LibreOfficeListener {
}
}
private boolean isListenerRunning() {
try {
System.out.println("waiting for listener to start");
Socket socket = new Socket();
socket.connect(new InetSocketAddress("localhost", 2002), 1000); // Timeout after 1 second
socket.close();
return true;
} catch (IOException e) {
return false;
}
}
public synchronized void stop() {
// Stop the activity timeout monitor thread
executorService.shutdownNow();

View file

@ -3,41 +3,40 @@ package stirling.software.SPDF.config;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
@Configuration
public class AppConfig {
@Bean(name = "appName")
public String appName() {
String appName = System.getProperty("APP_HOME_NAME");
if (appName == null)
appName = System.getenv("APP_HOME_NAME");
return (appName != null) ? appName : "Stirling PDF";
}
@Bean(name = "appVersion")
public String appVersion() {
String version = getClass().getPackage().getImplementationVersion();
return (version != null) ? version : "0.3.3";
}
@Bean(name = "appName")
public String appName() {
String appName = System.getProperty("APP_HOME_NAME");
if(appName == null)
appName = System.getenv("APP_HOME_NAME");
return (appName != null) ? appName : "Stirling PDF";
@Bean(name = "homeText")
public String homeText() {
String homeText = System.getProperty("APP_HOME_DESCRIPTION");
if (homeText == null)
homeText = System.getenv("APP_HOME_DESCRIPTION");
return (homeText != null) ? homeText : "null";
}
@Bean(name = "navBarText")
public String navBarText() {
String navBarText = System.getProperty("APP_NAVBAR_NAME");
if(navBarText == null)
if (navBarText == null)
navBarText = System.getenv("APP_NAVBAR_NAME");
if(navBarText == null)
if (navBarText == null)
navBarText = System.getProperty("APP_HOME_NAME");
if(navBarText == null)
if (navBarText == null)
navBarText = System.getenv("APP_HOME_NAME");
return (navBarText != null) ? navBarText : "Stirling PDF";
}
@Bean(name = "homeText")
public String homeText() {
String homeText = System.getProperty("APP_HOME_DESCRIPTION");
if(homeText == null)
homeText = System.getenv("APP_HOME_DESCRIPTION");
return (homeText != null) ? homeText : "null";
}
}

View file

@ -13,12 +13,24 @@ import org.springframework.web.servlet.i18n.SessionLocaleResolver;
@Configuration
public class Beans implements WebMvcConfigurer {
@Override
public void addInterceptors(InterceptorRegistry registry) {
registry.addInterceptor(localeChangeInterceptor());
}
@Bean
public LocaleChangeInterceptor localeChangeInterceptor() {
LocaleChangeInterceptor lci = new LocaleChangeInterceptor();
lci.setParamName("lang");
return lci;
}
@Bean
public LocaleResolver localeResolver() {
SessionLocaleResolver slr = new SessionLocaleResolver();
String appLocaleEnv = System.getProperty("APP_LOCALE");
if(appLocaleEnv == null)
if (appLocaleEnv == null)
appLocaleEnv = System.getenv("APP_LOCALE");
Locale defaultLocale = Locale.UK; // Fallback to UK locale if environment variable is not set
@ -37,16 +49,4 @@ public class Beans implements WebMvcConfigurer {
return slr;
}
@Bean
public LocaleChangeInterceptor localeChangeInterceptor() {
LocaleChangeInterceptor lci = new LocaleChangeInterceptor();
lci.setParamName("lang");
return lci;
}
@Override
public void addInterceptors(InterceptorRegistry registry) {
registry.addInterceptor(localeChangeInterceptor());
}
}

View file

@ -30,22 +30,6 @@ public class MergeController {
return "merge-pdfs";
}
@PostMapping("/merge-pdfs")
public ResponseEntity<byte[]> mergePdfs(@RequestParam("fileInput") MultipartFile[] files) throws IOException {
// Read the input PDF files into PDDocument objects
List<PDDocument> documents = new ArrayList<>();
// Loop through the files array and read each file into a PDDocument
for (MultipartFile file : files) {
documents.add(PDDocument.load(file.getInputStream()));
}
PDDocument mergedDoc = mergeDocuments(documents);
// Return the merged PDF as a response
return PdfUtils.pdfDocToWebResponse(mergedDoc, files[0].getOriginalFilename().replaceFirst("[.][^.]+$", "")+ "_merged.pdf");
}
private PDDocument mergeDocuments(List<PDDocument> documents) throws IOException {
// Create a new empty document
PDDocument mergedDoc = new PDDocument();
@ -64,4 +48,20 @@ public class MergeController {
return mergedDoc;
}
@PostMapping("/merge-pdfs")
public ResponseEntity<byte[]> mergePdfs(@RequestParam("fileInput") MultipartFile[] files) throws IOException {
// Read the input PDF files into PDDocument objects
List<PDDocument> documents = new ArrayList<>();
// Loop through the files array and read each file into a PDDocument
for (MultipartFile file : files) {
documents.add(PDDocument.load(file.getInputStream()));
}
PDDocument mergedDoc = mergeDocuments(documents);
// Return the merged PDF as a response
return PdfUtils.pdfDocToWebResponse(mergedDoc, files[0].getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_merged.pdf");
}
}

View file

@ -1,180 +0,0 @@
package stirling.software.SPDF.controller;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.http.HttpHeaders;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
import org.springframework.web.servlet.ModelAndView;
import stirling.software.SPDF.utils.ProcessExecutor;
@Controller
public class OCRController {
private static final Logger logger = LoggerFactory.getLogger(OCRController.class);
@GetMapping("/ocr-pdf")
public ModelAndView ocrPdfPage() {
ModelAndView modelAndView = new ModelAndView("ocr-pdf");
modelAndView.addObject("languages", getAvailableTesseractLanguages());
modelAndView.addObject("currentPage", "ocr-pdf");
return modelAndView;
}
@PostMapping("/ocr-pdf")
public ResponseEntity<byte[]> processPdfWithOCR(@RequestParam("fileInput") MultipartFile inputFile,
@RequestParam("languages") List<String> selectedLanguages,
@RequestParam(name = "sidecar", required = false) Boolean sidecar,
@RequestParam(name = "deskew", required = false) Boolean deskew,
@RequestParam(name = "clean", required = false) Boolean clean,
@RequestParam(name = "clean-final", required = false) Boolean cleanFinal,
@RequestParam(name = "ocrType", required = false) String ocrType) throws IOException, InterruptedException {
//--output-type pdfa
if (selectedLanguages == null || selectedLanguages.size() < 1) {
throw new IOException("Please select at least one language.");
}
// Validate and sanitize selected languages using regex
String languagePattern = "^[a-zA-Z]{3}$"; // Regex pattern for three-letter language codes
selectedLanguages = selectedLanguages.stream()
.filter(lang -> Pattern.matches(languagePattern, lang))
.collect(Collectors.toList());
if (selectedLanguages.isEmpty()) {
throw new IOException("None of the selected languages are valid.");
}
// Save the uploaded file to a temporary location
Path tempInputFile = Files.createTempFile("input_", ".pdf");
Files.copy(inputFile.getInputStream(), tempInputFile, StandardCopyOption.REPLACE_EXISTING);
// Prepare the output file path
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
// Prepare the output file path
Path sidecarTextPath = null;
// Run OCR Command
String languageOption = String.join("+", selectedLanguages);
List<String> command = new ArrayList<>(Arrays.asList("ocrmypdf","--verbose", "2", "--output-type", "pdf"));
if (sidecar != null && sidecar) {
sidecarTextPath = Files.createTempFile("sidecar", ".txt");
command.add("--sidecar");
command.add(sidecarTextPath.toString());
}
if (deskew != null && deskew) {
command.add("--deskew");
}
if (clean != null && clean) {
command.add("--clean");
}
if (cleanFinal != null && cleanFinal) {
command.add("--clean-final");
}
if (ocrType != null && !ocrType.equals("")) {
if("skip-text".equals(ocrType)) {
command.add("--skip-text");
} else if("force-ocr".equals(ocrType)) {
command.add("--force-ocr");
} else if("Normal".equals(ocrType)) {
}
}
command.addAll(Arrays.asList("--language", languageOption,
tempInputFile.toString(), tempOutputFile.toString()));
//Run CLI command
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command);
// Read the OCR processed PDF file
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
// Clean up the temporary files
Files.delete(tempInputFile);
// Return the OCR processed PDF as a response
String outputFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_OCR.pdf";
HttpHeaders headers = new HttpHeaders();
if (sidecar != null && sidecar) {
// Create a zip file containing both the PDF and the text file
String outputZipFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_OCR.zip";
Path tempZipFile = Files.createTempFile("output_", ".zip");
try (ZipOutputStream zipOut = new ZipOutputStream(new FileOutputStream(tempZipFile.toFile()))) {
// Add PDF file to the zip
ZipEntry pdfEntry = new ZipEntry(outputFilename);
zipOut.putNextEntry(pdfEntry);
Files.copy(tempOutputFile, zipOut);
zipOut.closeEntry();
// Add text file to the zip
ZipEntry txtEntry = new ZipEntry(outputFilename.replace(".pdf", ".txt"));
zipOut.putNextEntry(txtEntry);
Files.copy(sidecarTextPath, zipOut);
zipOut.closeEntry();
}
byte[] zipBytes = Files.readAllBytes(tempZipFile);
// Clean up the temporary zip file
Files.delete(tempZipFile);
Files.delete(tempOutputFile);
Files.delete(sidecarTextPath);
// Return the zip file containing both the PDF and the text file
headers.setContentType(MediaType.APPLICATION_OCTET_STREAM);
headers.setContentDispositionFormData("attachment", outputZipFilename);
return ResponseEntity.ok().headers(headers).body(zipBytes);
} else {
// Return the OCR processed PDF as a response
Files.delete(tempOutputFile);
headers.setContentType(MediaType.APPLICATION_PDF);
headers.setContentDispositionFormData("attachment", outputFilename);
return ResponseEntity.ok().headers(headers).body(pdfBytes);
}
}
public List<String> getAvailableTesseractLanguages() {
String tessdataDir = "/usr/share/tesseract-ocr/4.00/tessdata";
File[] files = new File(tessdataDir).listFiles();
if (files == null) {
return Collections.emptyList();
}
return Arrays.stream(files)
.filter(file -> file.getName().endsWith(".traineddata"))
.map(file -> file.getName().replace(".traineddata", ""))
.filter(lang -> !lang.equalsIgnoreCase("osd"))
.collect(Collectors.toList());
}
}

View file

@ -11,17 +11,15 @@ public class PdfController {
private static final Logger logger = LoggerFactory.getLogger(PdfController.class);
@GetMapping("/home")
public String root(Model model) {
return "redirect:/";
}
@GetMapping("/")
public String home(Model model) {
model.addAttribute("currentPage", "home");
return "home";
}
@GetMapping("/home")
public String root(Model model) {
return "redirect:/";
}
}

View file

@ -23,18 +23,6 @@ public class RearrangePagesPDFController {
private static final Logger logger = LoggerFactory.getLogger(RearrangePagesPDFController.class);
@GetMapping("/pdf-organizer")
public String pageOrganizer(Model model) {
model.addAttribute("currentPage", "pdf-organizer");
return "pdf-organizer";
}
@GetMapping("/remove-pages")
public String pageDeleter(Model model) {
model.addAttribute("currentPage", "remove-pages");
return "remove-pages";
}
@PostMapping("/remove-pages")
public ResponseEntity<byte[]> deletePages(@RequestParam("fileInput") MultipartFile pdfFile, @RequestParam("pagesToDelete") String pagesToDelete) throws IOException {
@ -53,6 +41,12 @@ public class RearrangePagesPDFController {
}
@GetMapping("/remove-pages")
public String pageDeleter(Model model) {
model.addAttribute("currentPage", "remove-pages");
return "remove-pages";
}
private List<Integer> pageOrderToString(String[] pageOrderArr, int totalPages) {
List<Integer> newPageOrder = new ArrayList<>();
// loop through the page order array
@ -81,6 +75,12 @@ public class RearrangePagesPDFController {
return newPageOrder;
}
@GetMapping("/pdf-organizer")
public String pageOrganizer(Model model) {
model.addAttribute("currentPage", "pdf-organizer");
return "pdf-organizer";
}
@PostMapping("/rearrange-pages")
public ResponseEntity<byte[]> rearrangePages(@RequestParam("fileInput") MultipartFile pdfFile, @RequestParam("pageOrder") String pageOrder) {
try {

View file

@ -22,12 +22,6 @@ public class RotationController {
private static final Logger logger = LoggerFactory.getLogger(RotationController.class);
@GetMapping("/rotate-pdf")
public String rotatePdfForm(Model model) {
model.addAttribute("currentPage", "rotate-pdf");
return "rotate-pdf";
}
@PostMapping("/rotate-pdf")
public ResponseEntity<byte[]> rotatePDF(@RequestParam("fileInput") MultipartFile pdfFile, @RequestParam("angle") Integer angle) throws IOException {
@ -45,4 +39,10 @@ public class RotationController {
}
@GetMapping("/rotate-pdf")
public String rotatePdfForm(Model model) {
model.addAttribute("currentPage", "rotate-pdf");
return "rotate-pdf";
}
}

View file

@ -27,17 +27,12 @@ import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
@Controller
public class SplitPDFController {
private static final Logger logger = LoggerFactory.getLogger(SplitPDFController.class);
@GetMapping("/split-pdfs")
public String splitPdfForm(Model model) {
model.addAttribute("currentPage", "split-pdfs");
return "split-pdfs";
}
@PostMapping("/split-pages")
public ResponseEntity<Resource> splitPdf(@RequestParam("fileInput") MultipartFile file, @RequestParam("pages") String pages) throws IOException {
// parse user input
@ -128,7 +123,13 @@ public class SplitPDFController {
Files.delete(zipFile);
// return the Resource in the response
return ResponseEntity.ok().header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=" + file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_split.zip").contentType(MediaType.APPLICATION_OCTET_STREAM)
.contentLength(resource.contentLength()).body(resource);
return ResponseEntity.ok().header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=" + file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_split.zip")
.contentType(MediaType.APPLICATION_OCTET_STREAM).contentLength(resource.contentLength()).body(resource);
}
@GetMapping("/split-pdfs")
public String splitPdfForm(Model model) {
model.addAttribute("currentPage", "split-pdfs");
return "split-pdfs";
}
}

View file

@ -25,28 +25,6 @@ public class ConvertImgPDFController {
private static final Logger logger = LoggerFactory.getLogger(ConvertImgPDFController.class);
@GetMapping("/img-to-pdf")
public String convertToPdfForm(Model model) {
model.addAttribute("currentPage", "img-to-pdf");
return "convert/img-to-pdf";
}
@GetMapping("/pdf-to-img")
public String pdfToimgForm(Model model) {
model.addAttribute("currentPage", "pdf-to-img");
return "convert/pdf-to-img";
}
@PostMapping("/img-to-pdf")
public ResponseEntity<byte[]> convertToPdf(@RequestParam("fileInput") MultipartFile[] file,
@RequestParam(defaultValue = "false", name = "stretchToFit") boolean stretchToFit,
@RequestParam(defaultValue = "true", name = "autoRotate") boolean autoRotate) throws IOException {
// Convert the file to PDF and get the resulting bytes
System.out.println(stretchToFit);
byte[] bytes = PdfUtils.imageToPdf(file, stretchToFit, autoRotate);
return PdfUtils.bytesToWebResponse(bytes, file[0].getOriginalFilename().replaceFirst("[.][^.]+$", "")+ "_coverted.pdf");
}
@PostMapping("/pdf-to-img")
public ResponseEntity<Resource> convertToImage(@RequestParam("fileInput") MultipartFile file, @RequestParam("imageFormat") String imageFormat,
@RequestParam("singleOrMultiple") String singleOrMultiple, @RequestParam("colorType") String colorType, @RequestParam("dpi") String dpi) throws IOException {
@ -78,11 +56,27 @@ public class ConvertImgPDFController {
} else {
ByteArrayResource resource = new ByteArrayResource(result);
// return the Resource in the response
return ResponseEntity.ok().header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename="+ file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_convertedToImages.zip").contentType(MediaType.APPLICATION_OCTET_STREAM)
.contentLength(resource.contentLength()).body(resource);
return ResponseEntity.ok()
.header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=" + file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_convertedToImages.zip")
.contentType(MediaType.APPLICATION_OCTET_STREAM).contentLength(resource.contentLength()).body(resource);
}
}
@PostMapping("/img-to-pdf")
public ResponseEntity<byte[]> convertToPdf(@RequestParam("fileInput") MultipartFile[] file, @RequestParam(defaultValue = "false", name = "stretchToFit") boolean stretchToFit,
@RequestParam(defaultValue = "true", name = "autoRotate") boolean autoRotate) throws IOException {
// Convert the file to PDF and get the resulting bytes
System.out.println(stretchToFit);
byte[] bytes = PdfUtils.imageToPdf(file, stretchToFit, autoRotate);
return PdfUtils.bytesToWebResponse(bytes, file[0].getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_coverted.pdf");
}
@GetMapping("/img-to-pdf")
public String convertToPdfForm(Model model) {
model.addAttribute("currentPage", "img-to-pdf");
return "convert/img-to-pdf";
}
private String getMediaType(String imageFormat) {
if (imageFormat.equalsIgnoreCase("PNG"))
return "image/png";
@ -94,4 +88,10 @@ public class ConvertImgPDFController {
return "application/octet-stream";
}
@GetMapping("/pdf-to-img")
public String pdfToimgForm(Model model) {
model.addAttribute("currentPage", "pdf-to-img");
return "convert/pdf-to-img";
}
}

View file

@ -19,28 +19,11 @@ import org.springframework.web.multipart.MultipartFile;
import stirling.software.SPDF.utils.PdfUtils;
import stirling.software.SPDF.utils.ProcessExecutor;
@Controller
public class ConvertOfficeController {
@GetMapping("/file-to-pdf")
public String convertToPdfForm(Model model) {
model.addAttribute("currentPage", "file-to-pdf");
return "convert/file-to-pdf";
}
@PostMapping("/file-to-pdf")
public ResponseEntity<byte[]> processPdfWithOCR(@RequestParam("fileInput") MultipartFile inputFile) throws IOException, InterruptedException {
//unused but can start server instance if startup time is to long
//LibreOfficeListener.getInstance().start();
byte[] pdfByteArray = convertToPdf(inputFile);
return PdfUtils.bytesToWebResponse(pdfByteArray, inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_convertedToPDF.pdf");
}
public byte[] convertToPdf(MultipartFile inputFile) throws IOException, InterruptedException {
public byte[] convertToPdf(MultipartFile inputFile) throws IOException, InterruptedException {
// Check for valid file extension
String originalFilename = inputFile.getOriginalFilename();
if (originalFilename == null || !isValidFileExtension(FilenameUtils.getExtension(originalFilename))) {
@ -55,12 +38,7 @@ public byte[] convertToPdf(MultipartFile inputFile) throws IOException, Interrup
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
// Run the LibreOffice command
List<String> command = new ArrayList<>(Arrays.asList("unoconv", "-vvv",
"-f",
"pdf",
"-o",
tempOutputFile.toString(),
tempInputFile.toString()));
List<String> command = new ArrayList<>(Arrays.asList("unoconv", "-vvv", "-f", "pdf", "-o", tempOutputFile.toString(), tempInputFile.toString()));
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE).runCommandWithOutputHandling(command);
// Read the converted PDF file
@ -71,10 +49,27 @@ public byte[] convertToPdf(MultipartFile inputFile) throws IOException, Interrup
Files.delete(tempOutputFile);
return pdfBytes;
}
private boolean isValidFileExtension(String fileExtension) {
}
@GetMapping("/file-to-pdf")
public String convertToPdfForm(Model model) {
model.addAttribute("currentPage", "file-to-pdf");
return "convert/file-to-pdf";
}
private boolean isValidFileExtension(String fileExtension) {
String extensionPattern = "^(?i)[a-z0-9]{2,4}$";
return fileExtension.matches(extensionPattern);
}
}
@PostMapping("/file-to-pdf")
public ResponseEntity<byte[]> processPdfWithOCR(@RequestParam("fileInput") MultipartFile inputFile) throws IOException, InterruptedException {
// unused but can start server instance if startup time is to long
// LibreOfficeListener.getInstance().start();
byte[] pdfByteArray = convertToPdf(inputFile);
return PdfUtils.bytesToWebResponse(pdfByteArray, inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_convertedToPDF.pdf");
}
}

View file

@ -15,12 +15,10 @@ import stirling.software.SPDF.utils.PDFToFile;
@Controller
public class ConvertPDFToOffice {
@GetMapping("/pdf-to-word")
public ModelAndView pdfToWord() {
ModelAndView modelAndView = new ModelAndView("convert/pdf-to-word");
modelAndView.addObject("currentPage", "pdf-to-word");
@GetMapping("/pdf-to-html")
public ModelAndView pdfToHTML() {
ModelAndView modelAndView = new ModelAndView("convert/pdf-to-html");
modelAndView.addObject("currentPage", "pdf-to-html");
return modelAndView;
}
@ -38,10 +36,10 @@ public class ConvertPDFToOffice {
return modelAndView;
}
@GetMapping("/pdf-to-html")
public ModelAndView pdfToHTML() {
ModelAndView modelAndView = new ModelAndView("convert/pdf-to-html");
modelAndView.addObject("currentPage", "pdf-to-html");
@GetMapping("/pdf-to-word")
public ModelAndView pdfToWord() {
ModelAndView modelAndView = new ModelAndView("convert/pdf-to-word");
modelAndView.addObject("currentPage", "pdf-to-word");
return modelAndView;
}
@ -52,33 +50,31 @@ public class ConvertPDFToOffice {
return modelAndView;
}
@PostMapping("/pdf-to-word")
public ResponseEntity<byte[]> processPdfToWord(@RequestParam("fileInput") MultipartFile inputFile,
@RequestParam("outputFormat") String outputFormat) throws IOException, InterruptedException {
@PostMapping("/pdf-to-html")
public ResponseEntity<byte[]> processPdfToHTML(@RequestParam("fileInput") MultipartFile inputFile) throws IOException, InterruptedException {
PDFToFile pdfToFile = new PDFToFile();
return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "writer_pdf_import");
return pdfToFile.processPdfToOfficeFormat(inputFile, "html", "writer_pdf_import");
}
@PostMapping("/pdf-to-presentation")
public ResponseEntity<byte[]> processPdfToPresentation(@RequestParam("fileInput") MultipartFile inputFile,
@RequestParam("outputFormat") String outputFormat) throws IOException, InterruptedException {
public ResponseEntity<byte[]> processPdfToPresentation(@RequestParam("fileInput") MultipartFile inputFile, @RequestParam("outputFormat") String outputFormat)
throws IOException, InterruptedException {
PDFToFile pdfToFile = new PDFToFile();
return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "impress_pdf_import");
}
@PostMapping("/pdf-to-text")
public ResponseEntity<byte[]> processPdfToRTForTXT(@RequestParam("fileInput") MultipartFile inputFile,
@RequestParam("outputFormat") String outputFormat) throws IOException, InterruptedException {
public ResponseEntity<byte[]> processPdfToRTForTXT(@RequestParam("fileInput") MultipartFile inputFile, @RequestParam("outputFormat") String outputFormat)
throws IOException, InterruptedException {
PDFToFile pdfToFile = new PDFToFile();
return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "writer_pdf_import");
}
@PostMapping("/pdf-to-html")
public ResponseEntity<byte[]> processPdfToHTML(@RequestParam("fileInput") MultipartFile inputFile) throws IOException, InterruptedException {
@PostMapping("/pdf-to-word")
public ResponseEntity<byte[]> processPdfToWord(@RequestParam("fileInput") MultipartFile inputFile, @RequestParam("outputFormat") String outputFormat)
throws IOException, InterruptedException {
PDFToFile pdfToFile = new PDFToFile();
return pdfToFile.processPdfToOfficeFormat(inputFile, "html", "writer_pdf_import");
return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "writer_pdf_import");
}
@PostMapping("/pdf-to-xml")
@ -87,11 +83,4 @@ public class ConvertPDFToOffice {
return pdfToFile.processPdfToOfficeFormat(inputFile, "xml", "writer_pdf_import");
}
}

View file

@ -17,20 +17,12 @@ import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
import stirling.software.SPDF.utils.ProcessExecutor;
@Controller
public class ConvertPDFToPDFA {
@GetMapping("/pdf-to-pdfa")
public String pdfToPdfAForm(Model model) {
model.addAttribute("currentPage", "pdf-to-pdfa");
return "convert/pdf-to-pdfa";
}
@PostMapping("/pdf-to-pdfa")
public ResponseEntity<byte[]> pdfToPdfA(
@RequestParam("fileInput") MultipartFile inputFile) throws IOException, InterruptedException {
public ResponseEntity<byte[]> pdfToPdfA(@RequestParam("fileInput") MultipartFile inputFile) throws IOException, InterruptedException {
// Save the uploaded file to a temporary location
Path tempInputFile = Files.createTempFile("input_", ".pdf");
@ -64,7 +56,12 @@ public class ConvertPDFToPDFA {
headers.setContentType(MediaType.APPLICATION_PDF);
headers.setContentDispositionFormData("attachment", outputFilename);
return ResponseEntity.ok().headers(headers).body(pdfBytes);
}
}
@GetMapping("/pdf-to-pdfa")
public String pdfToPdfAForm(Model model) {
model.addAttribute("currentPage", "pdf-to-pdfa");
return "convert/pdf-to-pdfa";
}
}

View file

@ -1,4 +1,4 @@
package stirling.software.SPDF.controller;
package stirling.software.SPDF.controller.other;
import java.io.IOException;
import java.nio.file.Files;
@ -20,7 +20,6 @@ import org.springframework.web.multipart.MultipartFile;
import stirling.software.SPDF.utils.ProcessExecutor;
@Controller
public class CompressController {
@ -29,16 +28,13 @@ public class CompressController {
@GetMapping("/compress-pdf")
public String compressPdfForm(Model model) {
model.addAttribute("currentPage", "compress-pdf");
return "compress-pdf";
return "other/compress-pdf";
}
@PostMapping("/compress-pdf")
public ResponseEntity<byte[]> optimizePdf(
@RequestParam("fileInput") MultipartFile inputFile,
@RequestParam("optimizeLevel") int optimizeLevel,
@RequestParam(name = "fastWebView", required = false) Boolean fastWebView,
@RequestParam(name = "jbig2Lossy", required = false) Boolean jbig2Lossy) throws IOException, InterruptedException {
public ResponseEntity<byte[]> optimizePdf(@RequestParam("fileInput") MultipartFile inputFile, @RequestParam("optimizeLevel") int optimizeLevel,
@RequestParam(name = "fastWebView", required = false) Boolean fastWebView, @RequestParam(name = "jbig2Lossy", required = false) Boolean jbig2Lossy)
throws IOException, InterruptedException {
// Save the uploaded file to a temporary location
Path tempInputFile = Files.createTempFile("input_", ".pdf");
@ -57,7 +53,6 @@ public class CompressController {
command.add("--output-type");
command.add("pdf");
if (fastWebView != null && fastWebView) {
long fileSize = inputFile.getSize();
long fastWebViewSize = (long) (fileSize * 1.25); // 25% higher than file size
@ -87,6 +82,6 @@ public class CompressController {
headers.setContentType(MediaType.APPLICATION_PDF);
headers.setContentDispositionFormData("attachment", outputFilename);
return ResponseEntity.ok().headers(headers).body(pdfBytes);
}
}
}

View file

@ -1,4 +1,4 @@
package stirling.software.SPDF.controller;
package stirling.software.SPDF.controller.other;
import java.awt.Graphics2D;
import java.awt.Image;
@ -36,12 +36,6 @@ public class ExtractImagesController {
private static final Logger logger = LoggerFactory.getLogger(ExtractImagesController.class);
@GetMapping("/extract-images")
public String extractImagesForm(Model model) {
model.addAttribute("currentPage", "extract-images");
return "extract-images";
}
@PostMapping("/extract-images")
public ResponseEntity<Resource> extractImages(@RequestParam("fileInput") MultipartFile file, @RequestParam("format") String format) throws IOException {
@ -72,14 +66,11 @@ public class ExtractImagesController {
RenderedImage renderedImage = image.getImage();
BufferedImage bufferedImage = null;
if (format.equalsIgnoreCase("png")) {
bufferedImage = new BufferedImage(renderedImage.getWidth(), renderedImage.getHeight(),
BufferedImage.TYPE_INT_ARGB);
bufferedImage = new BufferedImage(renderedImage.getWidth(), renderedImage.getHeight(), BufferedImage.TYPE_INT_ARGB);
} else if (format.equalsIgnoreCase("jpeg") || format.equalsIgnoreCase("jpg")) {
bufferedImage = new BufferedImage(renderedImage.getWidth(), renderedImage.getHeight(),
BufferedImage.TYPE_INT_RGB);
bufferedImage = new BufferedImage(renderedImage.getWidth(), renderedImage.getHeight(), BufferedImage.TYPE_INT_RGB);
} else if (format.equalsIgnoreCase("gif")) {
bufferedImage = new BufferedImage(renderedImage.getWidth(), renderedImage.getHeight(),
BufferedImage.TYPE_BYTE_INDEXED);
bufferedImage = new BufferedImage(renderedImage.getWidth(), renderedImage.getHeight(), BufferedImage.TYPE_BYTE_INDEXED);
}
// Write image to zip file
@ -95,7 +86,6 @@ public class ExtractImagesController {
ImageIO.write(bufferedImage, format, imageBaos);
zos.write(imageBaos.toByteArray());
zos.closeEntry();
imageIndex++;
}
@ -110,20 +100,22 @@ public class ExtractImagesController {
byte[] zipContents = baos.toByteArray();
ByteArrayResource resource = new ByteArrayResource(zipContents);
// Set content disposition header to indicate that the response should be downloaded as a file
// Set content disposition header to indicate that the response should be
// downloaded as a file
HttpHeaders headers = new HttpHeaders();
headers.setContentLength(zipContents.length);
headers.add(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=" + file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_extracted-images.zip");
// Return ResponseEntity with ByteArrayResource and headers
return ResponseEntity
.status(HttpStatus.OK)
.headers(headers)
return ResponseEntity.status(HttpStatus.OK).headers(headers)
.header("Cache-Control", "no-cache")
.contentType(MediaType.APPLICATION_OCTET_STREAM)
.body(resource);
.header("Cache-Control", "no-cache").contentType(MediaType.APPLICATION_OCTET_STREAM).body(resource);
}
@GetMapping("/extract-images")
public String extractImagesForm(Model model) {
model.addAttribute("currentPage", "extract-images");
return "other/extract-images";
}
}

View file

@ -1,4 +1,4 @@
package stirling.software.SPDF.controller.security;
package stirling.software.SPDF.controller.other;
import java.io.IOException;
import java.text.ParseException;
@ -26,12 +26,12 @@ public class MetadataController {
@GetMapping("/change-metadata")
public String addWatermarkForm(Model model) {
model.addAttribute("currentPage", "change-metadata");
return "security/change-metadata";
return "other/change-metadata";
}
private String checkUndefined(String entry) {
// Check if the string is "undefined"
if("undefined".equals(entry)) {
if ("undefined".equals(entry)) {
// Return null if it is
return null;
}
@ -39,6 +39,7 @@ public class MetadataController {
return entry;
}
@PostMapping("/update-metadata")
public ResponseEntity<byte[]> metadata(@RequestParam("fileInput") MultipartFile pdfFile,
@RequestParam(value = "deleteAll", required = false, defaultValue = "false") Boolean deleteAll, @RequestParam(value = "author", required = false) String author,
@ -65,7 +66,8 @@ public class MetadataController {
title = checkUndefined(title);
trapped = checkUndefined(trapped);
// If the "deleteAll" flag is set, remove all metadata from the document information
// If the "deleteAll" flag is set, remove all metadata from the document
// information
if (deleteAll) {
for (String key : info.getMetadataKeys()) {
info.setCustomMetadataValue(key, null);
@ -133,6 +135,4 @@ public class MetadataController {
return PdfUtils.pdfDocToWebResponse(document, pdfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_metadata.pdf");
}
}

View file

@ -0,0 +1,168 @@
package stirling.software.SPDF.controller.other;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.http.HttpHeaders;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
import org.springframework.web.servlet.ModelAndView;
import stirling.software.SPDF.utils.ProcessExecutor;
@Controller
public class OCRController {
private static final Logger logger = LoggerFactory.getLogger(OCRController.class);
public List<String> getAvailableTesseractLanguages() {
String tessdataDir = "/usr/share/tesseract-ocr/4.00/tessdata";
File[] files = new File(tessdataDir).listFiles();
if (files == null) {
return Collections.emptyList();
}
return Arrays.stream(files).filter(file -> file.getName().endsWith(".traineddata")).map(file -> file.getName().replace(".traineddata", ""))
.filter(lang -> !lang.equalsIgnoreCase("osd")).collect(Collectors.toList());
}
@GetMapping("/ocr-pdf")
public ModelAndView ocrPdfPage() {
ModelAndView modelAndView = new ModelAndView("other/ocr-pdf");
modelAndView.addObject("languages", getAvailableTesseractLanguages());
modelAndView.addObject("currentPage", "ocr-pdf");
return modelAndView;
}
@PostMapping("/ocr-pdf")
public ResponseEntity<byte[]> processPdfWithOCR(@RequestParam("fileInput") MultipartFile inputFile, @RequestParam("languages") List<String> selectedLanguages,
@RequestParam(name = "sidecar", required = false) Boolean sidecar, @RequestParam(name = "deskew", required = false) Boolean deskew,
@RequestParam(name = "clean", required = false) Boolean clean, @RequestParam(name = "clean-final", required = false) Boolean cleanFinal,
@RequestParam(name = "ocrType", required = false) String ocrType) throws IOException, InterruptedException {
// --output-type pdfa
if (selectedLanguages == null || selectedLanguages.size() < 1) {
throw new IOException("Please select at least one language.");
}
// Validate and sanitize selected languages using regex
String languagePattern = "^[a-zA-Z]{3}$"; // Regex pattern for three-letter language codes
selectedLanguages = selectedLanguages.stream().filter(lang -> Pattern.matches(languagePattern, lang)).collect(Collectors.toList());
if (selectedLanguages.isEmpty()) {
throw new IOException("None of the selected languages are valid.");
}
// Save the uploaded file to a temporary location
Path tempInputFile = Files.createTempFile("input_", ".pdf");
Files.copy(inputFile.getInputStream(), tempInputFile, StandardCopyOption.REPLACE_EXISTING);
// Prepare the output file path
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
// Prepare the output file path
Path sidecarTextPath = null;
// Run OCR Command
String languageOption = String.join("+", selectedLanguages);
List<String> command = new ArrayList<>(Arrays.asList("ocrmypdf", "--verbose", "2", "--output-type", "pdf"));
if (sidecar != null && sidecar) {
sidecarTextPath = Files.createTempFile("sidecar", ".txt");
command.add("--sidecar");
command.add(sidecarTextPath.toString());
}
if (deskew != null && deskew) {
command.add("--deskew");
}
if (clean != null && clean) {
command.add("--clean");
}
if (cleanFinal != null && cleanFinal) {
command.add("--clean-final");
}
if (ocrType != null && !ocrType.equals("")) {
if ("skip-text".equals(ocrType)) {
command.add("--skip-text");
} else if ("force-ocr".equals(ocrType)) {
command.add("--force-ocr");
} else if ("Normal".equals(ocrType)) {
}
}
command.addAll(Arrays.asList("--language", languageOption, tempInputFile.toString(), tempOutputFile.toString()));
// Run CLI command
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command);
// Read the OCR processed PDF file
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
// Clean up the temporary files
Files.delete(tempInputFile);
// Return the OCR processed PDF as a response
String outputFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_OCR.pdf";
HttpHeaders headers = new HttpHeaders();
if (sidecar != null && sidecar) {
// Create a zip file containing both the PDF and the text file
String outputZipFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_OCR.zip";
Path tempZipFile = Files.createTempFile("output_", ".zip");
try (ZipOutputStream zipOut = new ZipOutputStream(new FileOutputStream(tempZipFile.toFile()))) {
// Add PDF file to the zip
ZipEntry pdfEntry = new ZipEntry(outputFilename);
zipOut.putNextEntry(pdfEntry);
Files.copy(tempOutputFile, zipOut);
zipOut.closeEntry();
// Add text file to the zip
ZipEntry txtEntry = new ZipEntry(outputFilename.replace(".pdf", ".txt"));
zipOut.putNextEntry(txtEntry);
Files.copy(sidecarTextPath, zipOut);
zipOut.closeEntry();
}
byte[] zipBytes = Files.readAllBytes(tempZipFile);
// Clean up the temporary zip file
Files.delete(tempZipFile);
Files.delete(tempOutputFile);
Files.delete(sidecarTextPath);
// Return the zip file containing both the PDF and the text file
headers.setContentType(MediaType.APPLICATION_OCTET_STREAM);
headers.setContentDispositionFormData("attachment", outputZipFilename);
return ResponseEntity.ok().headers(headers).body(zipBytes);
} else {
// Return the OCR processed PDF as a response
Files.delete(tempOutputFile);
headers.setContentType(MediaType.APPLICATION_PDF);
headers.setContentDispositionFormData("attachment", outputFilename);
return ResponseEntity.ok().headers(headers).body(pdfBytes);
}
}
}

View file

@ -1,4 +1,4 @@
package stirling.software.SPDF.controller;
package stirling.software.SPDF.controller.other;
import java.io.IOException;
@ -23,7 +23,7 @@ public class OverlayImageController {
@GetMapping("/add-image")
public String overlayImage(Model model) {
model.addAttribute("currentPage", "add-image");
return "add-image";
return "other/add-image";
}
@PostMapping("/add-image")

View file

@ -28,23 +28,11 @@ public class PasswordController {
return "security/add-password";
}
@GetMapping("/remove-password")
public String removePasswordForm(Model model) {
model.addAttribute("currentPage", "remove-password");
return "security/remove-password";
}
@GetMapping("/change-permissions")
public String permissionsForm(Model model) {
model.addAttribute("currentPage", "change-permissions");
return "security/change-permissions";
}
@PostMapping("/remove-password")
public ResponseEntity<byte[]> compressPDF(@RequestParam("fileInput") MultipartFile fileInput, @RequestParam(name = "password") String password) throws IOException {
PDDocument document = PDDocument.load(fileInput.getBytes(), password);
document.setAllSecurityToBeRemoved(true);
return PdfUtils.pdfDocToWebResponse(document, fileInput.getOriginalFilename().replaceFirst("[.][^.]+$", "")+ "_password_removed.pdf");
return PdfUtils.pdfDocToWebResponse(document, fileInput.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_password_removed.pdf");
}
@PostMapping("/add-password")
@ -75,7 +63,19 @@ public class PasswordController {
document.protect(spp);
return PdfUtils.pdfDocToWebResponse(document, fileInput.getOriginalFilename().replaceFirst("[.][^.]+$", "")+ "_passworded.pdf");
return PdfUtils.pdfDocToWebResponse(document, fileInput.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_passworded.pdf");
}
@GetMapping("/change-permissions")
public String permissionsForm(Model model) {
model.addAttribute("currentPage", "change-permissions");
return "security/change-permissions";
}
@GetMapping("/remove-password")
public String removePasswordForm(Model model) {
model.addAttribute("currentPage", "remove-password");
return "security/remove-password";
}
}

View file

@ -31,24 +31,11 @@ import stirling.software.SPDF.utils.WatermarkRemover;
@Controller
public class WatermarkController {
@GetMapping("/add-watermark")
public String addWatermarkForm(Model model) {
model.addAttribute("currentPage", "add-watermark");
return "security/add-watermark";
}
@GetMapping("/remove-watermark")
public String removeWatermarkForm(Model model) {
model.addAttribute("currentPage", "remove-watermark");
return "security/remove-watermark";
}
@PostMapping("/add-watermark")
public ResponseEntity<byte[]> addWatermark(@RequestParam("fileInput") MultipartFile pdfFile, @RequestParam("watermarkText") String watermarkText,
@RequestParam(defaultValue = "30", name = "fontSize") float fontSize, @RequestParam(defaultValue = "0", name = "rotation") float rotation,
@RequestParam(defaultValue = "0.5", name = "opacity") float opacity,
@RequestParam(defaultValue = "50", name = "widthSpacer") int widthSpacer, @RequestParam(defaultValue = "50", name = "heightSpacer") int heightSpacer)
throws IOException {
@RequestParam(defaultValue = "0.5", name = "opacity") float opacity, @RequestParam(defaultValue = "50", name = "widthSpacer") int widthSpacer,
@RequestParam(defaultValue = "50", name = "heightSpacer") int heightSpacer) throws IOException {
// Load the input PDF
PDDocument document = PDDocument.load(pdfFile.getInputStream());
@ -56,9 +43,6 @@ public class WatermarkController {
// Create a page in the document
for (PDPage page : document.getPages()) {
// Get the page's content stream
PDPageContentStream contentStream = new PDPageContentStream(document, page, PDPageContentStream.AppendMode.APPEND, true);
@ -97,8 +81,11 @@ public class WatermarkController {
return PdfUtils.pdfDocToWebResponse(document, pdfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_watermarked.pdf");
}
@GetMapping("/add-watermark")
public String addWatermarkForm(Model model) {
model.addAttribute("currentPage", "add-watermark");
return "security/add-watermark";
}
@PostMapping("/remove-watermark")
public ResponseEntity<byte[]> removeWatermark(@RequestParam("fileInput") MultipartFile pdfFile, @RequestParam("watermarkText") String watermarkText) throws Exception {
@ -115,7 +102,8 @@ public class WatermarkController {
PDPage page = document.getPage(i);
// Process the content stream to remove the watermark text
WatermarkRemover editor = new WatermarkRemover(watermarkText) {};
WatermarkRemover editor = new WatermarkRemover(watermarkText) {
};
editor.processPage(page);
editor.processPage(page);
// Add the page to the output document
@ -153,5 +141,10 @@ public class WatermarkController {
return PdfUtils.pdfDocToWebResponse(outputDocument, "removed.pdf");
}
@GetMapping("/remove-watermark")
public String removeWatermarkForm(Model model) {
model.addAttribute("currentPage", "remove-watermark");
return "security/remove-watermark";
}
}

View file

@ -1,4 +1,5 @@
package stirling.software.SPDF.utils;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
@ -19,9 +20,9 @@ import org.springframework.http.HttpStatus;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.multipart.MultipartFile;
public class PDFToFile {
public ResponseEntity<byte[]> processPdfToOfficeFormat(MultipartFile inputFile, String outputFormat, String libreOfficeFilter)
throws IOException, InterruptedException {
public ResponseEntity<byte[]> processPdfToOfficeFormat(MultipartFile inputFile, String outputFormat, String libreOfficeFilter) throws IOException, InterruptedException {
if (!"application/pdf".equals(inputFile.getContentType())) {
return new ResponseEntity<>(HttpStatus.BAD_REQUEST);
@ -32,7 +33,7 @@ public class PDFToFile {
String pdfBaseName = originalPdfFileName.substring(0, originalPdfFileName.lastIndexOf('.'));
// Validate output format
List<String> allowedFormats = Arrays.asList("doc", "docx", "odt", "ppt", "pptx", "odp", "rtf", "html","xml","txt:Text");
List<String> allowedFormats = Arrays.asList("doc", "docx", "odt", "ppt", "pptx", "odp", "rtf", "html", "xml", "txt:Text");
if (!allowedFormats.contains(outputFormat)) {
return new ResponseEntity<>(HttpStatus.BAD_REQUEST);
}
@ -52,9 +53,8 @@ public class PDFToFile {
tempOutputDir = Files.createTempDirectory("output_");
// Run the LibreOffice command
List<String> command = new ArrayList<>(Arrays.asList(
"soffice", "--infilter=" + libreOfficeFilter, "--convert-to", outputFormat, "--outdir", tempOutputDir.toString(), tempInputFile.toString()
));
List<String> command = new ArrayList<>(
Arrays.asList("soffice", "--infilter=" + libreOfficeFilter, "--convert-to", outputFormat, "--outdir", tempOutputDir.toString(), tempInputFile.toString()));
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE).runCommandWithOutputHandling(command);
// Get output files
@ -64,8 +64,8 @@ public class PDFToFile {
// Return single output file
File outputFile = outputFiles.get(0);
headers.setContentType(MediaType.APPLICATION_OCTET_STREAM);
if(outputFormat.equals("txt:Text")) {
outputFormat="txt";
if (outputFormat.equals("txt:Text")) {
outputFormat = "txt";
}
headers.setContentDispositionFormData("attachment", pdfBaseName + "." + outputFormat);
fileBytes = FileUtils.readFileToByteArray(outputFile);

View file

@ -9,6 +9,12 @@ import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.security.KeyPair;
import java.security.KeyStore;
import java.security.PrivateKey;
import java.security.PublicKey;
import java.security.cert.Certificate;
import java.security.cert.X509Certificate;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@ -31,18 +37,79 @@ import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.multipart.MultipartFile;
import java.io.InputStream;
import java.security.KeyPair;
import java.security.KeyStore;
import java.security.PrivateKey;
import java.security.PublicKey;
import java.security.cert.Certificate;
import java.security.cert.X509Certificate;
public class PdfUtils {
private static final Logger logger = LoggerFactory.getLogger(PdfUtils.class);
public static ResponseEntity<byte[]> boasToWebResponse(ByteArrayOutputStream baos, String docName) throws IOException {
return PdfUtils.bytesToWebResponse(baos.toByteArray(), docName);
}
public static ResponseEntity<byte[]> bytesToWebResponse(byte[] bytes, String docName) throws IOException {
// Return the PDF as a response
HttpHeaders headers = new HttpHeaders();
headers.setContentType(MediaType.APPLICATION_PDF);
headers.setContentLength(bytes.length);
headers.setContentDispositionFormData("attachment", docName);
return new ResponseEntity<>(bytes, headers, HttpStatus.OK);
}
public static byte[] convertFromPdf(byte[] inputStream, String imageType, ImageType colorType, boolean singleImage, int DPI) throws IOException, Exception {
try (PDDocument document = PDDocument.load(new ByteArrayInputStream(inputStream))) {
PDFRenderer pdfRenderer = new PDFRenderer(document);
int pageCount = document.getNumberOfPages();
List<BufferedImage> images = new ArrayList<>();
// Create images of all pages
for (int i = 0; i < pageCount; i++) {
images.add(pdfRenderer.renderImageWithDPI(i, 300, colorType));
}
if (singleImage) {
// Combine all images into a single big image
BufferedImage combined = new BufferedImage(images.get(0).getWidth(), images.get(0).getHeight() * pageCount, BufferedImage.TYPE_INT_RGB);
Graphics g = combined.getGraphics();
for (int i = 0; i < images.size(); i++) {
g.drawImage(images.get(i), 0, i * images.get(0).getHeight(), null);
}
images = Arrays.asList(combined);
}
// Create a ByteArrayOutputStream to save the image(s) to
ByteArrayOutputStream baos = new ByteArrayOutputStream();
if (singleImage) {
// Write the image to the output stream
ImageIO.write(images.get(0), imageType, baos);
// Log that the image was successfully written to the byte array
logger.info("Image successfully written to byte array");
} else {
// Zip the images and return as byte array
try (ZipOutputStream zos = new ZipOutputStream(baos)) {
for (int i = 0; i < images.size(); i++) {
BufferedImage image = images.get(i);
try (ByteArrayOutputStream baosImage = new ByteArrayOutputStream()) {
ImageIO.write(image, imageType, baosImage);
// Add the image to the zip file
zos.putNextEntry(new ZipEntry(String.format("page_%d.%s", i + 1, imageType.toLowerCase())));
zos.write(baosImage.toByteArray());
}
}
// Log that the images were successfully written to the byte array
logger.info("Images successfully written to byte array as a zip");
}
}
return baos.toByteArray();
} catch (IOException e) {
// Log an error message if there is an issue converting the PDF to an image
logger.error("Error converting PDF to image", e);
throw e;
}
}
public static byte[] imageToPdf(MultipartFile[] files, boolean stretchToFit, boolean autoRotate) throws IOException {
try (PDDocument doc = new PDDocument()) {
for (MultipartFile file : files) {
@ -73,7 +140,8 @@ public class PdfUtils {
float pageHeight = page.getMediaBox().getHeight();
if (autoRotate && ((image.getWidth() > image.getHeight() && pageHeight > pageWidth) || (image.getWidth() < image.getHeight() && pageWidth > pageHeight))) {
// Rotate the page 90 degrees if the image better fits the page in landscape orientation
// Rotate the page 90 degrees if the image better fits the page in landscape
// orientation
page.setRotation(90);
pageWidth = page.getMediaBox().getHeight();
pageHeight = page.getMediaBox().getWidth();
@ -136,60 +204,31 @@ public class PdfUtils {
}
public static X509Certificate[] loadCertificateChainFromKeystore(InputStream keystoreInputStream, String keystorePassword) throws Exception {
KeyStore keystore = KeyStore.getInstance(KeyStore.getDefaultType());
keystore.load(keystoreInputStream, keystorePassword.toCharArray());
public static byte[] convertFromPdf(byte[] inputStream, String imageType, ImageType colorType, boolean singleImage, int DPI)
throws IOException, Exception {
try (PDDocument document = PDDocument.load(new ByteArrayInputStream(inputStream))) {
PDFRenderer pdfRenderer = new PDFRenderer(document);
int pageCount = document.getNumberOfPages();
List<BufferedImage> images = new ArrayList<>();
String alias = keystore.aliases().nextElement();
Certificate[] certChain = keystore.getCertificateChain(alias);
X509Certificate[] x509CertChain = new X509Certificate[certChain.length];
// Create images of all pages
for (int i = 0; i < pageCount; i++) {
images.add(pdfRenderer.renderImageWithDPI(i, 300, colorType));
for (int i = 0; i < certChain.length; i++) {
x509CertChain[i] = (X509Certificate) certChain[i];
}
if (singleImage) {
// Combine all images into a single big image
BufferedImage combined = new BufferedImage(images.get(0).getWidth(), images.get(0).getHeight() * pageCount, BufferedImage.TYPE_INT_RGB);
Graphics g = combined.getGraphics();
for (int i = 0; i < images.size(); i++) {
g.drawImage(images.get(i), 0, i * images.get(0).getHeight(), null);
}
images = Arrays.asList(combined);
return x509CertChain;
}
// Create a ByteArrayOutputStream to save the image(s) to
ByteArrayOutputStream baos = new ByteArrayOutputStream();
if (singleImage) {
// Write the image to the output stream
ImageIO.write(images.get(0), imageType, baos);
public static KeyPair loadKeyPairFromKeystore(InputStream keystoreInputStream, String keystorePassword) throws Exception {
KeyStore keystore = KeyStore.getInstance(KeyStore.getDefaultType());
keystore.load(keystoreInputStream, keystorePassword.toCharArray());
// Log that the image was successfully written to the byte array
logger.info("Image successfully written to byte array");
} else {
// Zip the images and return as byte array
try (ZipOutputStream zos = new ZipOutputStream(baos)) {
for (int i = 0; i < images.size(); i++) {
BufferedImage image = images.get(i);
try (ByteArrayOutputStream baosImage = new ByteArrayOutputStream()) {
ImageIO.write(image, imageType, baosImage);
String alias = keystore.aliases().nextElement();
PrivateKey privateKey = (PrivateKey) keystore.getKey(alias, keystorePassword.toCharArray());
Certificate cert = keystore.getCertificate(alias);
PublicKey publicKey = cert.getPublicKey();
// Add the image to the zip file
zos.putNextEntry(new ZipEntry(String.format("page_%d.%s", i + 1, imageType.toLowerCase())));
zos.write(baosImage.toByteArray());
}
}
// Log that the images were successfully written to the byte array
logger.info("Images successfully written to byte array as a zip");
}
}
return baos.toByteArray();
} catch (IOException e) {
// Log an error message if there is an issue converting the PDF to an image
logger.error("Error converting PDF to image", e);
throw e;
}
return new KeyPair(publicKey, privateKey);
}
public static byte[] overlayImage(byte[] pdfBytes, byte[] imageBytes, float x, float y, boolean everyPage) throws IOException {
@ -206,7 +245,7 @@ public class PdfUtils {
// Draw the image onto the page at the specified x and y coordinates
contentStream.drawImage(image, x, y);
logger.info("Image successfully overlayed onto PDF");
if (everyPage == false && i == 0) {
if (!everyPage && i == 0) {
break;
}
} catch (IOException e) {
@ -223,9 +262,6 @@ public class PdfUtils {
return baos.toByteArray();
}
public static ResponseEntity<byte[]> pdfDocToWebResponse(PDDocument document, String docName) throws IOException {
// Open Byte Array and save document to it
@ -236,47 +272,4 @@ public class PdfUtils {
return PdfUtils.boasToWebResponse(baos, docName);
}
public static ResponseEntity<byte[]> boasToWebResponse(ByteArrayOutputStream baos, String docName) throws IOException {
return PdfUtils.bytesToWebResponse(baos.toByteArray(), docName);
}
public static ResponseEntity<byte[]> bytesToWebResponse(byte[] bytes, String docName) throws IOException {
// Return the PDF as a response
HttpHeaders headers = new HttpHeaders();
headers.setContentType(MediaType.APPLICATION_PDF);
headers.setContentLength(bytes.length);
headers.setContentDispositionFormData("attachment", docName);
return new ResponseEntity<>(bytes, headers, HttpStatus.OK);
}
public static KeyPair loadKeyPairFromKeystore(InputStream keystoreInputStream, String keystorePassword) throws Exception {
KeyStore keystore = KeyStore.getInstance(KeyStore.getDefaultType());
keystore.load(keystoreInputStream, keystorePassword.toCharArray());
String alias = keystore.aliases().nextElement();
PrivateKey privateKey = (PrivateKey) keystore.getKey(alias, keystorePassword.toCharArray());
Certificate cert = keystore.getCertificate(alias);
PublicKey publicKey = cert.getPublicKey();
return new KeyPair(publicKey, privateKey);
}
public static X509Certificate[] loadCertificateChainFromKeystore(InputStream keystoreInputStream, String keystorePassword) throws Exception {
KeyStore keystore = KeyStore.getInstance(KeyStore.getDefaultType());
keystore.load(keystoreInputStream, keystorePassword.toCharArray());
String alias = keystore.aliases().nextElement();
Certificate[] certChain = keystore.getCertificateChain(alias);
X509Certificate[] x509CertChain = new X509Certificate[certChain.length];
for (int i = 0; i < certChain.length; i++) {
x509CertChain[i] = (X509Certificate) certChain[i];
}
return x509CertChain;
}
}

View file

@ -9,22 +9,15 @@ import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Semaphore;
public class ProcessExecutor {
public enum Processes {
LIBRE_OFFICE,
OCR_MY_PDF
LIBRE_OFFICE, OCR_MY_PDF
}
private static final Map<Processes, ProcessExecutor> instances = new ConcurrentHashMap<>();
private final Semaphore semaphore;
private ProcessExecutor(int semaphoreLimit) {
this.semaphore = new Semaphore(semaphoreLimit);
}
public static ProcessExecutor getInstance(Processes processType) {
return instances.computeIfAbsent(processType, key -> {
int semaphoreLimit = switch (key) {
@ -35,6 +28,12 @@ public class ProcessExecutor {
});
}
private final Semaphore semaphore;
private ProcessExecutor(int semaphoreLimit) {
this.semaphore = new Semaphore(semaphoreLimit);
}
public int runCommandWithOutputHandling(List<String> command) throws IOException, InterruptedException {
int exitCode = 1;
semaphore.acquire();
@ -98,5 +97,4 @@ public class ProcessExecutor {
return exitCode;
}
}

View file

@ -1,4 +1,5 @@
package stirling.software.SPDF.utils;
import java.io.IOException;
import java.util.List;
import java.util.regex.Matcher;
@ -12,8 +13,8 @@ import org.apache.pdfbox.cos.COSString;
public class WatermarkRemover extends PDFStreamEngine {
private final String watermarkText;
private final Pattern pattern;
private final String watermarkText;
public WatermarkRemover(String watermarkText) {
this.watermarkText = watermarkText;
@ -30,7 +31,7 @@ public class WatermarkRemover extends PDFStreamEngine {
}
if (processText) {
for(int j = 0 ; j < operands.size(); ++j) {
for (int j = 0; j < operands.size(); ++j) {
COSBase operand = operands.get(j);
if (operand instanceof COSString) {
COSString cosString = (COSString) operand;
@ -61,7 +62,6 @@ public class WatermarkRemover extends PDFStreamEngine {
}
}
}
}
super.processOperator(operator, operands);

View file

@ -238,7 +238,7 @@ function compareVersions(version1, version2) {
<li class="nav-item nav-item-separator"></li>
<li class="nav-item dropdown" th:classappend="${currentPage}=='add-password' OR ${currentPage}=='remove-password' OR ${currentPage}=='change-permissions' OR ${currentPage}=='add-watermark' OR ${currentPage}=='remove-watermark' ? 'active' : ''">
<li class="nav-item dropdown" th:classappend="${currentPage}=='add-password' OR ${currentPage}=='remove-password' OR ${currentPage}=='add-watermark' OR ${currentPage}=='remove-watermark' ? 'active' : ''">
<a class="nav-link dropdown-toggle" href="#" id="navbarDropdown" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">
<img class="icon" src="images/shield-check.svg" alt="icon" style="width: 16px; height: 16px; vertical-align: middle;"> <span class="icon-text" th:text="#{navbar.security}"></span>
</a>
@ -255,14 +255,11 @@ function compareVersions(version1, version2) {
<a class="dropdown-item" href="#" th:href="@{add-watermark}" th:classappend="${currentPage}=='add-watermark' ? 'active' : ''">
<img class="icon" src="images/droplet.svg" alt="icon" style="width: 16px; height: 16px; vertical-align: middle;"> <span class="icon-text" th:text="#{home.watermark.title}"></span>
</a>
<a class="dropdown-item" href="#" th:href="@{change-metadata}" th:classappend="${currentPage}=='change-metadata' ? 'active' : ''">
<img class="icon" src="images/clipboard-data.svg" alt="icon" style="width: 16px; height: 16px; vertical-align: middle;"> <span class="icon-text" th:text="#{home.changeMetadata.title}"></span>
</a>
</div>
</li>
<li class="nav-item nav-item-separator"></li>
<li class="nav-item dropdown" th:classappend="${currentPage}=='add-image' OR ${currentPage}=='ocr-pdf' OR ${currentPage}=='extract-images' OR ${currentPage}=='compress-pdf' ? 'active' : ''">
<li class="nav-item dropdown" th:classappend="${currentPage}=='add-image' OR ${currentPage}=='ocr-pdf' OR ${currentPage}=='change-permissions' OR ${currentPage}=='extract-images' OR ${currentPage}=='compress-pdf' ? 'active' : ''">
<a class="nav-link dropdown-toggle" href="#" id="navbarDropdown" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">
<img class="icon" src="images/card-list.svg" alt="icon" style="width: 16px; height: 16px; vertical-align: middle;">
<span class="icon-text" th:text="#{navbar.other}"></span>
@ -281,6 +278,9 @@ function compareVersions(version1, version2) {
<a class="dropdown-item" href="#" th:href="@{extract-images}" th:classappend="${currentPage}=='extract-images' ? 'active' : ''">
<img class="icon" src="images/images.svg" alt="icon" style="width: 16px; height: 16px; vertical-align: middle;"> <span class="icon-text" th:text="#{home.extractImages.title}"></span>
</a>
<a class="dropdown-item" href="#" th:href="@{change-metadata}" th:classappend="${currentPage}=='change-metadata' ? 'active' : ''">
<img class="icon" src="images/clipboard-data.svg" alt="icon" style="width: 16px; height: 16px; vertical-align: middle;"> <span class="icon-text" th:text="#{home.changeMetadata.title}"></span>
</a>
</div>
</li>

View file

@ -14,8 +14,7 @@
<div class="row justify-content-center">
<div class="col-md-6">
<h2 th:text="#{ocr.header}"></h2>
<form action="#" th:action="@{/ocr-pdf}" method="post" enctype="multipart/form-data" class="mb-3">
<form th:if="${#lists.size(languages) > 0}" action="#" th:action="@{/ocr-pdf}" method="post" enctype="multipart/form-data" class="mb-3">
<div th:replace="~{fragments/common :: fileSelector(name='fileInput', multiple=false, accept='application/pdf')}"></div>
<div class="form-group">
<label for="languages" class="form-label" th:text="#{ocr.selectText.1}"></label>