diff --git a/scripts/init.sh b/scripts/init.sh index b45bf45f..99ff2226 100644 --- a/scripts/init.sh +++ b/scripts/init.sh @@ -5,5 +5,17 @@ echo "Copying original files without overwriting existing files" mkdir -p /usr/share/tesseract-ocr cp -rn /usr/share/tesseract-ocr-original/* /usr/share/tesseract-ocr +# Check if TESSERACT_LANGS environment variable is set and is not empty +if [[ -n "$TESSERACT_LANGS" ]]; then + # Convert comma-separated values to a space-separated list + LANGS=$(printf '%s' "$TESSERACT_LANGS" | tr ',' ' ') + + # Install each language pack; the loop variable is deliberately not named LANG, which is the locale variable and would otherwise be overwritten for apt-get and the exec'd command + for lang_code in $LANGS; do + apt-get install -y "tesseract-ocr-$lang_code" + done +fi + + + # Run the main command exec "$@" \ No newline at end of file diff --git a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertHtmlToPDF.java b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertHtmlToPDF.java index 2d792ee3..a5878b04 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertHtmlToPDF.java +++ b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertHtmlToPDF.java @@ -21,6 +21,7 @@ import io.swagger.v3.oas.annotations.Operation; import io.swagger.v3.oas.annotations.tags.Tag; import stirling.software.SPDF.utils.GeneralUtils; import stirling.software.SPDF.utils.ProcessExecutor; +import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult; import stirling.software.SPDF.utils.WebResponseUtils; @RestController @@ -59,7 +60,7 @@ public class ConvertHtmlToPDF { command.add("weasyprint"); command.add(tempInputFile.toString()); command.add(tempOutputFile.toString()); - int returnCode = 0; + ProcessExecutorResult returnCode; if (originalFilename.endsWith(".zip")) { returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT) .runCommandWithOutputHandling(command, tempInputFile.getParent().toFile()); diff --git a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertOfficeController.java 
b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertOfficeController.java index 79be9e2e..3b9f278f 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertOfficeController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertOfficeController.java @@ -19,6 +19,7 @@ import io.swagger.v3.oas.annotations.Operation; import io.swagger.v3.oas.annotations.Parameter; import io.swagger.v3.oas.annotations.tags.Tag; import stirling.software.SPDF.utils.ProcessExecutor; +import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult; import stirling.software.SPDF.utils.WebResponseUtils; @RestController @@ -41,7 +42,7 @@ public class ConvertOfficeController { // Run the LibreOffice command List command = new ArrayList<>(Arrays.asList("unoconv", "-vvv", "-f", "pdf", "-o", tempOutputFile.toString(), tempInputFile.toString())); - int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE).runCommandWithOutputHandling(command); + ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE).runCommandWithOutputHandling(command); // Read the converted PDF file byte[] pdfBytes = Files.readAllBytes(tempOutputFile); @@ -62,10 +63,10 @@ public class ConvertOfficeController { summary = "Convert a file to a PDF using LibreOffice", description = "This endpoint converts a given file to a PDF using LibreOffice API Input:Any Output:PDF Type:SISO" ) - public ResponseEntity processPdfWithOCR( + public ResponseEntity processFileToPDF( @RequestPart(required = true, value = "fileInput") @Parameter( - description = "The input file to be converted to a PDF file using OCR", + description = "The input file to be converted to a PDF file using LibreOffice", required = true ) MultipartFile inputFile diff --git a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToPDFA.java 
b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToPDFA.java index 4ff2b4f2..6a99090b 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToPDFA.java +++ b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToPDFA.java @@ -16,6 +16,7 @@ import io.swagger.v3.oas.annotations.Operation; import io.swagger.v3.oas.annotations.Parameter; import io.swagger.v3.oas.annotations.tags.Tag; import stirling.software.SPDF.utils.ProcessExecutor; +import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult; import stirling.software.SPDF.utils.WebResponseUtils; @RestController @@ -49,7 +50,7 @@ public class ConvertPDFToPDFA { command.add(tempInputFile.toString()); command.add(tempOutputFile.toString()); - int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command); + ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command); // Read the optimized PDF file byte[] pdfBytes = Files.readAllBytes(tempOutputFile); diff --git a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertWebsiteToPDF.java b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertWebsiteToPDF.java index 9167a6e4..57cf8f7f 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertWebsiteToPDF.java +++ b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertWebsiteToPDF.java @@ -17,6 +17,7 @@ import io.swagger.v3.oas.annotations.Parameter; import io.swagger.v3.oas.annotations.tags.Tag; import stirling.software.SPDF.utils.GeneralUtils; import stirling.software.SPDF.utils.ProcessExecutor; +import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult; import stirling.software.SPDF.utils.WebResponseUtils; @RestController @@ -49,7 +50,7 @@ public class ConvertWebsiteToPDF { command.add(URL); 
command.add(tempOutputFile.toString()); - int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT).runCommandWithOutputHandling(command); + ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT).runCommandWithOutputHandling(command); // Read the optimized PDF file pdfBytes = Files.readAllBytes(tempOutputFile); diff --git a/src/main/java/stirling/software/SPDF/controller/api/other/BlankPageController.java b/src/main/java/stirling/software/SPDF/controller/api/other/BlankPageController.java index 6ed76edb..4f505b73 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/other/BlankPageController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/other/BlankPageController.java @@ -31,6 +31,7 @@ import io.swagger.v3.oas.annotations.Parameter; import io.swagger.v3.oas.annotations.tags.Tag; import stirling.software.SPDF.utils.PdfUtils; import stirling.software.SPDF.utils.ProcessExecutor; +import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult; import stirling.software.SPDF.utils.WebResponseUtils; @RestController @@ -86,10 +87,10 @@ public class BlankPageController { List command = new ArrayList<>(Arrays.asList("python3", System.getProperty("user.dir") + "/scripts/detect-blank-pages.py", tempFile.toString() ,"--threshold", String.valueOf(threshold), "--white_percent", String.valueOf(whitePercent))); // Run CLI command - int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.PYTHON_OPENCV).runCommandWithOutputHandling(command); + ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.PYTHON_OPENCV).runCommandWithOutputHandling(command); // does contain data - if (returnCode == 0) { + if (returnCode.getRc() == 0) { System.out.println("page " + pageIndex + " has image which is not blank"); pagesToKeepIndex.add(pageIndex); } else { diff --git 
a/src/main/java/stirling/software/SPDF/controller/api/other/CompressController.java b/src/main/java/stirling/software/SPDF/controller/api/other/CompressController.java index 42ab6a41..381a6821 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/other/CompressController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/other/CompressController.java @@ -34,6 +34,7 @@ import io.swagger.v3.oas.annotations.media.Schema; import io.swagger.v3.oas.annotations.tags.Tag; import stirling.software.SPDF.utils.GeneralUtils; import stirling.software.SPDF.utils.ProcessExecutor; +import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult; import stirling.software.SPDF.utils.WebResponseUtils; @RestController @@ -116,7 +117,7 @@ public class CompressController { command.add("-sOutputFile=" + tempOutputFile.toString()); command.add(tempInputFile.toString()); - int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT).runCommandWithOutputHandling(command); + ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT).runCommandWithOutputHandling(command); // Check if file size is within expected size or not auto mode so instantly finish long outputFileSize = Files.size(tempOutputFile); diff --git a/src/main/java/stirling/software/SPDF/controller/api/other/ExtractImageScansController.java b/src/main/java/stirling/software/SPDF/controller/api/other/ExtractImageScansController.java index f9ac6761..55ff446c 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/other/ExtractImageScansController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/other/ExtractImageScansController.java @@ -33,6 +33,7 @@ import io.swagger.v3.oas.annotations.Operation; import io.swagger.v3.oas.annotations.Parameter; import io.swagger.v3.oas.annotations.tags.Tag; import stirling.software.SPDF.utils.ProcessExecutor; +import 
stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult; import stirling.software.SPDF.utils.WebResponseUtils; @RestController @@ -117,7 +118,7 @@ public class ExtractImageScansController { // Run CLI command - int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.PYTHON_OPENCV).runCommandWithOutputHandling(command); + ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.PYTHON_OPENCV).runCommandWithOutputHandling(command); // Read the output photos in temp directory List tempOutputFiles = Files.list(tempDir).sorted().collect(Collectors.toList()); diff --git a/src/main/java/stirling/software/SPDF/controller/api/other/OCRController.java b/src/main/java/stirling/software/SPDF/controller/api/other/OCRController.java index c3c323f5..d6009c8a 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/other/OCRController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/other/OCRController.java @@ -29,6 +29,7 @@ import io.swagger.v3.oas.annotations.Parameter; import io.swagger.v3.oas.annotations.media.Schema; import io.swagger.v3.oas.annotations.tags.Tag; import stirling.software.SPDF.utils.ProcessExecutor; +import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult; import stirling.software.SPDF.utils.WebResponseUtils; @RestController @@ -141,8 +142,27 @@ public class OCRController { command.addAll(Arrays.asList("--language", languageOption, tempInputFile.toString(), tempOutputFile.toString())); // Run CLI command - int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command); - + /* ProcessExecutor throws IOException (instead of returning) when the command exits non-zero with stderr output, so the failure text is collected from either the result or the exception before deciding whether to retry. */ + ProcessExecutorResult result = null; + java.io.IOException ocrFailure = null; + String ocrOutput = ""; + try { + result = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command); + if (result.getRc() != 0) { + ocrOutput = result.getMessages(); + } + } catch (java.io.IOException e) { + ocrFailure = e; + ocrOutput = String.valueOf(e.getMessage()); + } + if (ocrOutput.contains("multiprocessing/synchronize.py") && ocrOutput.contains("OSError: [Errno 38] Function not implemented")) { + /* Retry with a single job when the multiprocessing failure signature is seen. */ + command.add("--jobs"); + 
command.add("1"); + result = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command); + } else if (ocrFailure != null) { + throw ocrFailure; + } @@ -153,7 +173,7 @@ public class OCRController { List gsCommand = Arrays.asList("gs", "-sDEVICE=pdfwrite", "-dFILTERIMAGE", "-o", tempPdfWithoutImages.toString(), tempOutputFile.toString()); - int gsReturnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT).runCommandWithOutputHandling(gsCommand); + ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT).runCommandWithOutputHandling(gsCommand); tempOutputFile = tempPdfWithoutImages; } // Read the OCR processed PDF file diff --git a/src/main/java/stirling/software/SPDF/controller/api/other/RepairController.java b/src/main/java/stirling/software/SPDF/controller/api/other/RepairController.java index 536f8c89..52644080 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/other/RepairController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/other/RepairController.java @@ -18,6 +18,7 @@ import io.swagger.v3.oas.annotations.Operation; import io.swagger.v3.oas.annotations.Parameter; import io.swagger.v3.oas.annotations.tags.Tag; import stirling.software.SPDF.utils.ProcessExecutor; +import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult; import stirling.software.SPDF.utils.WebResponseUtils; @RestController @@ -51,7 +52,7 @@ public class RepairController { command.add(tempInputFile.toString()); - int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT).runCommandWithOutputHandling(command); + ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT).runCommandWithOutputHandling(command); // Read the optimized PDF file byte[] pdfBytes = Files.readAllBytes(tempOutputFile); diff --git a/src/main/java/stirling/software/SPDF/controller/api/security/PasswordController.java 
b/src/main/java/stirling/software/SPDF/controller/api/security/PasswordController.java index fa6df6ef..b3afecf1 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/security/PasswordController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/security/PasswordController.java @@ -35,7 +35,7 @@ public class PasswordController { @RequestPart(required = true, value = "fileInput") @Parameter(description = "The input PDF file from which the password should be removed", required = true) MultipartFile fileInput, - @RequestParam(name = "password") + @RequestPart(name = "password") @Parameter(description = "The password of the PDF file", required = true) String password) throws IOException { PDDocument document = PDDocument.load(fileInput.getBytes(), password); @@ -52,37 +52,38 @@ public class PasswordController { @RequestPart(required = true, value = "fileInput") @Parameter(description = "The input PDF file to which the password should be added", required = true) MultipartFile fileInput, - @RequestParam(defaultValue = "", name = "ownerPassword") + @RequestPart(name = "ownerPassword") @Parameter(description = "The owner password to be added to the PDF file (Restricts what can be done with the document once it is opened)") String ownerPassword, - @RequestParam(defaultValue = "", name = "password") + @RequestPart(name = "password") @Parameter(description = "The password to be added to the PDF file (Restricts the opening of the document itself.)") String password, + /* The int/boolean form fields below stay on @RequestParam: @RequestPart resolves non-String, non-file arguments through HttpMessageConverters keyed on the part's Content-Type and has no defaultValue. */ @RequestParam(defaultValue = "128", name = "keyLength") @Parameter(description = "The length of the encryption key", schema = @Schema(allowableValues = {"40", "128", "256"})) int keyLength, @RequestParam(defaultValue = "false", name = "canAssembleDocument") @Parameter(description = "Whether the document assembly is allowed", example = "false") boolean canAssembleDocument, 
@RequestParam(defaultValue = "false", name = "canExtractContent") @Parameter(description = "Whether content extraction for accessibility is allowed", example = "false") boolean canExtractContent, @RequestParam(defaultValue = "false", name = "canExtractForAccessibility") @Parameter(description = "Whether content extraction for accessibility is allowed", example = "false") boolean canExtractForAccessibility, @RequestParam(defaultValue = "false", name = "canFillInForm") @Parameter(description = "Whether form filling is allowed", example = "false") boolean canFillInForm, @RequestParam(defaultValue = "false", name = "canModify") @Parameter(description = "Whether the document modification is allowed", example = "false") boolean canModify, @RequestParam(defaultValue = "false", name = "canModifyAnnotations") @Parameter(description = "Whether modification of annotations is allowed", example = "false") boolean canModifyAnnotations, @RequestParam(defaultValue = "false", name = "canPrint") @Parameter(description = "Whether printing of the document is allowed", example = "false") boolean canPrint, @RequestParam(defaultValue = "false", name = "canPrintFaithful") @Parameter(description = "Whether faithful printing is allowed", example = "false") boolean canPrintFaithful ) throws IOException { diff --git a/src/main/java/stirling/software/SPDF/controller/web/SecurityWebController.java b/src/main/java/stirling/software/SPDF/controller/web/SecurityWebController.java index fe176f62..66de400e 100644 --- a/src/main/java/stirling/software/SPDF/controller/web/SecurityWebController.java +++ b/src/main/java/stirling/software/SPDF/controller/web/SecurityWebController.java @@ -10,6 +10,8 
@@ import io.swagger.v3.oas.annotations.tags.Tag; @Controller @Tag(name = "Security", description = "Security APIs") public class SecurityWebController { + + @GetMapping("/add-password") @Hidden public String addPasswordForm(Model model) { diff --git a/src/main/java/stirling/software/SPDF/utils/PDFToFile.java b/src/main/java/stirling/software/SPDF/utils/PDFToFile.java index ffe1d93d..af658f79 100644 --- a/src/main/java/stirling/software/SPDF/utils/PDFToFile.java +++ b/src/main/java/stirling/software/SPDF/utils/PDFToFile.java @@ -20,6 +20,8 @@ import org.springframework.http.MediaType; import org.springframework.http.ResponseEntity; import org.springframework.web.multipart.MultipartFile; +import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult; + public class PDFToFile { public ResponseEntity processPdfToOfficeFormat(MultipartFile inputFile, String outputFormat, String libreOfficeFilter) throws IOException, InterruptedException { @@ -53,7 +55,7 @@ public class PDFToFile { // Run the LibreOffice command List command = new ArrayList<>( Arrays.asList("soffice", "--infilter=" + libreOfficeFilter, "--convert-to", outputFormat, "--outdir", tempOutputDir.toString(), tempInputFile.toString())); - int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE).runCommandWithOutputHandling(command); + ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE).runCommandWithOutputHandling(command); // Get output files List outputFiles = Arrays.asList(tempOutputDir.toFile().listFiles()); diff --git a/src/main/java/stirling/software/SPDF/utils/ProcessExecutor.java b/src/main/java/stirling/software/SPDF/utils/ProcessExecutor.java index f2a7ed55..fe5c6717 100644 --- a/src/main/java/stirling/software/SPDF/utils/ProcessExecutor.java +++ b/src/main/java/stirling/software/SPDF/utils/ProcessExecutor.java @@ -37,11 +37,12 @@ public class ProcessExecutor { private ProcessExecutor(int semaphoreLimit) { 
this.semaphore = new Semaphore(semaphoreLimit); } - public int runCommandWithOutputHandling(List command) throws IOException, InterruptedException { + public ProcessExecutorResult runCommandWithOutputHandling(List command) throws IOException, InterruptedException { return runCommandWithOutputHandling(command, null); } - public int runCommandWithOutputHandling(List command, File workingDirectory) throws IOException, InterruptedException { + public ProcessExecutorResult runCommandWithOutputHandling(List command, File workingDirectory) throws IOException, InterruptedException { int exitCode = 1; + String messages = ""; semaphore.acquire(); try { @@ -89,14 +90,16 @@ public class ProcessExecutor { // Wait for the reader threads to finish errorReaderThread.join(); outputReaderThread.join(); - + if (outputLines.size() > 0) { String outputMessage = String.join("\n", outputLines); + messages += outputMessage; System.out.println("Command output:\n" + outputMessage); } if (errorLines.size() > 0) { String errorMessage = String.join("\n", errorLines); + messages += (messages.isEmpty() ? "" : "\n") + errorMessage; /* keep the last stdout line and the first stderr line apart */ System.out.println("Command error output:\n" + errorMessage); if (exitCode != 0) { throw new IOException("Command process failed with exit code " + exitCode + ". Error message: " + errorMessage); @@ -105,7 +108,29 @@ public class ProcessExecutor { } finally { semaphore.release(); } - return exitCode; + return new ProcessExecutorResult(exitCode, messages); + } + /** Outcome of one command run: the exit code and the captured stdout text followed by the stderr text. */ + public static class ProcessExecutorResult{ + private int rc; + private String messages; + public ProcessExecutorResult(int rc, String messages) { + this.rc = rc; + this.messages = messages; + } + public int getRc() { + return rc; + } + public void setRc(int rc) { + this.rc = rc; + } + public String getMessages() { + return messages; + } + public void setMessages(String messages) { + this.messages = messages; + } + + } - }