changes to script executor and init

This commit is contained in:
Anthony Stirling 2023-07-29 13:53:30 +01:00
parent ca12d040e1
commit b5b4636e56
14 changed files with 85 additions and 32 deletions

View file

@ -5,5 +5,17 @@ echo "Copying original files without overwriting existing files"
mkdir -p /usr/share/tesseract-ocr
cp -rn /usr/share/tesseract-ocr-original/* /usr/share/tesseract-ocr
# Check if TESSERACT_LANGS environment variable is set and is not empty
if [[ -n "$TESSERACT_LANGS" ]]; then
# Convert comma-separated values to a space-separated list
LANGS=$(echo $TESSERACT_LANGS | tr ',' ' ')
# Install each language pack
for LANG in $LANGS; do
apt-get install -y "tesseract-ocr-$LANG"
done
fi
# Run the main command
exec "$@"

View file

@ -21,6 +21,7 @@ import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.utils.GeneralUtils;
import stirling.software.SPDF.utils.ProcessExecutor;
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
import stirling.software.SPDF.utils.WebResponseUtils;
@RestController
@ -59,7 +60,7 @@ public class ConvertHtmlToPDF {
command.add("weasyprint");
command.add(tempInputFile.toString());
command.add(tempOutputFile.toString());
int returnCode = 0;
ProcessExecutorResult returnCode;
if (originalFilename.endsWith(".zip")) {
returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT)
.runCommandWithOutputHandling(command, tempInputFile.getParent().toFile());

View file

@ -19,6 +19,7 @@ import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.Parameter;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.utils.ProcessExecutor;
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
import stirling.software.SPDF.utils.WebResponseUtils;
@RestController
@ -41,7 +42,7 @@ public class ConvertOfficeController {
// Run the LibreOffice command
List<String> command = new ArrayList<>(Arrays.asList("unoconv", "-vvv", "-f", "pdf", "-o", tempOutputFile.toString(), tempInputFile.toString()));
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE).runCommandWithOutputHandling(command);
ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE).runCommandWithOutputHandling(command);
// Read the converted PDF file
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
@ -62,10 +63,10 @@ public class ConvertOfficeController {
summary = "Convert a file to a PDF using LibreOffice",
description = "This endpoint converts a given file to a PDF using LibreOffice API Input:Any Output:PDF Type:SISO"
)
public ResponseEntity<byte[]> processPdfWithOCR(
public ResponseEntity<byte[]> processFileToPDF(
@RequestPart(required = true, value = "fileInput")
@Parameter(
description = "The input file to be converted to a PDF file using OCR",
description = "The input file to be converted to a PDF file using LibreOffice",
required = true
)
MultipartFile inputFile

View file

@ -16,6 +16,7 @@ import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.Parameter;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.utils.ProcessExecutor;
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
import stirling.software.SPDF.utils.WebResponseUtils;
@RestController
@ -49,7 +50,7 @@ public class ConvertPDFToPDFA {
command.add(tempInputFile.toString());
command.add(tempOutputFile.toString());
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command);
ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command);
// Read the optimized PDF file
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);

View file

@ -17,6 +17,7 @@ import io.swagger.v3.oas.annotations.Parameter;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.utils.GeneralUtils;
import stirling.software.SPDF.utils.ProcessExecutor;
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
import stirling.software.SPDF.utils.WebResponseUtils;
@RestController
@ -49,7 +50,7 @@ public class ConvertWebsiteToPDF {
command.add(URL);
command.add(tempOutputFile.toString());
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT).runCommandWithOutputHandling(command);
ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT).runCommandWithOutputHandling(command);
// Read the optimized PDF file
pdfBytes = Files.readAllBytes(tempOutputFile);

View file

@ -31,6 +31,7 @@ import io.swagger.v3.oas.annotations.Parameter;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.utils.PdfUtils;
import stirling.software.SPDF.utils.ProcessExecutor;
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
import stirling.software.SPDF.utils.WebResponseUtils;
@RestController
@ -86,10 +87,10 @@ public class BlankPageController {
List<String> command = new ArrayList<>(Arrays.asList("python3", System.getProperty("user.dir") + "/scripts/detect-blank-pages.py", tempFile.toString() ,"--threshold", String.valueOf(threshold), "--white_percent", String.valueOf(whitePercent)));
// Run CLI command
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.PYTHON_OPENCV).runCommandWithOutputHandling(command);
ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.PYTHON_OPENCV).runCommandWithOutputHandling(command);
// does contain data
if (returnCode == 0) {
if (returnCode.getRc() == 0) {
System.out.println("page " + pageIndex + " has image which is not blank");
pagesToKeepIndex.add(pageIndex);
} else {

View file

@ -34,6 +34,7 @@ import io.swagger.v3.oas.annotations.media.Schema;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.utils.GeneralUtils;
import stirling.software.SPDF.utils.ProcessExecutor;
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
import stirling.software.SPDF.utils.WebResponseUtils;
@RestController
@ -116,7 +117,7 @@ public class CompressController {
command.add("-sOutputFile=" + tempOutputFile.toString());
command.add(tempInputFile.toString());
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT).runCommandWithOutputHandling(command);
ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT).runCommandWithOutputHandling(command);
// Check if file size is within expected size or not auto mode so instantly finish
long outputFileSize = Files.size(tempOutputFile);

View file

@ -33,6 +33,7 @@ import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.Parameter;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.utils.ProcessExecutor;
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
import stirling.software.SPDF.utils.WebResponseUtils;
@RestController
@ -117,7 +118,7 @@ public class ExtractImageScansController {
// Run CLI command
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.PYTHON_OPENCV).runCommandWithOutputHandling(command);
ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.PYTHON_OPENCV).runCommandWithOutputHandling(command);
// Read the output photos in temp directory
List<Path> tempOutputFiles = Files.list(tempDir).sorted().collect(Collectors.toList());

View file

@ -29,6 +29,7 @@ import io.swagger.v3.oas.annotations.Parameter;
import io.swagger.v3.oas.annotations.media.Schema;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.utils.ProcessExecutor;
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
import stirling.software.SPDF.utils.WebResponseUtils;
@RestController
@ -141,8 +142,12 @@ public class OCRController {
command.addAll(Arrays.asList("--language", languageOption, tempInputFile.toString(), tempOutputFile.toString()));
// Run CLI command
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command);
ProcessExecutorResult result = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command);
if(result.getRc() != 0 && result.getMessages().contains("multiprocessing/synchronize.py") && result.getMessages().contains("OSError: [Errno 38] Function not implemented")) {
command.add("--jobs");
command.add("1");
result = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command);
}
@ -153,7 +158,7 @@ public class OCRController {
List<String> gsCommand = Arrays.asList("gs", "-sDEVICE=pdfwrite", "-dFILTERIMAGE", "-o", tempPdfWithoutImages.toString(), tempOutputFile.toString());
int gsReturnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT).runCommandWithOutputHandling(gsCommand);
ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT).runCommandWithOutputHandling(gsCommand);
tempOutputFile = tempPdfWithoutImages;
}
// Read the OCR processed PDF file

View file

@ -18,6 +18,7 @@ import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.Parameter;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.utils.ProcessExecutor;
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
import stirling.software.SPDF.utils.WebResponseUtils;
@RestController
@ -51,7 +52,7 @@ public class RepairController {
command.add(tempInputFile.toString());
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT).runCommandWithOutputHandling(command);
ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT).runCommandWithOutputHandling(command);
// Read the optimized PDF file
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);

View file

@ -35,7 +35,7 @@ public class PasswordController {
@RequestPart(required = true, value = "fileInput")
@Parameter(description = "The input PDF file from which the password should be removed", required = true)
MultipartFile fileInput,
@RequestParam(name = "password")
@RequestPart(name = "password")
@Parameter(description = "The password of the PDF file", required = true)
String password) throws IOException {
PDDocument document = PDDocument.load(fileInput.getBytes(), password);
@ -52,37 +52,37 @@ public class PasswordController {
@RequestPart(required = true, value = "fileInput")
@Parameter(description = "The input PDF file to which the password should be added", required = true)
MultipartFile fileInput,
@RequestParam(defaultValue = "", name = "ownerPassword")
@RequestPart(value = "", name = "ownerPassword")
@Parameter(description = "The owner password to be added to the PDF file (Restricts what can be done with the document once it is opened)")
String ownerPassword,
@RequestParam(defaultValue = "", name = "password")
@RequestPart( name = "password")
@Parameter(description = "The password to be added to the PDF file (Restricts the opening of the document itself.)")
String password,
@RequestParam(defaultValue = "128", name = "keyLength")
@RequestPart( name = "keyLength")
@Parameter(description = "The length of the encryption key", schema = @Schema(allowableValues = {"40", "128", "256"}))
int keyLength,
@RequestParam(defaultValue = "false", name = "canAssembleDocument")
@RequestPart( name = "canAssembleDocument")
@Parameter(description = "Whether the document assembly is allowed", example = "false")
boolean canAssembleDocument,
@RequestParam(defaultValue = "false", name = "canExtractContent")
@RequestPart( name = "canExtractContent")
@Parameter(description = "Whether content extraction for accessibility is allowed", example = "false")
boolean canExtractContent,
@RequestParam(defaultValue = "false", name = "canExtractForAccessibility")
@RequestPart( name = "canExtractForAccessibility")
@Parameter(description = "Whether content extraction for accessibility is allowed", example = "false")
boolean canExtractForAccessibility,
@RequestParam(defaultValue = "false", name = "canFillInForm")
@RequestPart( name = "canFillInForm")
@Parameter(description = "Whether form filling is allowed", example = "false")
boolean canFillInForm,
@RequestParam(defaultValue = "false", name = "canModify")
@RequestPart( name = "canModify")
@Parameter(description = "Whether the document modification is allowed", example = "false")
boolean canModify,
@RequestParam(defaultValue = "false", name = "canModifyAnnotations")
@RequestPart( name = "canModifyAnnotations")
@Parameter(description = "Whether modification of annotations is allowed", example = "false")
boolean canModifyAnnotations,
@RequestParam(defaultValue = "false", name = "canPrint")
@RequestPart(name = "canPrint")
@Parameter(description = "Whether printing of the document is allowed", example = "false")
boolean canPrint,
@RequestParam(defaultValue = "false", name = "canPrintFaithful")
@RequestPart( name = "canPrintFaithful")
@Parameter(description = "Whether faithful printing is allowed", example = "false")
boolean canPrintFaithful
) throws IOException {

View file

@ -10,6 +10,8 @@ import io.swagger.v3.oas.annotations.tags.Tag;
@Controller
@Tag(name = "Security", description = "Security APIs")
public class SecurityWebController {
@GetMapping("/add-password")
@Hidden
public String addPasswordForm(Model model) {

View file

@ -20,6 +20,8 @@ import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.multipart.MultipartFile;
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
public class PDFToFile {
public ResponseEntity<byte[]> processPdfToOfficeFormat(MultipartFile inputFile, String outputFormat, String libreOfficeFilter) throws IOException, InterruptedException {
@ -53,7 +55,7 @@ public class PDFToFile {
// Run the LibreOffice command
List<String> command = new ArrayList<>(
Arrays.asList("soffice", "--infilter=" + libreOfficeFilter, "--convert-to", outputFormat, "--outdir", tempOutputDir.toString(), tempInputFile.toString()));
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE).runCommandWithOutputHandling(command);
ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE).runCommandWithOutputHandling(command);
// Get output files
List<File> outputFiles = Arrays.asList(tempOutputDir.toFile().listFiles());

View file

@ -37,11 +37,12 @@ public class ProcessExecutor {
private ProcessExecutor(int semaphoreLimit) {
this.semaphore = new Semaphore(semaphoreLimit);
}
public int runCommandWithOutputHandling(List<String> command) throws IOException, InterruptedException {
public ProcessExecutorResult runCommandWithOutputHandling(List<String> command) throws IOException, InterruptedException {
return runCommandWithOutputHandling(command, null);
}
public int runCommandWithOutputHandling(List<String> command, File workingDirectory) throws IOException, InterruptedException {
public ProcessExecutorResult runCommandWithOutputHandling(List<String> command, File workingDirectory) throws IOException, InterruptedException {
int exitCode = 1;
String messages = "";
semaphore.acquire();
try {
@ -89,14 +90,16 @@ public class ProcessExecutor {
// Wait for the reader threads to finish
errorReaderThread.join();
outputReaderThread.join();
if (outputLines.size() > 0) {
String outputMessage = String.join("\n", outputLines);
messages += outputMessage;
System.out.println("Command output:\n" + outputMessage);
}
if (errorLines.size() > 0) {
String errorMessage = String.join("\n", errorLines);
messages += errorMessage;
System.out.println("Command error output:\n" + errorMessage);
if (exitCode != 0) {
throw new IOException("Command process failed with exit code " + exitCode + ". Error message: " + errorMessage);
@ -105,7 +108,28 @@ public class ProcessExecutor {
} finally {
semaphore.release();
}
return exitCode;
return new ProcessExecutorResult(exitCode, messages);
}
public class ProcessExecutorResult{
int rc;
String messages;
public ProcessExecutorResult(int rc, String messages) {
this.rc = rc;
this.messages = messages;
}
public int getRc() {
return rc;
}
public void setRc(int rc) {
this.rc = rc;
}
public String getMessages() {
return messages;
}
public void setMessages(String messages) {
this.messages = messages;
}
}
}