changes to script executor and init
This commit is contained in:
parent
ca12d040e1
commit
b5b4636e56
14 changed files with 85 additions and 32 deletions
|
@ -5,5 +5,17 @@ echo "Copying original files without overwriting existing files"
|
|||
# Seed the Tesseract data directory from the pristine copy. `cp -n` never
# overwrites, so files that already exist in the target are kept as they are.
mkdir -p /usr/share/tesseract-ocr
cp -rn /usr/share/tesseract-ocr-original/* /usr/share/tesseract-ocr

# Optionally install extra Tesseract language packs.
# TESSERACT_LANGS is a comma-separated list of language codes; nothing is
# installed when it is unset or empty.
if [[ -n "$TESSERACT_LANGS" ]]; then
  # Convert comma-separated values to a space-separated list
  requested_langs=$(echo "$TESSERACT_LANGS" | tr ',' ' ')

  # Install each language pack.
  # The loop variable is deliberately NOT called LANG: LANG is the POSIX
  # locale variable, and assigning to it here would change the locale seen by
  # apt-get and by the main command exec'd at the end of this script.
  # $requested_langs is left unquoted on purpose so it word-splits into codes.
  for tess_lang in $requested_langs; do
    apt-get install -y "tesseract-ocr-$tess_lang"
  done
fi

# Run the main command
exec "$@"
|
|
@ -21,6 +21,7 @@ import io.swagger.v3.oas.annotations.Operation;
|
|||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
import stirling.software.SPDF.utils.GeneralUtils;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
|
||||
import stirling.software.SPDF.utils.WebResponseUtils;
|
||||
|
||||
@RestController
|
||||
|
@ -59,7 +60,7 @@ public class ConvertHtmlToPDF {
|
|||
command.add("weasyprint");
|
||||
command.add(tempInputFile.toString());
|
||||
command.add(tempOutputFile.toString());
|
||||
int returnCode = 0;
|
||||
ProcessExecutorResult returnCode;
|
||||
if (originalFilename.endsWith(".zip")) {
|
||||
returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT)
|
||||
.runCommandWithOutputHandling(command, tempInputFile.getParent().toFile());
|
||||
|
|
|
@ -19,6 +19,7 @@ import io.swagger.v3.oas.annotations.Operation;
|
|||
import io.swagger.v3.oas.annotations.Parameter;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
|
||||
import stirling.software.SPDF.utils.WebResponseUtils;
|
||||
|
||||
@RestController
|
||||
|
@ -41,7 +42,7 @@ public class ConvertOfficeController {
|
|||
|
||||
// Run the LibreOffice command
|
||||
List<String> command = new ArrayList<>(Arrays.asList("unoconv", "-vvv", "-f", "pdf", "-o", tempOutputFile.toString(), tempInputFile.toString()));
|
||||
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE).runCommandWithOutputHandling(command);
|
||||
ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE).runCommandWithOutputHandling(command);
|
||||
|
||||
// Read the converted PDF file
|
||||
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
|
||||
|
@ -62,10 +63,10 @@ public class ConvertOfficeController {
|
|||
summary = "Convert a file to a PDF using LibreOffice",
|
||||
description = "This endpoint converts a given file to a PDF using LibreOffice API Input:Any Output:PDF Type:SISO"
|
||||
)
|
||||
public ResponseEntity<byte[]> processPdfWithOCR(
|
||||
public ResponseEntity<byte[]> processFileToPDF(
|
||||
@RequestPart(required = true, value = "fileInput")
|
||||
@Parameter(
|
||||
description = "The input file to be converted to a PDF file using OCR",
|
||||
description = "The input file to be converted to a PDF file using LibreOffice",
|
||||
required = true
|
||||
)
|
||||
MultipartFile inputFile
|
||||
|
|
|
@ -16,6 +16,7 @@ import io.swagger.v3.oas.annotations.Operation;
|
|||
import io.swagger.v3.oas.annotations.Parameter;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
|
||||
import stirling.software.SPDF.utils.WebResponseUtils;
|
||||
|
||||
@RestController
|
||||
|
@ -49,7 +50,7 @@ public class ConvertPDFToPDFA {
|
|||
command.add(tempInputFile.toString());
|
||||
command.add(tempOutputFile.toString());
|
||||
|
||||
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command);
|
||||
ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command);
|
||||
|
||||
// Read the optimized PDF file
|
||||
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
|
||||
|
|
|
@ -17,6 +17,7 @@ import io.swagger.v3.oas.annotations.Parameter;
|
|||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
import stirling.software.SPDF.utils.GeneralUtils;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
|
||||
import stirling.software.SPDF.utils.WebResponseUtils;
|
||||
|
||||
@RestController
|
||||
|
@ -49,7 +50,7 @@ public class ConvertWebsiteToPDF {
|
|||
command.add(URL);
|
||||
command.add(tempOutputFile.toString());
|
||||
|
||||
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT).runCommandWithOutputHandling(command);
|
||||
ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT).runCommandWithOutputHandling(command);
|
||||
|
||||
// Read the optimized PDF file
|
||||
pdfBytes = Files.readAllBytes(tempOutputFile);
|
||||
|
|
|
@ -31,6 +31,7 @@ import io.swagger.v3.oas.annotations.Parameter;
|
|||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
import stirling.software.SPDF.utils.PdfUtils;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
|
||||
import stirling.software.SPDF.utils.WebResponseUtils;
|
||||
|
||||
@RestController
|
||||
|
@ -86,10 +87,10 @@ public class BlankPageController {
|
|||
List<String> command = new ArrayList<>(Arrays.asList("python3", System.getProperty("user.dir") + "/scripts/detect-blank-pages.py", tempFile.toString() ,"--threshold", String.valueOf(threshold), "--white_percent", String.valueOf(whitePercent)));
|
||||
|
||||
// Run CLI command
|
||||
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.PYTHON_OPENCV).runCommandWithOutputHandling(command);
|
||||
ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.PYTHON_OPENCV).runCommandWithOutputHandling(command);
|
||||
|
||||
// does contain data
|
||||
if (returnCode == 0) {
|
||||
if (returnCode.getRc() == 0) {
|
||||
System.out.println("page " + pageIndex + " has image which is not blank");
|
||||
pagesToKeepIndex.add(pageIndex);
|
||||
} else {
|
||||
|
|
|
@ -34,6 +34,7 @@ import io.swagger.v3.oas.annotations.media.Schema;
|
|||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
import stirling.software.SPDF.utils.GeneralUtils;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
|
||||
import stirling.software.SPDF.utils.WebResponseUtils;
|
||||
|
||||
@RestController
|
||||
|
@ -116,7 +117,7 @@ public class CompressController {
|
|||
command.add("-sOutputFile=" + tempOutputFile.toString());
|
||||
command.add(tempInputFile.toString());
|
||||
|
||||
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT).runCommandWithOutputHandling(command);
|
||||
ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT).runCommandWithOutputHandling(command);
|
||||
|
||||
// Check if file size is within expected size or not auto mode so instantly finish
|
||||
long outputFileSize = Files.size(tempOutputFile);
|
||||
|
|
|
@ -33,6 +33,7 @@ import io.swagger.v3.oas.annotations.Operation;
|
|||
import io.swagger.v3.oas.annotations.Parameter;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
|
||||
import stirling.software.SPDF.utils.WebResponseUtils;
|
||||
|
||||
@RestController
|
||||
|
@ -117,7 +118,7 @@ public class ExtractImageScansController {
|
|||
|
||||
|
||||
// Run CLI command
|
||||
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.PYTHON_OPENCV).runCommandWithOutputHandling(command);
|
||||
ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.PYTHON_OPENCV).runCommandWithOutputHandling(command);
|
||||
|
||||
// Read the output photos in temp directory
|
||||
List<Path> tempOutputFiles = Files.list(tempDir).sorted().collect(Collectors.toList());
|
||||
|
|
|
@ -29,6 +29,7 @@ import io.swagger.v3.oas.annotations.Parameter;
|
|||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
|
||||
import stirling.software.SPDF.utils.WebResponseUtils;
|
||||
|
||||
@RestController
|
||||
|
@ -141,8 +142,12 @@ public class OCRController {
|
|||
command.addAll(Arrays.asList("--language", languageOption, tempInputFile.toString(), tempOutputFile.toString()));
|
||||
|
||||
// Run CLI command
|
||||
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command);
|
||||
|
||||
ProcessExecutorResult result = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command);
|
||||
if(result.getRc() != 0 && result.getMessages().contains("multiprocessing/synchronize.py") && result.getMessages().contains("OSError: [Errno 38] Function not implemented")) {
|
||||
command.add("--jobs");
|
||||
command.add("1");
|
||||
result = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
@ -153,7 +158,7 @@ public class OCRController {
|
|||
|
||||
List<String> gsCommand = Arrays.asList("gs", "-sDEVICE=pdfwrite", "-dFILTERIMAGE", "-o", tempPdfWithoutImages.toString(), tempOutputFile.toString());
|
||||
|
||||
int gsReturnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT).runCommandWithOutputHandling(gsCommand);
|
||||
ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT).runCommandWithOutputHandling(gsCommand);
|
||||
tempOutputFile = tempPdfWithoutImages;
|
||||
}
|
||||
// Read the OCR processed PDF file
|
||||
|
|
|
@ -18,6 +18,7 @@ import io.swagger.v3.oas.annotations.Operation;
|
|||
import io.swagger.v3.oas.annotations.Parameter;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
|
||||
import stirling.software.SPDF.utils.WebResponseUtils;
|
||||
|
||||
@RestController
|
||||
|
@ -51,7 +52,7 @@ public class RepairController {
|
|||
command.add(tempInputFile.toString());
|
||||
|
||||
|
||||
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT).runCommandWithOutputHandling(command);
|
||||
ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT).runCommandWithOutputHandling(command);
|
||||
|
||||
// Read the optimized PDF file
|
||||
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
|
||||
|
|
|
@ -35,7 +35,7 @@ public class PasswordController {
|
|||
@RequestPart(required = true, value = "fileInput")
|
||||
@Parameter(description = "The input PDF file from which the password should be removed", required = true)
|
||||
MultipartFile fileInput,
|
||||
@RequestParam(name = "password")
|
||||
@RequestPart(name = "password")
|
||||
@Parameter(description = "The password of the PDF file", required = true)
|
||||
String password) throws IOException {
|
||||
PDDocument document = PDDocument.load(fileInput.getBytes(), password);
|
||||
|
@ -52,37 +52,37 @@ public class PasswordController {
|
|||
@RequestPart(required = true, value = "fileInput")
|
||||
@Parameter(description = "The input PDF file to which the password should be added", required = true)
|
||||
MultipartFile fileInput,
|
||||
@RequestParam(defaultValue = "", name = "ownerPassword")
|
||||
@RequestPart(value = "", name = "ownerPassword")
|
||||
@Parameter(description = "The owner password to be added to the PDF file (Restricts what can be done with the document once it is opened)")
|
||||
String ownerPassword,
|
||||
@RequestParam(defaultValue = "", name = "password")
|
||||
@RequestPart( name = "password")
|
||||
@Parameter(description = "The password to be added to the PDF file (Restricts the opening of the document itself.)")
|
||||
String password,
|
||||
@RequestParam(defaultValue = "128", name = "keyLength")
|
||||
@RequestPart( name = "keyLength")
|
||||
@Parameter(description = "The length of the encryption key", schema = @Schema(allowableValues = {"40", "128", "256"}))
|
||||
int keyLength,
|
||||
@RequestParam(defaultValue = "false", name = "canAssembleDocument")
|
||||
@RequestPart( name = "canAssembleDocument")
|
||||
@Parameter(description = "Whether the document assembly is allowed", example = "false")
|
||||
boolean canAssembleDocument,
|
||||
@RequestParam(defaultValue = "false", name = "canExtractContent")
|
||||
@RequestPart( name = "canExtractContent")
|
||||
@Parameter(description = "Whether content extraction for accessibility is allowed", example = "false")
|
||||
boolean canExtractContent,
|
||||
@RequestParam(defaultValue = "false", name = "canExtractForAccessibility")
|
||||
@RequestPart( name = "canExtractForAccessibility")
|
||||
@Parameter(description = "Whether content extraction for accessibility is allowed", example = "false")
|
||||
boolean canExtractForAccessibility,
|
||||
@RequestParam(defaultValue = "false", name = "canFillInForm")
|
||||
@RequestPart( name = "canFillInForm")
|
||||
@Parameter(description = "Whether form filling is allowed", example = "false")
|
||||
boolean canFillInForm,
|
||||
@RequestParam(defaultValue = "false", name = "canModify")
|
||||
@RequestPart( name = "canModify")
|
||||
@Parameter(description = "Whether the document modification is allowed", example = "false")
|
||||
boolean canModify,
|
||||
@RequestParam(defaultValue = "false", name = "canModifyAnnotations")
|
||||
@RequestPart( name = "canModifyAnnotations")
|
||||
@Parameter(description = "Whether modification of annotations is allowed", example = "false")
|
||||
boolean canModifyAnnotations,
|
||||
@RequestParam(defaultValue = "false", name = "canPrint")
|
||||
@RequestPart(name = "canPrint")
|
||||
@Parameter(description = "Whether printing of the document is allowed", example = "false")
|
||||
boolean canPrint,
|
||||
@RequestParam(defaultValue = "false", name = "canPrintFaithful")
|
||||
@RequestPart( name = "canPrintFaithful")
|
||||
@Parameter(description = "Whether faithful printing is allowed", example = "false")
|
||||
boolean canPrintFaithful
|
||||
) throws IOException {
|
||||
|
|
|
@ -10,6 +10,8 @@ import io.swagger.v3.oas.annotations.tags.Tag;
|
|||
@Controller
|
||||
@Tag(name = "Security", description = "Security APIs")
|
||||
public class SecurityWebController {
|
||||
|
||||
|
||||
@GetMapping("/add-password")
|
||||
@Hidden
|
||||
public String addPasswordForm(Model model) {
|
||||
|
|
|
@ -20,6 +20,8 @@ import org.springframework.http.MediaType;
|
|||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
|
||||
|
||||
public class PDFToFile {
|
||||
public ResponseEntity<byte[]> processPdfToOfficeFormat(MultipartFile inputFile, String outputFormat, String libreOfficeFilter) throws IOException, InterruptedException {
|
||||
|
||||
|
@ -53,7 +55,7 @@ public class PDFToFile {
|
|||
// Run the LibreOffice command
|
||||
List<String> command = new ArrayList<>(
|
||||
Arrays.asList("soffice", "--infilter=" + libreOfficeFilter, "--convert-to", outputFormat, "--outdir", tempOutputDir.toString(), tempInputFile.toString()));
|
||||
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE).runCommandWithOutputHandling(command);
|
||||
ProcessExecutorResult returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE).runCommandWithOutputHandling(command);
|
||||
|
||||
// Get output files
|
||||
List<File> outputFiles = Arrays.asList(tempOutputDir.toFile().listFiles());
|
||||
|
|
|
@ -37,11 +37,12 @@ public class ProcessExecutor {
|
|||
private ProcessExecutor(int semaphoreLimit) {
|
||||
this.semaphore = new Semaphore(semaphoreLimit);
|
||||
}
|
||||
public int runCommandWithOutputHandling(List<String> command) throws IOException, InterruptedException {
|
||||
public ProcessExecutorResult runCommandWithOutputHandling(List<String> command) throws IOException, InterruptedException {
|
||||
return runCommandWithOutputHandling(command, null);
|
||||
}
|
||||
public int runCommandWithOutputHandling(List<String> command, File workingDirectory) throws IOException, InterruptedException {
|
||||
public ProcessExecutorResult runCommandWithOutputHandling(List<String> command, File workingDirectory) throws IOException, InterruptedException {
|
||||
int exitCode = 1;
|
||||
String messages = "";
|
||||
semaphore.acquire();
|
||||
try {
|
||||
|
||||
|
@ -89,14 +90,16 @@ public class ProcessExecutor {
|
|||
// Wait for the reader threads to finish
|
||||
errorReaderThread.join();
|
||||
outputReaderThread.join();
|
||||
|
||||
|
||||
if (outputLines.size() > 0) {
|
||||
String outputMessage = String.join("\n", outputLines);
|
||||
messages += outputMessage;
|
||||
System.out.println("Command output:\n" + outputMessage);
|
||||
}
|
||||
|
||||
if (errorLines.size() > 0) {
|
||||
String errorMessage = String.join("\n", errorLines);
|
||||
messages += errorMessage;
|
||||
System.out.println("Command error output:\n" + errorMessage);
|
||||
if (exitCode != 0) {
|
||||
throw new IOException("Command process failed with exit code " + exitCode + ". Error message: " + errorMessage);
|
||||
|
@ -105,7 +108,28 @@ public class ProcessExecutor {
|
|||
} finally {
|
||||
semaphore.release();
|
||||
}
|
||||
return exitCode;
|
||||
return new ProcessExecutorResult(exitCode, messages);
|
||||
}
|
||||
/**
 * Simple mutable holder for the outcome of one external command run:
 * the process return code plus the text captured while it ran.
 *
 * NOTE(review): declared as a non-static nested class of ProcessExecutor, so
 * every instance is tied to an enclosing executor; it reads no state from it
 * and looks like a candidate for a static nested class - confirm with callers.
 */
public class ProcessExecutorResult {

    /** Return code reported for the command. */
    int rc;

    /** Text collected from the command; may be empty. */
    String messages;

    /**
     * Creates a result with the given return code and captured text.
     *
     * @param exitCode return code of the command
     * @param output   text collected from the command
     */
    public ProcessExecutorResult(int exitCode, String output) {
        rc = exitCode;
        messages = output;
    }

    /** @return the return code of the command */
    public int getRc() {
        return rc;
    }

    /** @return the text collected from the command */
    public String getMessages() {
        return messages;
    }

    /** @param newRc replacement return code */
    public void setRc(int newRc) {
        rc = newRc;
    }

    /** @param newMessages replacement captured text */
    public void setMessages(String newMessages) {
        messages = newMessages;
    }
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue