deletion changes

This commit is contained in:
Anthony Stirling 2024-05-27 17:53:18 +01:00
parent 6ffa80c386
commit 65f9438639
9 changed files with 198 additions and 201 deletions

View file

@ -3,14 +3,8 @@ name: "Build repo"
on:
push:
branches: ["main"]
paths-ignore:
- ".github/**"
- "**/*.md"
pull_request:
branches: ["main"]
paths-ignore:
- ".github/**"
- "**/*.md"
jobs:
build:
@ -36,7 +30,7 @@ jobs:
- uses: gradle/actions/setup-gradle@v3
with:
gradle-version: 7.6
gradle-version: 8.7
- name: Build with Gradle
run: ./gradlew build --no-build-cache

View file

@ -3,7 +3,6 @@ package stirling.software.SPDF.controller.api.converters;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@ -41,34 +40,35 @@ public class ConvertOfficeController {
// Save the uploaded file to a temporary location
Path tempInputFile =
Files.createTempFile("input_", "." + FilenameUtils.getExtension(originalFilename));
Files.copy(inputFile.getInputStream(), tempInputFile, StandardCopyOption.REPLACE_EXISTING);
inputFile.transferTo(tempInputFile);
// Prepare the output file path
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
// Run the LibreOffice command
List<String> command =
new ArrayList<>(
Arrays.asList(
"unoconv",
"-vvv",
"-f",
"pdf",
"-o",
tempOutputFile.toString(),
tempInputFile.toString()));
ProcessExecutorResult returnCode =
ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE)
.runCommandWithOutputHandling(command);
try {
// Run the LibreOffice command
List<String> command =
new ArrayList<>(
Arrays.asList(
"unoconv",
"-vvv",
"-f",
"pdf",
"-o",
tempOutputFile.toString(),
tempInputFile.toString()));
ProcessExecutorResult returnCode =
ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE)
.runCommandWithOutputHandling(command);
// Read the converted PDF file
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
// Clean up the temporary files
Files.delete(tempInputFile);
Files.delete(tempOutputFile);
return pdfBytes;
// Read the converted PDF file
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
return pdfBytes;
} finally {
// Clean up the temporary files
if (tempInputFile != null) Files.deleteIfExists(tempInputFile);
Files.deleteIfExists(tempOutputFile);
}
}
private boolean isValidFileExtension(String fileExtension) {

View file

@ -61,8 +61,8 @@ public class ConvertPDFToPDFA {
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
// Clean up the temporary files
Files.delete(tempInputFile);
Files.delete(tempOutputFile);
Files.deleteIfExists(tempInputFile);
Files.deleteIfExists(tempOutputFile);
// Return the optimized PDF as a response
String outputFilename =

View file

@ -136,10 +136,10 @@ public class CompressController {
// Increase optimization level for next iteration
optimizeLevel++;
if (autoMode && optimizeLevel > 4) {
System.out.println("Skipping level 5 due to bad results in auto mode");
logger.info("Skipping level 5 due to bad results in auto mode");
sizeMet = true;
} else {
System.out.println(
logger.info(
"Increasing ghostscript optimisation level to " + optimizeLevel);
}
}
@ -230,10 +230,10 @@ public class CompressController {
if (currentSize > expectedOutputSize) {
// Log the current file size and scaleFactor
System.out.println(
logger.info(
"Current file size: "
+ FileUtils.byteCountToDisplaySize(currentSize));
System.out.println("Current scale factor: " + scaleFactor);
logger.info("Current scale factor: " + scaleFactor);
// The file is still too large, reduce scaleFactor and try again
scaleFactor *= 0.9f; // reduce scaleFactor by 10%
@ -256,7 +256,6 @@ public class CompressController {
}
}
}
// Read the optimized PDF file
pdfBytes = Files.readAllBytes(tempOutputFile);
@ -269,17 +268,18 @@ public class CompressController {
// Read the original file again
pdfBytes = Files.readAllBytes(tempInputFile);
}
// Return the optimized PDF as a response
String outputFilename =
Filenames.toSimpleFileName(inputFile.getOriginalFilename())
.replaceFirst("[.][^.]+$", "")
+ "_Optimized.pdf";
return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename);
} finally {
// Clean up the temporary files
Files.delete(tempInputFile);
Files.delete(tempOutputFile);
// deleted by multipart file handler deu to transferTo?
// Files.deleteIfExists(tempInputFile);
Files.deleteIfExists(tempOutputFile);
}
// Return the optimized PDF as a response
String outputFilename =
Filenames.toSimpleFileName(inputFile.getOriginalFilename())
.replaceFirst("[.][^.]+$", "")
+ "_Optimized.pdf";
return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename);
}
}

View file

@ -5,7 +5,6 @@ import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@ -103,10 +102,7 @@ public class ExtractImageScansController {
}
} else {
tempInputFile = Files.createTempFile("input_", "." + extension);
Files.copy(
form.getFileInput().getInputStream(),
tempInputFile,
StandardCopyOption.REPLACE_EXISTING);
form.getFileInput().transferTo(tempInputFile);
// Add input file path to images list
images.add(tempInputFile.toString());
}
@ -176,11 +172,14 @@ public class ExtractImageScansController {
byte[] zipBytes = Files.readAllBytes(tempZipFile);
// Clean up the temporary zip file
Files.delete(tempZipFile);
Files.deleteIfExists(tempZipFile);
return WebResponseUtils.bytesToWebResponse(
zipBytes, outputZipFilename, MediaType.APPLICATION_OCTET_STREAM);
} else {
} if (processedImageBytes.size() == 0) {
throw new IllegalArgumentException("No images detected");
}else {
// Return the processed image as a response
byte[] imageBytes = processedImageBytes.get(0);
return WebResponseUtils.bytesToWebResponse(
@ -201,7 +200,7 @@ public class ExtractImageScansController {
if (tempZipFile != null && Files.exists(tempZipFile)) {
try {
Files.delete(tempZipFile);
Files.deleteIfExists(tempZipFile);
} catch (IOException e) {
logger.error("Failed to delete temporary zip file: " + tempZipFile, e);
}

View file

@ -5,7 +5,6 @@ import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
@ -91,139 +90,145 @@ public class OCRController {
}
// Save the uploaded file to a temporary location
Path tempInputFile = Files.createTempFile("input_", ".pdf");
Files.copy(inputFile.getInputStream(), tempInputFile, StandardCopyOption.REPLACE_EXISTING);
// Prepare the output file path
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
// Prepare the output file path
Path sidecarTextPath = null;
// Run OCR Command
String languageOption = String.join("+", selectedLanguages);
try {
inputFile.transferTo(tempInputFile.toFile());
List<String> command =
new ArrayList<>(
Arrays.asList(
"ocrmypdf",
"--verbose",
"2",
"--output-type",
"pdf",
"--pdf-renderer",
ocrRenderType));
// Run OCR Command
String languageOption = String.join("+", selectedLanguages);
if (sidecar != null && sidecar) {
sidecarTextPath = Files.createTempFile("sidecar", ".txt");
command.add("--sidecar");
command.add(sidecarTextPath.toString());
}
if (deskew != null && deskew) {
command.add("--deskew");
}
if (clean != null && clean) {
command.add("--clean");
}
if (cleanFinal != null && cleanFinal) {
command.add("--clean-final");
}
if (ocrType != null && !"".equals(ocrType)) {
if ("skip-text".equals(ocrType)) {
command.add("--skip-text");
} else if ("force-ocr".equals(ocrType)) {
command.add("--force-ocr");
} else if ("Normal".equals(ocrType)) {
List<String> command =
new ArrayList<>(
Arrays.asList(
"ocrmypdf",
"--verbose",
"2",
"--output-type",
"pdf",
"--pdf-renderer",
ocrRenderType));
if (sidecar != null && sidecar) {
sidecarTextPath = Files.createTempFile("sidecar", ".txt");
command.add("--sidecar");
command.add(sidecarTextPath.toString());
}
}
command.addAll(
Arrays.asList(
"--language",
languageOption,
tempInputFile.toString(),
tempOutputFile.toString()));
if (deskew != null && deskew) {
command.add("--deskew");
}
if (clean != null && clean) {
command.add("--clean");
}
if (cleanFinal != null && cleanFinal) {
command.add("--clean-final");
}
if (ocrType != null && !"".equals(ocrType)) {
if ("skip-text".equals(ocrType)) {
command.add("--skip-text");
} else if ("force-ocr".equals(ocrType)) {
command.add("--force-ocr");
} else if ("Normal".equals(ocrType)) {
// Run CLI command
ProcessExecutorResult result =
ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF)
.runCommandWithOutputHandling(command);
if (result.getRc() != 0
&& result.getMessages().contains("multiprocessing/synchronize.py")
&& result.getMessages().contains("OSError: [Errno 38] Function not implemented")) {
command.add("--jobs");
command.add("1");
result =
}
}
command.addAll(
Arrays.asList(
"--language",
languageOption,
tempInputFile.toString(),
tempOutputFile.toString()));
// Run CLI command
ProcessExecutorResult result =
ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF)
.runCommandWithOutputHandling(command);
}
// Remove images from the OCR processed PDF if the flag is set to true
if (removeImagesAfter != null && removeImagesAfter) {
Path tempPdfWithoutImages = Files.createTempFile("output_", "_no_images.pdf");
List<String> gsCommand =
Arrays.asList(
"gs",
"-sDEVICE=pdfwrite",
"-dFILTERIMAGE",
"-o",
tempPdfWithoutImages.toString(),
tempOutputFile.toString());
ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT)
.runCommandWithOutputHandling(gsCommand);
tempOutputFile = tempPdfWithoutImages;
}
// Read the OCR processed PDF file
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
// Clean up the temporary files
Files.delete(tempInputFile);
// Return the OCR processed PDF as a response
String outputFilename =
Filenames.toSimpleFileName(inputFile.getOriginalFilename())
.replaceFirst("[.][^.]+$", "")
+ "_OCR.pdf";
if (sidecar != null && sidecar) {
// Create a zip file containing both the PDF and the text file
String outputZipFilename =
Filenames.toSimpleFileName(inputFile.getOriginalFilename())
.replaceFirst("[.][^.]+$", "")
+ "_OCR.zip";
Path tempZipFile = Files.createTempFile("output_", ".zip");
try (ZipOutputStream zipOut =
new ZipOutputStream(new FileOutputStream(tempZipFile.toFile()))) {
// Add PDF file to the zip
ZipEntry pdfEntry = new ZipEntry(outputFilename);
zipOut.putNextEntry(pdfEntry);
Files.copy(tempOutputFile, zipOut);
zipOut.closeEntry();
// Add text file to the zip
ZipEntry txtEntry = new ZipEntry(outputFilename.replace(".pdf", ".txt"));
zipOut.putNextEntry(txtEntry);
Files.copy(sidecarTextPath, zipOut);
zipOut.closeEntry();
if (result.getRc() != 0
&& result.getMessages().contains("multiprocessing/synchronize.py")
&& result.getMessages()
.contains("OSError: [Errno 38] Function not implemented")) {
command.add("--jobs");
command.add("1");
result =
ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF)
.runCommandWithOutputHandling(command);
}
byte[] zipBytes = Files.readAllBytes(tempZipFile);
// Remove images from the OCR processed PDF if the flag is set to true
if (removeImagesAfter != null && removeImagesAfter) {
Path tempPdfWithoutImages = Files.createTempFile("output_", "_no_images.pdf");
// Clean up the temporary zip file
Files.delete(tempZipFile);
Files.delete(tempOutputFile);
Files.delete(sidecarTextPath);
List<String> gsCommand =
Arrays.asList(
"gs",
"-sDEVICE=pdfwrite",
"-dFILTERIMAGE",
"-o",
tempPdfWithoutImages.toString(),
tempOutputFile.toString());
ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT)
.runCommandWithOutputHandling(gsCommand);
tempOutputFile = tempPdfWithoutImages;
}
// Read the OCR processed PDF file
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
// Return the zip file containing both the PDF and the text file
return WebResponseUtils.bytesToWebResponse(
zipBytes, outputZipFilename, MediaType.APPLICATION_OCTET_STREAM);
} else {
// Return the OCR processed PDF as a response
Files.delete(tempOutputFile);
return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename);
String outputFilename =
Filenames.toSimpleFileName(inputFile.getOriginalFilename())
.replaceFirst("[.][^.]+$", "")
+ "_OCR.pdf";
if (sidecar != null && sidecar) {
// Create a zip file containing both the PDF and the text file
String outputZipFilename =
Filenames.toSimpleFileName(inputFile.getOriginalFilename())
.replaceFirst("[.][^.]+$", "")
+ "_OCR.zip";
Path tempZipFile = Files.createTempFile("output_", ".zip");
try (ZipOutputStream zipOut =
new ZipOutputStream(new FileOutputStream(tempZipFile.toFile()))) {
// Add PDF file to the zip
ZipEntry pdfEntry = new ZipEntry(outputFilename);
zipOut.putNextEntry(pdfEntry);
Files.copy(tempOutputFile, zipOut);
zipOut.closeEntry();
// Add text file to the zip
ZipEntry txtEntry = new ZipEntry(outputFilename.replace(".pdf", ".txt"));
zipOut.putNextEntry(txtEntry);
Files.copy(sidecarTextPath, zipOut);
zipOut.closeEntry();
}
byte[] zipBytes = Files.readAllBytes(tempZipFile);
// Clean up the temporary zip file
Files.deleteIfExists(tempZipFile);
Files.deleteIfExists(tempOutputFile);
Files.deleteIfExists(sidecarTextPath);
// Return the zip file containing both the PDF and the text file
return WebResponseUtils.bytesToWebResponse(
zipBytes, outputZipFilename, MediaType.APPLICATION_OCTET_STREAM);
} else {
// Return the OCR processed PDF as a response
Files.deleteIfExists(tempOutputFile);
return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename);
}
} finally {
// Clean up the temporary files
Files.deleteIfExists(tempOutputFile);
// Comment out as transferTo makes multipart handle cleanup
// Files.deleteIfExists(tempInputFile);
if (sidecarTextPath != null) {
Files.deleteIfExists(sidecarTextPath);
}
}
}
}

View file

@ -41,34 +41,36 @@ public class RepairController {
MultipartFile inputFile = request.getFileInput();
// Save the uploaded file to a temporary location
Path tempInputFile = Files.createTempFile("input_", ".pdf");
inputFile.transferTo(tempInputFile.toFile());
// Prepare the output file path
Path tempOutputFile = Files.createTempFile("output_", ".pdf");
byte[] pdfBytes = null;
inputFile.transferTo(tempInputFile.toFile());
try {
List<String> command = new ArrayList<>();
command.add("gs");
command.add("-o");
command.add(tempOutputFile.toString());
command.add("-sDEVICE=pdfwrite");
command.add(tempInputFile.toString());
List<String> command = new ArrayList<>();
command.add("gs");
command.add("-o");
command.add(tempOutputFile.toString());
command.add("-sDEVICE=pdfwrite");
command.add(tempInputFile.toString());
ProcessExecutorResult returnCode =
ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT)
.runCommandWithOutputHandling(command);
// Read the optimized PDF file
byte[] pdfBytes = Files.readAllBytes(tempOutputFile);
// Clean up the temporary files
Files.delete(tempInputFile);
Files.delete(tempOutputFile);
ProcessExecutorResult returnCode =
ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT)
.runCommandWithOutputHandling(command);
// Read the optimized PDF file
pdfBytes = Files.readAllBytes(tempOutputFile);
// Return the optimized PDF as a response
String outputFilename =
Filenames.toSimpleFileName(inputFile.getOriginalFilename())
.replaceFirst("[.][^.]+$", "")
+ "_repaired.pdf";
return WebResponseUtils.bytesToWebResponse(pdfBytes, outputFilename);
} finally {
// Clean up the temporary files
Files.deleteIfExists(tempInputFile);
Files.deleteIfExists(tempOutputFile);
}
}
}

View file

@ -79,8 +79,8 @@ public class FileToPdf {
} finally {
// Clean up temporary files
Files.delete(tempOutputFile);
Files.delete(tempInputFile);
Files.deleteIfExists(tempOutputFile);
Files.deleteIfExists(tempInputFile);
}
return pdfBytes;

View file

@ -6,7 +6,6 @@ import java.io.FileInputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@ -47,8 +46,7 @@ public class PDFToFile {
try {
// Save the uploaded file to a temporary location
tempInputFile = Files.createTempFile("input_", ".pdf");
Files.copy(
inputFile.getInputStream(), tempInputFile, StandardCopyOption.REPLACE_EXISTING);
inputFile.transferTo(tempInputFile);
// Prepare the output directory
tempOutputDir = Files.createTempDirectory("output_");
@ -85,7 +83,7 @@ public class PDFToFile {
} finally {
// Clean up the temporary files
if (tempInputFile != null) Files.delete(tempInputFile);
if (tempInputFile != null) Files.deleteIfExists(tempInputFile);
if (tempOutputDir != null) FileUtils.deleteDirectory(tempOutputDir.toFile());
}
@ -127,8 +125,7 @@ public class PDFToFile {
try {
// Save the uploaded file to a temporary location
tempInputFile = Files.createTempFile("input_", ".pdf");
Files.copy(
inputFile.getInputStream(), tempInputFile, StandardCopyOption.REPLACE_EXISTING);
inputFile.transferTo(tempInputFile);
// Prepare the output directory
tempOutputDir = Files.createTempDirectory("output_");
@ -180,7 +177,7 @@ public class PDFToFile {
} finally {
// Clean up the temporary files
if (tempInputFile != null) Files.delete(tempInputFile);
Files.deleteIfExists(tempInputFile);
if (tempOutputDir != null) FileUtils.deleteDirectory(tempOutputDir.toFile());
}
System.out.println("fileBytes=" + fileBytes.length);