refactor: replace ImageFinder with getAllImages using strategy behind ExtractImagesController

This commit is contained in:
sbplat 2024-01-29 11:23:58 -05:00
parent 6f3e317484
commit 53afb865c5
4 changed files with 28 additions and 141 deletions

View file

@ -68,7 +68,9 @@ public class AutoSplitPdfController {
splitDocuments.add(new PDDocument()); splitDocuments.add(new PDDocument());
} }
if (!splitDocuments.isEmpty() && !QR_CONTENT.equals(result) && !QR_CONTENT_OLD.equals(result)) { if (!splitDocuments.isEmpty()
&& !QR_CONTENT.equals(result)
&& !QR_CONTENT_OLD.equals(result)) {
splitDocuments.get(splitDocuments.size() - 1).addPage(document.getPage(page)); splitDocuments.get(splitDocuments.size() - 1).addPage(document.getPage(page));
} else if (page == 0) { } else if (page == 0) {
PDDocument firstDocument = new PDDocument(); PDDocument firstDocument = new PDDocument();

View file

@ -63,10 +63,7 @@ public class ApiDocService {
outputToFileTypes.put("PPT", Arrays.asList("ppt", "pptx", "odp")); outputToFileTypes.put("PPT", Arrays.asList("ppt", "pptx", "odp"));
outputToFileTypes.put("XML", Arrays.asList("xml", "xsd", "xsl")); outputToFileTypes.put("XML", Arrays.asList("xml", "xsd", "xsl"));
outputToFileTypes.put( outputToFileTypes.put(
"BOOK", "BOOK", Arrays.asList("epub", "mobi", "azw3", "fb2", "txt", "docx"));
Arrays.asList(
"epub", "mobi", "azw3", "fb2", "txt",
"docx"));
// type. // type.
} }

View file

@ -1,131 +0,0 @@
package stirling.software.SPDF.pdf;
import java.awt.geom.Point2D;
import java.io.IOException;
import java.util.List;
import org.apache.pdfbox.contentstream.operator.Operator;
import org.apache.pdfbox.contentstream.operator.OperatorName;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
import org.apache.pdfbox.pdmodel.graphics.image.PDImage;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
public class ImageFinder extends org.apache.pdfbox.contentstream.PDFGraphicsStreamEngine {
private boolean hasImages = false;
public ImageFinder(PDPage page) {
super(page);
}
public boolean hasImages() {
return hasImages;
}
@Override
protected void processOperator(Operator operator, List<COSBase> operands) throws IOException {
String operation = operator.getName();
if (operation.equals(OperatorName.DRAW_OBJECT)) {
COSBase base = operands.get(0);
if (base instanceof COSName) {
COSName objectName = (COSName) base;
PDXObject xobject = getResources().getXObject(objectName);
if (xobject instanceof PDImageXObject) {
hasImages = true;
} else if (xobject instanceof PDFormXObject) {
PDFormXObject form = (PDFormXObject) xobject;
ImageFinder innerFinder = new ImageFinder(getPage());
innerFinder.processPage(getPage());
if (innerFinder.hasImages()) {
hasImages = true;
}
}
}
}
super.processOperator(operator, operands);
}
@Override
public void appendRectangle(Point2D p0, Point2D p1, Point2D p2, Point2D p3) throws IOException {
// TODO Auto-generated method stub
}
@Override
public void drawImage(PDImage pdImage) throws IOException {
// TODO Auto-generated method stub
}
@Override
public void clip(int windingRule) throws IOException {
// TODO Auto-generated method stub
}
@Override
public void moveTo(float x, float y) throws IOException {
// TODO Auto-generated method stub
}
@Override
public void lineTo(float x, float y) throws IOException {
// TODO Auto-generated method stub
}
@Override
public void curveTo(float x1, float y1, float x2, float y2, float x3, float y3)
throws IOException {
// TODO Auto-generated method stub
}
@Override
public Point2D getCurrentPoint() throws IOException {
// TODO Auto-generated method stub
return null;
}
@Override
public void closePath() throws IOException {
// TODO Auto-generated method stub
}
@Override
public void endPath() throws IOException {
// TODO Auto-generated method stub
}
@Override
public void strokePath() throws IOException {
// TODO Auto-generated method stub
}
@Override
public void fillPath(int windingRule) throws IOException {
// TODO Auto-generated method stub
}
@Override
public void fillAndStrokePath(int windingRule) throws IOException {
// TODO Auto-generated method stub
}
@Override
public void shadingFill(COSName shadingName) throws IOException {
// TODO Auto-generated method stub
}
// ... rest of the overridden methods
}

View file

@ -2,8 +2,10 @@ package stirling.software.SPDF.utils;
import java.awt.Graphics; import java.awt.Graphics;
import java.awt.image.BufferedImage; import java.awt.image.BufferedImage;
import java.awt.image.RenderedImage;
import java.io.ByteArrayOutputStream; import java.io.ByteArrayOutputStream;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.zip.ZipEntry; import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream; import java.util.zip.ZipOutputStream;
@ -16,11 +18,15 @@ import javax.imageio.ImageWriter;
import javax.imageio.stream.ImageOutputStream; import javax.imageio.stream.ImageOutputStream;
import org.apache.pdfbox.Loader; import org.apache.pdfbox.Loader;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream; import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.PDPageContentStream.AppendMode; import org.apache.pdfbox.pdmodel.PDPageContentStream.AppendMode;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
import org.apache.pdfbox.pdmodel.graphics.image.JPEGFactory; import org.apache.pdfbox.pdmodel.graphics.image.JPEGFactory;
import org.apache.pdfbox.pdmodel.graphics.image.LosslessFactory; import org.apache.pdfbox.pdmodel.graphics.image.LosslessFactory;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject; import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
@ -31,8 +37,6 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.springframework.web.multipart.MultipartFile; import org.springframework.web.multipart.MultipartFile;
import stirling.software.SPDF.pdf.ImageFinder;
public class PdfUtils { public class PdfUtils {
private static final Logger logger = LoggerFactory.getLogger(PdfUtils.class); private static final Logger logger = LoggerFactory.getLogger(PdfUtils.class);
@ -62,6 +66,23 @@ public class PdfUtils {
} }
} }
public static List<RenderedImage> getAllImages(PDResources resources) throws IOException {
List<RenderedImage> images = new ArrayList<>();
for (COSName name : resources.getXObjectNames()) {
PDXObject object = resources.getXObject(name);
if (object instanceof PDImageXObject) {
images.add(((PDImageXObject) object).getImage());
} else if (object instanceof PDFormXObject) {
images.addAll(getAllImages(((PDFormXObject) object).getResources()));
}
}
return images;
}
public static boolean hasImages(PDDocument document, String pagesToCheck) throws IOException { public static boolean hasImages(PDDocument document, String pagesToCheck) throws IOException {
String[] pageOrderArr = pagesToCheck.split(","); String[] pageOrderArr = pagesToCheck.split(",");
List<Integer> pageList = List<Integer> pageList =
@ -94,9 +115,7 @@ public class PdfUtils {
} }
public static boolean hasImagesOnPage(PDPage page) throws IOException { public static boolean hasImagesOnPage(PDPage page) throws IOException {
ImageFinder imageFinder = new ImageFinder(page); return getAllImages(page.getResources()).size() > 0;
imageFinder.processPage(page);
return imageFinder.hasImages();
} }
public static boolean hasTextOnPage(PDPage page, String phrase) throws IOException { public static boolean hasTextOnPage(PDPage page, String phrase) throws IOException {