auto rename
This commit is contained in:
parent
f92482d89e
commit
4e28bf03bd
7 changed files with 212 additions and 2 deletions
|
@ -0,0 +1,164 @@
|
|||
package stirling.software.SPDF.controller.api.other;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.http.HttpStatus;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.bind.annotation.RequestPart;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
import io.swagger.v3.oas.annotations.Parameter;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
import stirling.software.SPDF.utils.GeneralUtils;
|
||||
import stirling.software.SPDF.utils.PdfUtils;
|
||||
import stirling.software.SPDF.utils.WebResponseUtils;
|
||||
import org.apache.pdfbox.pdmodel.*;
|
||||
import org.apache.pdfbox.pdmodel.common.*;
|
||||
import org.apache.pdfbox.pdmodel.PDPageContentStream.*;
|
||||
import org.springframework.web.bind.annotation.*;
|
||||
import org.springframework.http.*;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
import io.swagger.v3.oas.annotations.*;
|
||||
import io.swagger.v3.oas.annotations.media.*;
|
||||
import io.swagger.v3.oas.annotations.parameters.*;
|
||||
import org.apache.pdfbox.pdmodel.font.PDType1Font;
|
||||
import org.apache.pdfbox.text.TextPosition;
|
||||
import org.apache.tomcat.util.http.ResponseUtil;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.net.URLEncoder;
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import com.itextpdf.io.font.constants.StandardFonts;
|
||||
import com.itextpdf.kernel.font.PdfFont;
|
||||
import com.itextpdf.kernel.font.PdfFontFactory;
|
||||
import com.itextpdf.kernel.geom.Rectangle;
|
||||
import com.itextpdf.kernel.pdf.PdfReader;
|
||||
import com.itextpdf.kernel.pdf.PdfWriter;
|
||||
import com.itextpdf.kernel.pdf.PdfDocument;
|
||||
import com.itextpdf.kernel.pdf.PdfPage;
|
||||
import com.itextpdf.kernel.pdf.canvas.PdfCanvas;
|
||||
import com.itextpdf.layout.Canvas;
|
||||
import com.itextpdf.layout.element.Paragraph;
|
||||
import com.itextpdf.layout.properties.TextAlignment;
|
||||
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
|
||||
import java.io.*;
|
||||
import org.apache.pdfbox.pdmodel.*;
|
||||
import org.apache.pdfbox.text.*;
|
||||
import org.springframework.web.bind.annotation.*;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
import io.swagger.v3.oas.annotations.*;
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
@RestController
|
||||
@Tag(name = "Other", description = "Other APIs")
|
||||
public class AutoRenameController {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(AutoRenameController.class);
|
||||
|
||||
private static final float TITLE_FONT_SIZE_THRESHOLD = 20.0f;
|
||||
private static final int LINE_LIMIT = 7;
|
||||
|
||||
@PostMapping(consumes = "multipart/form-data", value = "/auto-rename")
|
||||
@Operation(summary = "Extract header from PDF file", description = "This endpoint accepts a PDF file and attempts to extract its title or header based on heuristics. Input:PDF Output:PDF Type:SISO")
|
||||
public ResponseEntity<byte[]> extractHeader(
|
||||
@RequestPart(value = "fileInput") @Parameter(description = "The input PDF file from which the header is to be extracted.", required = true) MultipartFile file,
|
||||
@RequestParam(required = false, defaultValue = "false") @Parameter(description = "Flag indicating whether to use the first text as a fallback if no suitable title is found. Defaults to false.", required = false) Boolean useFirstTextAsFallback)
|
||||
throws Exception {
|
||||
|
||||
PDDocument document = PDDocument.load(file.getInputStream());
|
||||
PDFTextStripper reader = new PDFTextStripper() {
|
||||
class LineInfo {
|
||||
String text;
|
||||
float fontSize;
|
||||
|
||||
LineInfo(String text, float fontSize) {
|
||||
this.text = text;
|
||||
this.fontSize = fontSize;
|
||||
}
|
||||
}
|
||||
|
||||
List<LineInfo> lineInfos = new ArrayList<>();
|
||||
StringBuilder lineBuilder = new StringBuilder();
|
||||
float lastY = -1;
|
||||
float maxFontSizeInLine = 0.0f;
|
||||
int lineCount = 0;
|
||||
|
||||
@Override
|
||||
protected void processTextPosition(TextPosition text) {
|
||||
if (lastY != text.getY() && lineCount < LINE_LIMIT) {
|
||||
processLine();
|
||||
lineBuilder = new StringBuilder(text.getUnicode());
|
||||
maxFontSizeInLine = text.getFontSizeInPt();
|
||||
lastY = text.getY();
|
||||
lineCount++;
|
||||
} else if (lineCount < LINE_LIMIT) {
|
||||
lineBuilder.append(text.getUnicode());
|
||||
if (text.getFontSizeInPt() > maxFontSizeInLine) {
|
||||
maxFontSizeInLine = text.getFontSizeInPt();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void processLine() {
|
||||
if (lineBuilder.length() > 0 && lineCount < LINE_LIMIT) {
|
||||
lineInfos.add(new LineInfo(lineBuilder.toString(), maxFontSizeInLine));
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getText(PDDocument doc) throws IOException {
|
||||
this.lineInfos.clear();
|
||||
this.lineBuilder = new StringBuilder();
|
||||
this.lastY = -1;
|
||||
this.maxFontSizeInLine = 0.0f;
|
||||
this.lineCount = 0;
|
||||
super.getText(doc);
|
||||
processLine(); // Process the last line
|
||||
|
||||
// Sort lines by font size in descending order and get the first one
|
||||
lineInfos.sort(Comparator.comparing((LineInfo li) -> li.fontSize).reversed());
|
||||
String title = lineInfos.isEmpty() ? null : lineInfos.get(0).text;
|
||||
|
||||
return title != null ? title : (useFirstTextAsFallback ? (lineInfos.isEmpty() ? null : lineInfos.get(lineInfos.size() - 1).text) : null);
|
||||
}
|
||||
};
|
||||
|
||||
String header = reader.getText(document);
|
||||
|
||||
|
||||
|
||||
// Sanitize the header string by removing characters not allowed in a filename.
|
||||
if (header != null && header.length() < 255) {
|
||||
header = header.replaceAll("[/\\\\?%*:|\"<>]", "");
|
||||
return WebResponseUtils.pdfDocToWebResponse(document, header + ".pdf");
|
||||
} else {
|
||||
logger.info("File has no good title to be found");
|
||||
return WebResponseUtils.pdfDocToWebResponse(document, file.getOriginalFilename());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
|
@ -140,4 +140,11 @@ public class OtherWebController {
|
|||
return "other/auto-crop";
|
||||
}
|
||||
|
||||
@GetMapping("/auto-rename")
|
||||
@Hidden
|
||||
public String autoRenameForm(Model model) {
|
||||
model.addAttribute("currentPage", "auto-rename");
|
||||
return "other/auto-rename";
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -138,6 +138,10 @@ home.pipeline.desc=Run multiple actions on PDFs by defining pipeline scripts
|
|||
home.add-page-numbers.title=Add Page Numbers
|
||||
home.add-page-numbers.desc=Add Page numbers throughout a document in a set location
|
||||
|
||||
home.auto-rename.title=Auto Rename PDF File
|
||||
home.auto-rename.desc=Auto renames a PDF file based on its detected header
|
||||
|
||||
|
||||
error.pdfPassword=The PDF Document is passworded and either the password was not provided or was incorrect
|
||||
|
||||
downloadPdf=Download PDF
|
||||
|
|
3
src/main/resources/static/images/fonts.svg
Normal file
3
src/main/resources/static/images/fonts.svg
Normal file
|
@ -0,0 +1,3 @@
|
|||
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-fonts" viewBox="0 0 16 16">
|
||||
<path d="M12.258 3h-8.51l-.083 2.46h.479c.26-1.544.758-1.783 2.693-1.845l.424-.013v7.827c0 .663-.144.82-1.3.923v.52h4.082v-.52c-1.162-.103-1.306-.26-1.306-.923V3.602l.431.013c1.934.062 2.434.301 2.693 1.846h.479L12.258 3z"/>
|
||||
</svg>
|
After Width: | Height: | Size: 357 B |
|
@ -116,7 +116,8 @@
|
|||
<div th:replace="~{fragments/navbarEntry :: navbarEntry ('repair', 'images/wrench.svg', 'home.repair.title', 'home.repair.desc')}"></div>
|
||||
<div th:replace="~{fragments/navbarEntry :: navbarEntry ('remove-blanks', 'images/blank-file.svg', 'home.removeBlanks.title', 'home.removeBlanks.desc')}"></div>
|
||||
<div th:replace="~{fragments/navbarEntry :: navbarEntry ('compare', 'images/scales.svg', 'home.compare.title', 'home.compare.desc')}"></div>
|
||||
<div th:replace="~{fragments/navbarEntry :: navbarEntry ('compare', 'images/add-page-numbers.svg', 'home.add-page-numbers.title', 'home.add-page-numbers.desc')}"></div>
|
||||
<div th:replace="~{fragments/navbarEntry :: navbarEntry ('add-page-numbers', 'images/add-page-numbers.svg', 'home.add-page-numbers.title', 'home.add-page-numbers.desc')}"></div>
|
||||
<div th:replace="~{fragments/navbarEntry :: navbarEntry ('auto-rename', 'images/fonts.svg', 'home.auto-rename.title', 'home.auto-rename.desc')}"></div>
|
||||
</div>
|
||||
</li>
|
||||
|
||||
|
|
|
@ -69,7 +69,7 @@
|
|||
<div th:replace="~{fragments/card :: card(id='scale-pages', cardTitle=#{home.scalePages.title}, cardText=#{home.scalePages.desc}, cardLink='scale-pages', svgPath='images/scale-pages.svg')}"></div>
|
||||
|
||||
<div th:replace="~{fragments/card :: card(id='add-page-numbers', cardTitle=#{home.add-page-numbers.title}, cardText=#{home.add-page-numbers.desc}, cardLink='add-page-numbers', svgPath='images/add-page-numbers.svg')}"></div>
|
||||
|
||||
<div th:replace="~{fragments/card :: card(id='auto-rename', cardTitle=#{home.auto-rename.title}, cardText=#{home.auto-rename.desc}, cardLink='auto-rename', svgPath='images/fonts.svg')}"></div>
|
||||
|
||||
|
||||
|
||||
|
|
31
src/main/resources/templates/other/auto-rename.html
Normal file
31
src/main/resources/templates/other/auto-rename.html
Normal file
|
@ -0,0 +1,31 @@
|
|||
<!DOCTYPE html>
|
||||
<html th:lang="${#locale.toString()}" th:lang-direction="#{language.direction}" xmlns:th="http://www.thymeleaf.org">
|
||||
|
||||
<th:block th:insert="~{fragments/common :: head(title=#{auto-rename.title})}"></th:block>
|
||||
|
||||
|
||||
<body>
|
||||
<th:block th:insert="~{fragments/common :: game}"></th:block>
|
||||
<div id="page-container">
|
||||
<div id="content-wrap">
|
||||
<div th:insert="~{fragments/navbar.html :: navbar}"></div>
|
||||
<br> <br>
|
||||
<div class="container">
|
||||
<div class="row justify-content-center">
|
||||
<div class="col-md-6">
|
||||
<h2 th:text="#{auto-rename.header}"></h2>
|
||||
<form method="post" enctype="multipart/form-data" th:action="@{auto-rename}">
|
||||
<div th:replace="~{fragments/common :: fileSelector(name='fileInput', multiple=false, accept='application/pdf')}"></div>
|
||||
<br>
|
||||
<button type="submit" id="submitBtn" class="btn btn-primary" th:text="#{auto-rename.submit}"></button>
|
||||
</form>
|
||||
<p class="mt-3" th:text="#{auto-rename.credit}"></p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div th:insert="~{fragments/footer.html :: footer}"></div>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
Loading…
Reference in a new issue