init home for other features

This commit is contained in:
Anthony Stirling 2023-05-08 22:55:01 +01:00
parent a9e22947ef
commit 28faf3888c
9 changed files with 197 additions and 83 deletions

View file

@ -24,6 +24,8 @@ Feel free to request any features or bug fixes either in github issues or our [D
- Add/Generate signatures - Add/Generate signatures
- Flatten PDFs - Flatten PDFs
- Repair PDFs - Repair PDFs
- Detect and remove blank pages
- Compare 2 PDFs and show differences in text
- Add images to PDFs - Add images to PDFs
- Rotating PDFs in 90 degree increments. - Rotating PDFs in 90 degree increments.
- Compressing PDFs to decrease their filesize. (Using OCRMyPDF) - Compressing PDFs to decrease their filesize. (Using OCRMyPDF)
@ -77,10 +79,12 @@ docker run -d \
frooodle/s-pdf frooodle/s-pdf
Can also add these for customisation Can also add these for customisation but are not required
-e APP_HOME_NAME="Stirling PDF" \ -e APP_HOME_NAME="Stirling PDF" \
-e APP_HOME_DESCRIPTION="Your locally hosted one-stop-shop for all your PDF needs." \ -e APP_HOME_DESCRIPTION="Your locally hosted one-stop-shop for all your PDF needs." \
-e APP_NAVBAR_NAME="Stirling PDF" \ -e APP_NAVBAR_NAME="Stirling PDF" \
-e ALLOW_GOOGLE_VISABILITY="true" \
-e APP_LOCALE="en_GB" \
``` ```
Docker Compose Docker Compose
``` ```
@ -94,9 +98,11 @@ services:
- /location/of/trainingData:/usr/share/tesseract-ocr/4.00/tessdata #Required for extra OCR languages - /location/of/trainingData:/usr/share/tesseract-ocr/4.00/tessdata #Required for extra OCR languages
# - /location/of/extraConfigs:/configs # - /location/of/extraConfigs:/configs
# environment: # environment:
# APP_LOCALE: en_GB
# APP_HOME_NAME: Stirling PDF # APP_HOME_NAME: Stirling PDF
# APP_HOME_DESCRIPTION: Your locally hosted one-stop-shop for all your PDF needs. # APP_HOME_DESCRIPTION: Your locally hosted one-stop-shop for all your PDF needs.
# APP_NAVBAR_NAME: Stirling PDF # APP_NAVBAR_NAME: Stirling PDF
# ALLOW_GOOGLE_VISABILITY: true
``` ```
@ -122,7 +128,9 @@ Stirling PDF allows easy customization of the visible application name.
Simply use environment variables APP_HOME_NAME, APP_HOME_DESCRIPTION and APP_NAVBAR_NAME with Docker or Java. Simply use environment variables APP_HOME_NAME, APP_HOME_DESCRIPTION and APP_NAVBAR_NAME with Docker or Java.
If running Java directly, you can also pass these as properties using -D arguments. If running Java directly, you can also pass these as properties using -D arguments.
Using the same method you can also change the default language by providing APP_LOCALE with values like de-DE fr-FR or ar-AR to select your default language (Will always default to English on invalid locale) Using the same method you can also change
- The default language, by setting APP_LOCALE to a value such as de-DE, fr-FR or ar-AR (an invalid locale always falls back to English)
- Search engine visibility, by setting ALLOW_GOOGLE_VISABILITY to true or false. Visibility is disabled by default.
## API ## API
For those wanting to use Stirling-PDFs backend API to link with their own custom scripting to edit PDFs you can view all existing API documentation For those wanting to use Stirling-PDFs backend API to link with their own custom scripting to edit PDFs you can view all existing API documentation

View file

@ -30,8 +30,9 @@ public class BlankPageController {
@PostMapping(consumes = "multipart/form-data", value = "/remove-blanks") @PostMapping(consumes = "multipart/form-data", value = "/remove-blanks")
public ResponseEntity<byte[]> removeBlankPages(@RequestPart(required = true, value = "fileInput") MultipartFile inputFile) throws IOException, InterruptedException { public ResponseEntity<byte[]> removeBlankPages(@RequestPart(required = true, value = "fileInput") MultipartFile inputFile) throws IOException, InterruptedException {
PDDocument document = null;
try { try {
PDDocument document = PDDocument.load(inputFile.getInputStream()); document = PDDocument.load(inputFile.getInputStream());
PDPageTree pages = document.getDocumentCatalog().getPages(); PDPageTree pages = document.getDocumentCatalog().getPages();
PDFTextStripper textStripper = new PDFTextStripper(); PDFTextStripper textStripper = new PDFTextStripper();
@ -67,7 +68,7 @@ public class BlankPageController {
BufferedImage image = pdfRenderer.renderImageWithDPI(i - 1, 300); BufferedImage image = pdfRenderer.renderImageWithDPI(i - 1, 300);
ImageIO.write(image, "png", tempFile.toFile()); ImageIO.write(image, "png", tempFile.toFile());
List<String> command = new ArrayList<>(Arrays.asList("python3", "./scripts/detect-blank-pages.py", tempFile.toString())); List<String> command = new ArrayList<>(Arrays.asList("python3", System.getProperty("user.dir") + "scripts/detect-blank-pages.py", tempFile.toString()));
// Run CLI command // Run CLI command
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.PYTHON_OPENCV).runCommandWithOutputHandling(command); int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.PYTHON_OPENCV).runCommandWithOutputHandling(command);
@ -81,12 +82,15 @@ public class BlankPageController {
} }
document.close();
return PdfUtils.pdfDocToWebResponse(outputDocument, inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_blanksRemoved.pdf"); return PdfUtils.pdfDocToWebResponse(outputDocument, inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_blanksRemoved.pdf");
} catch (IOException e) { } catch (IOException e) {
e.printStackTrace(); e.printStackTrace();
return new ResponseEntity<>(HttpStatus.INTERNAL_SERVER_ERROR); return new ResponseEntity<>(HttpStatus.INTERNAL_SERVER_ERROR);
} finally {
if(document != null)
document.close();
} }
} }

View file

@ -84,9 +84,9 @@ public class GeneralWebController {
if (allowGoogleVisibility == null) if (allowGoogleVisibility == null)
allowGoogleVisibility = System.getenv("ALLOW_GOOGLE_VISABILITY"); allowGoogleVisibility = System.getenv("ALLOW_GOOGLE_VISABILITY");
if (allowGoogleVisibility == null) if (allowGoogleVisibility == null)
allowGoogleVisibility = "true"; allowGoogleVisibility = "false";
if (Boolean.parseBoolean(allowGoogleVisibility)) { if (Boolean.parseBoolean(allowGoogleVisibility)) {
return "User-agent: Googlebot\nAllow: /\n\nUser-agent: *\nDisallow: /"; return "User-agent: Googlebot\nAllow: /\n\nUser-agent: *\nAllow: /";
} else { } else {
return "User-agent: Googlebot\nDisallow: /\n\nUser-agent: *\nDisallow: /"; return "User-agent: Googlebot\nDisallow: /\n\nUser-agent: *\nDisallow: /";
} }

View file

@ -117,6 +117,12 @@ home.flatten.desc=Remove all interactive elements and forms from a PDF
home.repair.title=Repair home.repair.title=Repair
home.repair.desc=Tries to repair a corrupt/broken PDF home.repair.desc=Tries to repair a corrupt/broken PDF
home.removeBlanks.title=Remove Blank pages
home.removeBlanks.desc=Detects and removes blank pages from a document
home.compare.title=Compare
home.compare.desc=Compares and shows the differences between 2 PDF Documents
downloadPdf=Download PDF downloadPdf=Download PDF
text=Text text=Text
font=Font font=Font

View file

@ -0,0 +1,3 @@
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-file" viewBox="0 0 16 16">
<path d="M4 0a2 2 0 0 0-2 2v12a2 2 0 0 0 2 2h8a2 2 0 0 0 2-2V2a2 2 0 0 0-2-2H4zm0 1h8a1 1 0 0 1 1 1v12a1 1 0 0 1-1 1H4a1 1 0 0 1-1-1V2a1 1 0 0 1 1-1z"/>
</svg>

After

Width:  |  Height:  |  Size: 284 B

View file

@ -0,0 +1,94 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Generator: Adobe Illustrator 13.0.2, SVG Export Plug-In . SVG Version: 6.00 Build 14948) -->
<svg
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns="http://www.w3.org/2000/svg"
xmlns:cc="http://web.resource.org/cc/"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:ns1="http://sozi.baierouge.fr"
id="Layer_1"
style="enable-background:new 0 0 333.33 287.889"
xml:space="preserve"
viewBox="0 0 333.33 287.889"
version="1.1"
y="0px"
x="0px"
>
<g
>
<polygon
style="fill:none"
points="19.374 136.98 98.932 136.98 59.083 18.273"
/>
<polygon
style="fill:none"
points="235.57 191.34 315.13 191.34 275.28 72.635"
/>
<path
d="m318.63 191.34l-42.85-127.63 6.892 1.386c-0.563 2.401 0.902 4.816 3.304 5.41 2.419 0.599 4.869-0.877 5.47-3.298 0.602-2.42-0.878-4.868-3.301-5.469-2.095-0.518-4.205 0.523-5.125 2.384l-58.16-17.14-54.361-16.029-0.509-9.298c5.801-0.21 10.444-4.966 10.444-10.818 0.01-5.985-4.85-10.838-10.84-10.838-5.985 0-10.837 4.853-10.837 10.838 0 5.288 3.79 9.686 8.8 10.64l-0.477 8.708-53.87-10.838-60.542-12.179c0.08-2.094-1.315-4.028-3.429-4.552-2.419-0.599-4.867 0.879-5.466 3.299-0.599 2.421 0.877 4.869 3.297 5.468 2.418 0.598 4.865-0.876 5.468-3.292l5.889 1.736-42.537 127.15h-15.88s9.255 20.324 58.069 20.324 54.804-20.324 54.804-20.324h-10.446l-42.535-126.72 51.784 15.269 54.78 16.151-11.76 214.82s0.461 9.693-14.772 12.002c-15.234 2.308-34.621 3.229-43.853 8.309-9.233 5.078-10.617 11.078-10.617 11.078h166.64s-1.386-6-10.615-11.078c-9.234-5.079-28.621-6.001-43.854-8.309-15.233-2.309-14.772-12.002-14.772-12.002l-11.72-213.83 52.188 10.499 51.506 10.362-42.755 127.82h-11.623s9.255 20.324 58.068 20.324 54.804-20.324 54.804-20.324h-14.7zm-219.7-54.36h-79.558l39.709-118.71 39.849 118.71zm136.64 54.36l39.708-118.71 39.85 118.71h-79.56z"
/>
</g
>
<metadata
><rdf:RDF
><cc:Work
><dc:format
>image/svg+xml</dc:format
><dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage"
/><cc:license
rdf:resource="http://creativecommons.org/licenses/publicdomain/"
/><dc:publisher
><cc:Agent
rdf:about="http://openclipart.org/"
><dc:title
>Openclipart</dc:title
></cc:Agent
></dc:publisher
><dc:title
>scales of justice</dc:title
><dc:date
>2009-06-26T04:35:18</dc:date
><dc:description
/><dc:source
>https://openclipart.org/detail/26849/scales-of-justice-by-johnny_automatic</dc:source
><dc:creator
><cc:Agent
><dc:title
>johnny_automatic</dc:title
></cc:Agent
></dc:creator
><dc:subject
><rdf:Bag
><rdf:li
>justice</rdf:li
><rdf:li
>law</rdf:li
><rdf:li
>measurement</rdf:li
><rdf:li
>scales</rdf:li
><rdf:li
>silhouette</rdf:li
><rdf:li
>weight</rdf:li
></rdf:Bag
></dc:subject
></cc:Work
><cc:License
rdf:about="http://creativecommons.org/licenses/publicdomain/"
><cc:permits
rdf:resource="http://creativecommons.org/ns#Reproduction"
/><cc:permits
rdf:resource="http://creativecommons.org/ns#Distribution"
/><cc:permits
rdf:resource="http://creativecommons.org/ns#DerivativeWorks"
/></cc:License
></rdf:RDF
></metadata
></svg
>

After

Width:  |  Height:  |  Size: 3.8 KiB

View file

@ -112,8 +112,8 @@ filter: invert(0.2) sepia(2) saturate(50) hue-rotate(190deg);
<div th:replace="~{fragments/card :: card(cardTitle=#{home.flatten.title}, cardText=#{home.flatten.desc}, cardLink='flatten', svgPath='images/flatten.svg')}"></div> <div th:replace="~{fragments/card :: card(cardTitle=#{home.flatten.title}, cardText=#{home.flatten.desc}, cardLink='flatten', svgPath='images/flatten.svg')}"></div>
<div th:replace="~{fragments/card :: card(cardTitle=#{home.repair.title}, cardText=#{home.repair.desc}, cardLink='repair', svgPath='images/wrench.svg')}"></div> <div th:replace="~{fragments/card :: card(cardTitle=#{home.repair.title}, cardText=#{home.repair.desc}, cardLink='repair', svgPath='images/wrench.svg')}"></div>
<div th:replace="~{fragments/card :: card(cardTitle=#{home.removeBlanks.title}, cardText=#{home.removeBlanks.desc}, cardLink='remove-blanks', svgPath='images/wrench.svg')}"></div> <div th:replace="~{fragments/card :: card(cardTitle=#{home.removeBlanks.title}, cardText=#{home.removeBlanks.desc}, cardLink='remove-blanks', svgPath='images/blank-file.svg')}"></div>
<div th:replace="~{fragments/card :: card(cardTitle=#{home.compare.title}, cardText=#{home.compare.desc}, cardLink='compare', svgPath='images/wrench.svg')}"></div> <div th:replace="~{fragments/card :: card(cardTitle=#{home.compare.title}, cardText=#{home.compare.desc}, cardLink='compare', svgPath='images/scales.svg')}"></div>
</div> </div>
</div> </div>
<div th:insert="~{fragments/footer.html :: footer}"></div> <div th:insert="~{fragments/footer.html :: footer}"></div>

View file

@ -18,7 +18,25 @@
<div th:replace="~{fragments/common :: fileSelector(name='fileInput', multiple=false, accept='application/pdf')}"></div> <div th:replace="~{fragments/common :: fileSelector(name='fileInput', multiple=false, accept='application/pdf')}"></div>
<div th:replace="~{fragments/common :: fileSelector(name='fileInput2', multiple=false, accept='application/pdf')}"></div> <div th:replace="~{fragments/common :: fileSelector(name='fileInput2', multiple=false, accept='application/pdf')}"></div>
<button onclick="comparePDFs()">Compare</button> <button onclick="comparePDFs()">Compare</button>
<div id="result"></div> <div class="row">
<div class="col-md-6">
<h3>Document 1</h3>
<div id="result1" class="result-column"></div>
</div>
<div class="col-md-6">
<h3>Document 2</h3>
<div id="result2" class="result-column"></div>
</div>
</div>
<style>
.result-column {
border: 1px solid #ccc;
padding: 15px;
overflow-y: scroll;
height: 400px;
white-space: pre-wrap;
}
</style>
<script> <script>
async function comparePDFs() { async function comparePDFs() {
const file1 = document.getElementById("fileInput-input").files[0]; const file1 = document.getElementById("fileInput-input").files[0];
@ -40,9 +58,9 @@
const page = await pdf.getPage(i); const page = await pdf.getPage(i);
const content = await page.getTextContent(); const content = await page.getTextContent();
const strings = content.items.map(item => item.str); const strings = content.items.map(item => item.str);
pages.push(strings.join("")); pages.push(strings.join(" "));
} }
return pages.join("\n"); return pages.join(" ");
}; };
const [text1, text2] = await Promise.all([ const [text1, text2] = await Promise.all([
@ -50,78 +68,62 @@
extractText(pdf2) extractText(pdf2)
]); ]);
if (text1.trim() === "" || text2.trim() === "") {
alert("One or both of the selected PDFs have no text content. Please choose PDFs with text for comparison.");
return;
}
const diff = (text1, text2) => { const diff = (text1, text2) => {
const lines1 = text1.split("\n"); // ... Keep the same diff function from the previous response ...
const lines2 = text2.split("\n");
const result = [];
let i = 0, j = 0;
while (i < lines1.length || j < lines2.length) {
console.log(`lines1[${i}]='${lines1[i]}', lines2[${j}]='${lines2[j]}'`);
console.log(`i=${i}, j=${j}`);
if (lines1[i] === lines2[j]) {
result.push([i, j, lines1[i]]);
i++;
j++;
console.log(`i=${i}, j=${j}`);
} else {
let k = i, l = j;
while (k < lines1.length && l < lines2.length && lines1[k] !== lines2[l]) {
k++;
l++;
}
for (let x = i; x < k; x++) {
result.push([x, -1, lines1[x]]);
}
for (let y = j; y < l; y++) {
result.push([-1, y, lines2[y]]);
}
i = k;
j = l;
}
}
return result;
}; };
const differences = diff(text1, text2); const differences = diff(text1, text2);
const highlightDifferences = async (pdf, differences) => {
for (const difference of differences) { const displayDifferences = (differences) => {
const [pageIndex, lineIndex, lineText] = difference; const resultDiv1 = document.getElementById("result1");
if (lineIndex === -1) { const resultDiv2 = document.getElementById("result2");
continue; resultDiv1.innerHTML = "";
resultDiv2.innerHTML = "";
let doc1Pointer = 0;
let doc2Pointer = 0;
differences.forEach(([color, word]) => {
const span1 = document.createElement("span");
const span2 = document.createElement("span");
if (color === "green") {
span1.style.color = color;
span1.textContent = word;
resultDiv1.appendChild(span1);
doc1Pointer++;
} else if (color === "red") {
span2.style.color = color;
span2.textContent = word;
resultDiv2.appendChild(span2);
doc2Pointer++;
} else {
span1.style.color = color;
span1.textContent = word;
resultDiv1.appendChild(span1);
doc1Pointer++;
span2.style.color = color;
span2.textContent = word;
resultDiv2.appendChild(span2);
doc2Pointer++;
} }
console.log(pageIndex);
const page = await pdf.getPage(pageIndex); // Add space after each word
const viewport = page.getViewport({ scale: 1 }); const space1 = document.createElement("span");
const [left,top] = viewport.convertToViewportPoint(0, lineIndex * 20); const space2 = document.createElement("span");
const [right, bottom] = viewport.convertToViewportPoint(500, (lineIndex + 1) * 20); space1.textContent = " ";
const annotation = { space2.textContent = " ";
type: "Highlight", resultDiv1.appendChild(space1);
rect: [left, top, right - left, bottom - top], resultDiv2.appendChild(space2);
color: [255, 255, 0], });
opacity: 0.5, return result;
quadPoints:
[
left, top, right, top, right, bottom, left, bottom
]
}; };
console.log('Differences:', differences);
await page.addAnnotation(annotation); displayDifferences(differences);
const message = `Difference found in page ${pageIndex }, line ${lineIndex + 1}: ${lineText}`;
const p = document.createElement("p");
p.textContent = message;
document.getElementById("result").appendChild(p);
}
};
await highlightDifferences(pdf1, differences);
} }
</script> </script>
</div> </div>
@ -130,6 +132,3 @@
</div> </div>
<div th:insert="~{fragments/footer.html :: footer}"></div> <div th:insert="~{fragments/footer.html :: footer}"></div>
</div> </div>
</body>
</html>