Merge pull request #1297 from Stirling-Tools/cucumber
Cucumber testcases
This commit is contained in:
commit
b93bff5cad
9 changed files with 489 additions and 6 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
@ -125,3 +125,6 @@ watchedFolders/
|
||||||
# Ignore Mac DS_Store files
|
# Ignore Mac DS_Store files
|
||||||
.DS_Store
|
.DS_Store
|
||||||
**/.DS_Store
|
**/.DS_Store
|
||||||
|
|
||||||
|
#cucumber
|
||||||
|
/cucumber/reports/**
|
16
cucumber/features/environment.py
Normal file
16
cucumber/features/environment.py
Normal file
|
@ -0,0 +1,16 @@
|
||||||
|
import os
|
||||||
|
|
||||||
|
def before_all(context):
    """Give the behave context its initial request state before any feature runs.

    Sets ``endpoint``, ``request_data`` and ``response`` to ``None`` and
    ``files`` to a fresh empty dict.
    """
    initial_state = (
        ('endpoint', None),
        ('request_data', None),
        ('files', {}),
        ('response', None),
    )
    for attribute, value in initial_state:
        setattr(context, attribute, value)
|
||||||
|
|
||||||
|
def after_scenario(context, scenario):
    """Release the resources a scenario may have left behind.

    Closes every handle stored in ``context.files``, then deletes the
    ``response_file`` artifact and the generated input file
    (``context.file_name``) if they exist on disk.

    Args:
        context: behave context object; ``files`` and ``file_name`` are optional.
        scenario: the scenario that just finished (unused).
    """
    open_files = getattr(context, 'files', None)
    if open_files:
        for handle in open_files.values():
            handle.close()
        # Empty the dict in place so that, if the same dict object is reused
        # by a later scenario, it does not still contain closed handles.
        open_files.clear()

    if os.path.exists('response_file'):
        os.remove('response_file')

    # Guard against a missing or None file name; os.path.exists(None) raises.
    file_name = getattr(context, 'file_name', None)
    if file_name and os.path.exists(file_name):
        os.remove(file_name)
|
242
cucumber/features/examples.feature
Normal file
242
cucumber/features/examples.feature
Normal file
|
@ -0,0 +1,242 @@
|
||||||
|
Feature: API Validation
|
||||||
|
|
||||||
|
Scenario: Remove password
|
||||||
|
Given I generate a PDF file as "fileInput"
|
||||||
|
And the pdf contains 3 pages
|
||||||
|
And the pdf is encrypted with password "password123"
|
||||||
|
And the request data includes
|
||||||
|
| parameter | value |
|
||||||
|
| password | password123 |
|
||||||
|
When I send the API request to the endpoint "/api/v1/security/remove-password"
|
||||||
|
Then the response content type should be "application/pdf"
|
||||||
|
And the response file should have size greater than 0
|
||||||
|
And the response PDF is not passworded
|
||||||
|
And the response status code should be 200
|
||||||
|
|
||||||
|
Scenario: Remove password wrong password
|
||||||
|
Given I generate a PDF file as "fileInput"
|
||||||
|
And the pdf contains 3 pages
|
||||||
|
And the pdf is encrypted with password "password123"
|
||||||
|
And the request data includes
|
||||||
|
| parameter | value |
|
||||||
|
| password | wrongPassword |
|
||||||
|
When I send the API request to the endpoint "/api/v1/security/remove-password"
|
||||||
|
Then the response status code should be 500
|
||||||
|
And the response should contain error message "Internal Server Error"
|
||||||
|
|
||||||
|
Scenario: Get info
|
||||||
|
Given I generate a PDF file as "fileInput"
|
||||||
|
When I send the API request to the endpoint "/api/v1/security/get-info-on-pdf"
|
||||||
|
Then the response content type should be "application/json"
|
||||||
|
And the response file should have size greater than 100
|
||||||
|
And the response status code should be 200
|
||||||
|
|
||||||
|
Scenario: Add password
|
||||||
|
Given I generate a PDF file as "fileInput"
|
||||||
|
And the pdf contains 3 pages
|
||||||
|
And the request data includes
|
||||||
|
| parameter | value |
|
||||||
|
| password | password123 |
|
||||||
|
When I send the API request to the endpoint "/api/v1/security/add-password"
|
||||||
|
Then the response content type should be "application/pdf"
|
||||||
|
And the response file should have size greater than 100
|
||||||
|
And the response PDF is passworded
|
||||||
|
And the response status code should be 200
|
||||||
|
|
||||||
|
Scenario: Add password with other params
|
||||||
|
Given I generate a PDF file as "fileInput"
|
||||||
|
And the pdf contains 3 pages
|
||||||
|
And the request data includes
|
||||||
|
| parameter | value |
|
||||||
|
| ownerPassword | ownerPass |
|
||||||
|
| password | password123 |
|
||||||
|
| keyLength | 256 |
|
||||||
|
| canPrint | true |
|
||||||
|
| canModify | false |
|
||||||
|
When I send the API request to the endpoint "/api/v1/security/add-password"
|
||||||
|
Then the response content type should be "application/pdf"
|
||||||
|
And the response file should have size greater than 100
|
||||||
|
And the response PDF is passworded
|
||||||
|
And the response status code should be 200
|
||||||
|
|
||||||
|
|
||||||
|
Scenario: Add watermark
|
||||||
|
Given I generate a PDF file as "fileInput"
|
||||||
|
And the pdf contains 3 pages
|
||||||
|
And the request data includes
|
||||||
|
| parameter | value |
|
||||||
|
| watermarkType | text |
|
||||||
|
| watermarkText | Sample Watermark |
|
||||||
|
| fontSize | 30 |
|
||||||
|
| rotation | 45 |
|
||||||
|
| opacity | 0.5 |
|
||||||
|
| widthSpacer | 50 |
|
||||||
|
| heightSpacer | 50 |
|
||||||
|
When I send the API request to the endpoint "/api/v1/security/add-watermark"
|
||||||
|
Then the response content type should be "application/pdf"
|
||||||
|
And the response file should have size greater than 100
|
||||||
|
And the response status code should be 200
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Scenario: Repair PDF
|
||||||
|
Given I generate a PDF file as "fileInput"
|
||||||
|
When I send the API request to the endpoint "/api/v1/misc/repair"
|
||||||
|
Then the response content type should be "application/pdf"
|
||||||
|
And the response file should have size greater than 0
|
||||||
|
And the response status code should be 200
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Scenario: Remove blank pages
|
||||||
|
Given I generate a PDF file as "fileInput"
|
||||||
|
And the pdf contains 3 blank pages
|
||||||
|
And the request data includes
|
||||||
|
| parameter | value |
|
||||||
|
| threshold | 90 |
|
||||||
|
| whitePercent | 99.9 |
|
||||||
|
When I send the API request to the endpoint "/api/v1/misc/remove-blanks"
|
||||||
|
Then the response content type should be "application/pdf"
|
||||||
|
And the response file should have size greater than 0
|
||||||
|
And the response PDF should contain 0 pages
|
||||||
|
And the response status code should be 200
|
||||||
|
|
||||||
|
@ocr
|
||||||
|
Scenario: Process PDF with OCR
|
||||||
|
Given I generate a PDF file as "fileInput"
|
||||||
|
And the request data includes
|
||||||
|
| parameter | value |
|
||||||
|
| languages | eng |
|
||||||
|
| sidecar | false |
|
||||||
|
| deskew | true |
|
||||||
|
| clean | true |
|
||||||
|
| cleanFinal | true |
|
||||||
|
| ocrType | Normal |
|
||||||
|
| ocrRenderType | hocr |
|
||||||
|
| removeImagesAfter| false |
|
||||||
|
When I send the API request to the endpoint "/api/v1/misc/ocr-pdf"
|
||||||
|
Then the response content type should be "application/pdf"
|
||||||
|
And the response file should have size greater than 0
|
||||||
|
And the response status code should be 200
|
||||||
|
|
||||||
|
@ocr
|
||||||
|
Scenario: Process PDF with text and OCR with type normal
|
||||||
|
Given I generate a PDF file as "fileInput"
|
||||||
|
And the pdf contains 3 pages with random text
|
||||||
|
And the request data includes
|
||||||
|
| parameter | value |
|
||||||
|
| languages | eng |
|
||||||
|
| sidecar | false |
|
||||||
|
| deskew | true |
|
||||||
|
| clean | true |
|
||||||
|
| cleanFinal | true |
|
||||||
|
| ocrType | Normal |
|
||||||
|
| ocrRenderType | hocr |
|
||||||
|
| removeImagesAfter| false |
|
||||||
|
When I send the API request to the endpoint "/api/v1/misc/ocr-pdf"
|
||||||
|
Then the response status code should be 500
|
||||||
|
|
||||||
|
@ocr
|
||||||
|
Scenario: Process PDF with OCR with type force
|
||||||
|
Given I generate a PDF file as "fileInput"
|
||||||
|
And the request data includes
|
||||||
|
| parameter | value |
|
||||||
|
| languages | eng |
|
||||||
|
| sidecar | false |
|
||||||
|
| deskew | true |
|
||||||
|
| clean | true |
|
||||||
|
| cleanFinal | true |
|
||||||
|
| ocrType | Force |
|
||||||
|
| ocrRenderType | hocr |
|
||||||
|
| removeImagesAfter| false |
|
||||||
|
When I send the API request to the endpoint "/api/v1/misc/ocr-pdf"
|
||||||
|
Then the response content type should be "application/pdf"
|
||||||
|
And the response file should have size greater than 0
|
||||||
|
And the response status code should be 200
|
||||||
|
|
||||||
|
@ocr
|
||||||
|
Scenario: Process PDF with OCR with sidecar
|
||||||
|
Given I generate a PDF file as "fileInput"
|
||||||
|
And the request data includes
|
||||||
|
| parameter | value |
|
||||||
|
| languages | eng |
|
||||||
|
| sidecar | true |
|
||||||
|
| deskew | true |
|
||||||
|
| clean | true |
|
||||||
|
| cleanFinal | true |
|
||||||
|
| ocrType | Force |
|
||||||
|
| ocrRenderType | hocr |
|
||||||
|
| removeImagesAfter| false |
|
||||||
|
When I send the API request to the endpoint "/api/v1/misc/ocr-pdf"
|
||||||
|
Then the response content type should be "application/octet-stream"
|
||||||
|
And the response file should have extension ".zip"
|
||||||
|
And the response file should have size greater than 0
|
||||||
|
And the response status code should be 200
|
||||||
|
|
||||||
|
|
||||||
|
Scenario: Flatten PDF
|
||||||
|
Given I generate a PDF file as "fileInput"
|
||||||
|
And the request data includes
|
||||||
|
| parameter | value |
|
||||||
|
| flattenOnlyForms | false |
|
||||||
|
When I send the API request to the endpoint "/api/v1/misc/flatten"
|
||||||
|
Then the response content type should be "application/pdf"
|
||||||
|
And the response file should have size greater than 0
|
||||||
|
And the response status code should be 200
|
||||||
|
|
||||||
|
Scenario: Update metadata
|
||||||
|
Given I generate a PDF file as "fileInput"
|
||||||
|
And the request data includes
|
||||||
|
| parameter | value |
|
||||||
|
| author | John Doe |
|
||||||
|
| title | Sample Title |
|
||||||
|
| subject | Sample Subject |
|
||||||
|
| keywords | sample, test |
|
||||||
|
| producer | Test Producer |
|
||||||
|
When I send the API request to the endpoint "/api/v1/misc/update-metadata"
|
||||||
|
Then the response content type should be "application/pdf"
|
||||||
|
And the response file should have size greater than 0
|
||||||
|
And the response PDF metadata should include "Author" as "John Doe"
|
||||||
|
And the response PDF metadata should include "Keywords" as "sample, test"
|
||||||
|
And the response PDF metadata should include "Subject" as "Sample Subject"
|
||||||
|
And the response PDF metadata should include "Title" as "Sample Title"
|
||||||
|
And the response status code should be 200
|
||||||
|
|
||||||
|
@libre
|
||||||
|
Scenario: Convert PDF to DOCX
|
||||||
|
Given I generate a PDF file as "fileInput"
|
||||||
|
And the pdf contains 3 pages with random text
|
||||||
|
And the request data includes
|
||||||
|
| parameter | value |
|
||||||
|
| outputFormat | docx |
|
||||||
|
When I send the API request to the endpoint "/api/v1/convert/pdf/word"
|
||||||
|
Then the response status code should be 200
|
||||||
|
And the response file should have size greater than 100
|
||||||
|
And the response file should have extension ".docx"
|
||||||
|
# And the response DOCX should contain 3 pages
|
||||||
|
|
||||||
|
@libre
|
||||||
|
Scenario: Convert PDF to ODT
|
||||||
|
Given I generate a PDF file as "fileInput"
|
||||||
|
And the pdf contains 3 pages with random text
|
||||||
|
And the request data includes
|
||||||
|
| parameter | value |
|
||||||
|
| outputFormat | odt |
|
||||||
|
When I send the API request to the endpoint "/api/v1/convert/pdf/word"
|
||||||
|
Then the response status code should be 200
|
||||||
|
And the response file should have size greater than 100
|
||||||
|
And the response file should have extension ".odt"
|
||||||
|
# And the response ODT should contain 3 pages
|
||||||
|
|
||||||
|
@libre
|
||||||
|
Scenario: Convert PDF to DOC
|
||||||
|
Given I generate a PDF file as "fileInput"
|
||||||
|
And the pdf contains 3 pages with random text
|
||||||
|
And the request data includes
|
||||||
|
| parameter | value |
|
||||||
|
| outputFormat | doc |
|
||||||
|
When I send the API request to the endpoint "/api/v1/convert/pdf/word"
|
||||||
|
Then the response status code should be 200
|
||||||
|
And the response file should have extension ".doc"
|
||||||
|
And the response file should have size greater than 100
|
||||||
|
# And the response DOC should contain 3 pages
|
207
cucumber/features/steps/step_definitions.py
Normal file
207
cucumber/features/steps/step_definitions.py
Normal file
|
@ -0,0 +1,207 @@
|
||||||
|
import os
|
||||||
|
import requests
|
||||||
|
from behave import given, when, then
|
||||||
|
from PyPDF2 import PdfWriter, PdfReader
|
||||||
|
import io
|
||||||
|
import random
|
||||||
|
import string
|
||||||
|
from reportlab.lib.pagesizes import letter
|
||||||
|
from reportlab.pdfgen import canvas
|
||||||
|
import mimetypes
|
||||||
|
import requests
|
||||||
|
|
||||||
|
#########
|
||||||
|
# GIVEN #
|
||||||
|
#########
|
||||||
|
|
||||||
|
@given('I generate a PDF file as "{fileInput}"')
def step_generate_pdf(context, fileInput):
    """Create a one-page blank PDF on disk and register it as an upload.

    Args:
        context: behave context; receives ``param_name``, ``file_name`` and
            an open read handle under ``files[fileInput]``.
        fileInput: name of the multipart form field the file is sent under.
    """
    context.param_name = fileInput
    context.file_name = "genericNonCustomisableName.pdf"

    pdf = PdfWriter()
    pdf.add_blank_page(width=72, height=72)  # a single blank page
    with open(context.file_name, 'wb') as out:
        pdf.write(out)

    if not hasattr(context, 'files'):
        context.files = {}
    # The handle is deliberately left open; it is read when the request is sent.
    context.files[context.param_name] = open(context.file_name, 'rb')
|
||||||
|
|
||||||
|
def _write_blank_pdf(context, page_count):
    """Overwrite ``context.file_name`` with ``page_count`` blank 72x72 pages.

    The upload handle stored under ``context.files[context.param_name]`` is
    closed and replaced with one opened on the rewritten file.
    """
    writer = PdfWriter()
    for _ in range(page_count):
        writer.add_blank_page(width=72, height=72)
    with open(context.file_name, 'wb') as f:
        writer.write(f)
    context.files[context.param_name].close()
    context.files[context.param_name] = open(context.file_name, 'rb')


@given('the pdf contains {page_count:d} pages')
def step_pdf_contains_pages(context, page_count):
    """Replace the generated PDF with one containing ``page_count`` blank pages."""
    _write_blank_pdf(context, page_count)


# Same behaviour as the step above for now; kept as its own step so the
# "blank pages" wording in feature files stays available.
@given('the pdf contains {page_count:d} blank pages')
def step_pdf_contains_blank_pages(context, page_count):
    """Replace the generated PDF with one containing ``page_count`` blank pages."""
    _write_blank_pdf(context, page_count)
|
||||||
|
|
||||||
|
@given('the pdf contains {page_count:d} pages with random text')
def step_pdf_contains_pages_with_random_text(context, page_count):
    """Replace the generated PDF with ``page_count`` letter-size pages of random text.

    Each page gets one line of 100 random ASCII letters/digits drawn 100
    points in from the left and 100 points below the top edge.
    """
    pdf_bytes = io.BytesIO()
    pdf_canvas = canvas.Canvas(pdf_bytes, pagesize=letter)
    _page_width, page_height = letter
    alphabet = string.ascii_letters + string.digits

    for _ in range(page_count):
        line = ''.join(random.choices(alphabet, k=100))
        pdf_canvas.drawString(100, page_height - 100, line)
        pdf_canvas.showPage()
    pdf_canvas.save()

    with open(context.file_name, 'wb') as out:
        out.write(pdf_bytes.getvalue())

    # Swap the upload handle for one opened on the rewritten file.
    context.files[context.param_name].close()
    context.files[context.param_name] = open(context.file_name, 'rb')
|
||||||
|
|
||||||
|
@given('the pdf pages all contain the text "{text}"')
def step_pdf_pages_contain_text(context, text):
    """Rewrite the generated PDF so every page shows ``text``.

    The page count of the existing file is kept; the pages themselves are
    regenerated at letter size with ``text`` drawn near the top-left.
    """
    pdf_bytes = io.BytesIO()
    pdf_canvas = canvas.Canvas(pdf_bytes, pagesize=letter)
    _page_width, page_height = letter

    existing_page_count = len(PdfReader(context.file_name).pages)
    for _ in range(existing_page_count):
        pdf_canvas.drawString(100, page_height - 100, text)
        pdf_canvas.showPage()
    pdf_canvas.save()

    with open(context.file_name, 'wb') as out:
        out.write(pdf_bytes.getvalue())

    # Swap the upload handle for one opened on the rewritten file.
    context.files[context.param_name].close()
    context.files[context.param_name] = open(context.file_name, 'rb')
|
||||||
|
|
||||||
|
@given('the pdf is encrypted with password "{password}"')
def step_encrypt_pdf(context, password):
    """Re-save the generated PDF encrypted with ``password``.

    All pages of the current file are copied into a new writer, which is
    encrypted and written back over the same path.
    """
    source = PdfReader(context.file_name)
    protected = PdfWriter()
    for page in source.pages:
        protected.add_page(page)
    protected.encrypt(password)

    with open(context.file_name, 'wb') as out:
        protected.write(out)

    # Swap the upload handle for one opened on the encrypted file.
    context.files[context.param_name].close()
    context.files[context.param_name] = open(context.file_name, 'rb')
|
||||||
|
|
||||||
|
@given('the request data is')
def step_request_data(context):
    """Set ``context.request_data`` from the step's multi-line text block.

    NOTE(review): the text is passed to ``eval``, so it is executed as a
    Python expression. That is only acceptable while feature files are
    trusted; never feed this step text from an external source.
    """
    parsed = eval(context.text)
    context.request_data = parsed
|
||||||
|
|
||||||
|
@given('the request data includes')
def step_request_data_table(context):
    """Build ``context.request_data`` from the step's ``parameter``/``value`` table."""
    form_fields = {}
    for row in context.table:
        form_fields[row['parameter']] = row['value']
    context.request_data = form_fields
|
||||||
|
|
||||||
|
@given('save the generated PDF file as "{filename}" for debugging')
def save_generated_pdf(context, filename):
    """Copy the generated upload file to ``filename`` for manual inspection.

    Args:
        context: behave context holding the open upload handle under
            ``files[param_name]``.
        filename: path the debug copy is written to.
    """
    source = context.files[context.param_name]
    # Read from the start regardless of where the handle currently is.
    source.seek(0)
    with open(filename, 'wb') as f:
        f.write(source.read())
    # Rewind again: this same handle is uploaded later, and leaving it at
    # end-of-file would make the upload read nothing.
    source.seek(0)
    print(f"Saved generated PDF content to {filename}")
|
||||||
|
|
||||||
|
########
|
||||||
|
# WHEN #
|
||||||
|
########
|
||||||
|
|
||||||
|
@when('I send the API request to the endpoint "{endpoint}"')
def step_send_api_request(context, endpoint):
    """POST the collected form fields and files to ``endpoint`` as multipart data.

    The response object is stored on ``context.response``. Missing or
    ``None`` request data is replaced with an empty dict.
    """
    url = f"http://localhost:8080{endpoint}"
    uploads = getattr(context, 'files', {})

    if getattr(context, 'request_data', None) is None:
        context.request_data = {}

    # Plain fields go in as (None, value) tuples, i.e. parts without a filename.
    multipart = [(name, (None, value)) for name, value in context.request_data.items()]

    for name, handle in uploads.items():
        guessed_type, _ = mimetypes.guess_type(handle.name)
        content_type = guessed_type or 'application/octet-stream'
        print(f"form_data {handle.name} with {content_type}")
        multipart.append((name, (handle.name, handle, content_type)))

    context.response = requests.post(url, files=multipart)
|
||||||
|
|
||||||
|
########
|
||||||
|
# THEN #
|
||||||
|
########
|
||||||
|
|
||||||
|
@then('the response content type should be "{content_type}"')
def step_check_response_content_type(context, content_type):
    """Assert the response ``Content-Type`` header starts with ``content_type``."""
    received = context.response.headers.get('Content-Type', '')
    # Prefix match, so any parameters after the media type are ignored.
    matches = received.startswith(content_type)
    assert matches, f"Expected {content_type} but got {received}. Response content: {context.response.content}"
|
||||||
|
|
||||||
|
@then('the response file should have size greater than {size:d}')
def step_check_response_file_size(context, size):
    """Assert the response body is strictly larger than ``size`` bytes."""
    body_length = len(context.response.content)
    assert body_length > size
|
||||||
|
|
||||||
|
@then('the response PDF is not passworded')
def step_check_response_pdf_not_passworded(context):
    """Assert the PDF returned in the response body is not encrypted."""
    returned_pdf = PdfReader(io.BytesIO(context.response.content))
    assert not returned_pdf.is_encrypted
|
||||||
|
|
||||||
|
@then('the response PDF is passworded')
def step_check_response_pdf_passworded(context):
    """Assert the PDF returned in the response body is encrypted.

    Raises:
        AssertionError: if the body cannot be parsed as a PDF, or if the
            parsed PDF is not encrypted.
    """
    # Imported here because the module-level imports do not bring
    # PdfReadError into scope; without it the except clause below would
    # raise NameError and hide the real failure.
    from PyPDF2.errors import PdfReadError

    response_file = io.BytesIO(context.response.content)
    try:
        reader = PdfReader(response_file)
    except PdfReadError as e:
        raise AssertionError(f"Failed to read PDF: {str(e)}. Response content: {context.response.content}") from e
    except Exception as e:
        raise AssertionError(f"An error occurred: {str(e)}. Response content: {context.response.content}") from e

    # Kept outside the try so a failed check is not re-wrapped by the
    # generic handler above.
    assert reader.is_encrypted, "Expected the response PDF to be encrypted but it is not"
|
||||||
|
|
||||||
|
@then('the response status code should be {status_code:d}')
def step_check_response_status_code(context, status_code):
    """Assert the HTTP status of the stored response equals ``status_code``."""
    actual = context.response.status_code
    assert actual == status_code, f"Expected status code {status_code} but got {actual}"
|
||||||
|
|
||||||
|
@then('the response should contain error message "{message}"')
def step_check_response_error_message(context, message):
    """Assert the JSON response body has an ``error`` field equal to ``message``."""
    reported = context.response.json().get('error')
    assert reported == message, f"Expected error message '{message}' but got '{reported}'"
|
||||||
|
|
||||||
|
@then('the response PDF should contain {page_count:d} pages')
def step_check_response_pdf_page_count(context, page_count):
    """Assert the PDF returned in the response body has exactly ``page_count`` pages."""
    returned_pdf = PdfReader(io.BytesIO(context.response.content))
    actual_pages = len(returned_pdf.pages)
    assert actual_pages == page_count, f"Expected {page_count} pages but got {actual_pages} pages"
|
||||||
|
|
||||||
|
@then('the response PDF metadata should include "{metadata_key}" as "{metadata_value}"')
def step_check_response_pdf_metadata(context, metadata_key, metadata_value):
    """Assert a metadata entry of the returned PDF has the expected value.

    Args:
        context: behave context holding the response.
        metadata_key: entry name without the leading slash, e.g. ``Author``.
        metadata_value: expected string value of that entry.
    """
    response_file = io.BytesIO(context.response.content)
    reader = PdfReader(response_file)
    # NOTE(review): reader.metadata is presumably None when the PDF has no
    # info dictionary; fall back to an empty mapping so the assertion fails
    # with a readable message, not an AttributeError.
    metadata = reader.metadata or {}
    # Look the value up once, with the "/" prefix, so the failure message
    # reports the value that was actually compared.
    actual_value = metadata.get("/" + metadata_key)
    assert actual_value == metadata_value, f"Expected {metadata_key} to be '{metadata_value}' but got '{actual_value}'"
|
||||||
|
|
||||||
|
@then('the response file should have extension "{extension}"')
def step_check_response_file_extension(context, extension):
    """Assert the filename in ``Content-Disposition`` ends with ``extension``.

    The first ``filename...=`` parameter of the header is used. A missing
    header or missing filename parameter leaves the name empty, which
    fails the assertion.
    """
    content_disposition = context.response.headers.get('Content-Disposition', '')
    filename = ""
    for part in content_disposition.split(';'):
        # partition splits on the first '=' only, so a value that itself
        # contains '=' is kept whole and a part without '=' is skipped.
        name, separator, value = part.strip().partition('=')
        if separator and name.startswith('filename'):
            filename = value.strip().strip('"')
            break
    assert filename.endswith(extension), f"Expected file extension {extension} but got {filename}. Response content: {context.response.content}"
|
||||||
|
|
||||||
|
@then('save the response file as "{filename}" for debugging')
def step_save_response_file(context, filename):
    """Write the raw response body to ``filename`` for manual inspection."""
    with open(filename, 'wb') as debug_copy:
        debug_copy.write(context.response.content)
    print(f"Saved response content to {filename}")
|
4
cucumber/requirements.txt
Normal file
4
cucumber/requirements.txt
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
behave
|
||||||
|
requests
|
||||||
|
PyPDF2
|
||||||
|
reportlab
|
|
@ -67,7 +67,7 @@ public class BlankPageController {
|
||||||
String pageText = textStripper.getText(document);
|
String pageText = textStripper.getText(document);
|
||||||
boolean hasText = !pageText.trim().isEmpty();
|
boolean hasText = !pageText.trim().isEmpty();
|
||||||
|
|
||||||
Boolean blank = false;
|
Boolean blank = true;
|
||||||
if (hasText) {
|
if (hasText) {
|
||||||
logger.info("page " + pageIndex + " has text, not blank");
|
logger.info("page " + pageIndex + " has text, not blank");
|
||||||
blank = false;
|
blank = false;
|
||||||
|
|
|
@ -34,7 +34,10 @@ public class PDFToFile {
|
||||||
|
|
||||||
// Get the original PDF file name without the extension
|
// Get the original PDF file name without the extension
|
||||||
String originalPdfFileName = Filenames.toSimpleFileName(inputFile.getOriginalFilename());
|
String originalPdfFileName = Filenames.toSimpleFileName(inputFile.getOriginalFilename());
|
||||||
String pdfBaseName = originalPdfFileName.substring(0, originalPdfFileName.lastIndexOf('.'));
|
String pdfBaseName = originalPdfFileName;
|
||||||
|
if (originalPdfFileName.contains(".")) {
|
||||||
|
pdfBaseName = originalPdfFileName.substring(0, originalPdfFileName.lastIndexOf('.'));
|
||||||
|
}
|
||||||
|
|
||||||
Path tempInputFile = null;
|
Path tempInputFile = null;
|
||||||
Path tempOutputDir = null;
|
Path tempOutputDir = null;
|
||||||
|
@ -100,8 +103,15 @@ public class PDFToFile {
|
||||||
|
|
||||||
// Get the original PDF file name without the extension
|
// Get the original PDF file name without the extension
|
||||||
String originalPdfFileName = Filenames.toSimpleFileName(inputFile.getOriginalFilename());
|
String originalPdfFileName = Filenames.toSimpleFileName(inputFile.getOriginalFilename());
|
||||||
String pdfBaseName = originalPdfFileName.substring(0, originalPdfFileName.lastIndexOf('.'));
|
|
||||||
|
|
||||||
|
if (originalPdfFileName == null || "".equals(originalPdfFileName.trim())) {
|
||||||
|
originalPdfFileName = "output.pdf";
|
||||||
|
}
|
||||||
|
// Assume file is pdf if no extension
|
||||||
|
String pdfBaseName = originalPdfFileName;
|
||||||
|
if (originalPdfFileName.contains(".")) {
|
||||||
|
pdfBaseName = originalPdfFileName.substring(0, originalPdfFileName.lastIndexOf('.'));
|
||||||
|
}
|
||||||
// Validate output format
|
// Validate output format
|
||||||
List<String> allowedFormats =
|
List<String> allowedFormats =
|
||||||
Arrays.asList("doc", "docx", "odt", "ppt", "pptx", "odp", "rtf", "xml", "txt:Text");
|
Arrays.asList("doc", "docx", "odt", "ppt", "pptx", "odp", "rtf", "xml", "txt:Text");
|
||||||
|
@ -173,6 +183,7 @@ public class PDFToFile {
|
||||||
if (tempInputFile != null) Files.delete(tempInputFile);
|
if (tempInputFile != null) Files.delete(tempInputFile);
|
||||||
if (tempOutputDir != null) FileUtils.deleteDirectory(tempOutputDir.toFile());
|
if (tempOutputDir != null) FileUtils.deleteDirectory(tempOutputDir.toFile());
|
||||||
}
|
}
|
||||||
|
System.out.println("fileBytes=" + fileBytes.length);
|
||||||
return WebResponseUtils.bytesToWebResponse(
|
return WebResponseUtils.bytesToWebResponse(
|
||||||
fileBytes, fileName, MediaType.APPLICATION_OCTET_STREAM);
|
fileBytes, fileName, MediaType.APPLICATION_OCTET_STREAM);
|
||||||
}
|
}
|
||||||
|
|
|
@ -41,7 +41,7 @@ system:
|
||||||
defaultLocale: 'en-US' # Set the default language (e.g. 'de-DE', 'fr-FR', etc)
|
defaultLocale: 'en-US' # Set the default language (e.g. 'de-DE', 'fr-FR', etc)
|
||||||
googlevisibility: false # 'true' to allow Google visibility (via robots.txt), 'false' to disallow
|
googlevisibility: false # 'true' to allow Google visibility (via robots.txt), 'false' to disallow
|
||||||
enableAlphaFunctionality: false # Set to enable functionality which might need more testing before it fully goes live (This feature might make no changes)
|
enableAlphaFunctionality: false # Set to enable functionality which might need more testing before it fully goes live (This feature might make no changes)
|
||||||
showUpdate: true # see when a new update is available
|
showUpdate: false # see when a new update is available
|
||||||
showUpdateOnlyAdmin: false # Only admins can see when a new update is available, depending on showUpdate it must be set to 'true'
|
showUpdateOnlyAdmin: false # Only admins can see when a new update is available, depending on showUpdate it must be set to 'true'
|
||||||
customHTMLFiles: false # enable to have files placed in /customFiles/templates override the existing template html files
|
customHTMLFiles: false # enable to have files placed in /customFiles/templates override the existing template html files
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue