This commit is contained in:
Anthony Stirling 2024-05-27 17:53:33 +01:00
parent 65f9438639
commit 3d78e01559
3 changed files with 104 additions and 6 deletions

View file

@ -28,7 +28,28 @@ Feature: API Validation
And the response file should have size greater than 0 And the response file should have size greater than 0
And the response status code should be 200 And the response status code should be 200
@ocr @positive @ocr @positive
Scenario: Extract Image Scans
# Uploads a generated PDF holding 3 images spread over 2 pages to the
# extract-image-scans endpoint and checks that a non-empty ZIP is returned.
# NOTE(review): 3 images are generated but only 2 ZIP entries are expected;
# presumably images that share a page are drawn edge-to-edge and are detected
# as a single scan. Confirm against the PDF generator step.
Given I generate a PDF file as "fileInput"
And the pdf contains 3 images on 2 pages
And the request data includes
| parameter | value |
| angleThreshold | 5 |
| tolerance | 20 |
| minArea | 8000 |
| minContourArea | 500 |
| borderSize | 1 |
When I send the API request to the endpoint "/api/v1/misc/extract-image-scans"
Then the response content type should be "application/octet-stream"
And the response file should have extension ".zip"
And the response ZIP should contain 2 files
And the response file should have size greater than 0
And the response status code should be 200
@ocr @negative
Scenario: Process PDF with text and OCR with type normal Scenario: Process PDF with text and OCR with type normal
Given I generate a PDF file as "fileInput" Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages with random text And the pdf contains 3 pages with random text
@ -79,6 +100,7 @@ Feature: API Validation
When I send the API request to the endpoint "/api/v1/misc/ocr-pdf" When I send the API request to the endpoint "/api/v1/misc/ocr-pdf"
Then the response content type should be "application/octet-stream" Then the response content type should be "application/octet-stream"
And the response file should have extension ".zip" And the response file should have extension ".zip"
And the response ZIP should contain 2 files
And the response file should have size greater than 0 And the response file should have size greater than 0
And the response status code should be 200 And the response status code should be 200
@ -101,7 +123,29 @@ Feature: API Validation
| odt | .odt | | odt | .odt |
| doc | .doc | | doc | .doc |
@ocr
Scenario: PDFA
# Sends a generated 3-page random-text PDF to the PDF/A conversion endpoint
# with outputFormat "pdfa" and expects HTTP 200 plus a ".pdf" response file
# whose size is greater than 100 (presumably bytes; confirm in the size step).
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages with random text
And the request data includes
| parameter | value |
| outputFormat | pdfa |
When I send the API request to the endpoint "/api/v1/convert/pdf/pdfa"
Then the response status code should be 200
And the response file should have extension ".pdf"
And the response file should have size greater than 100
@ocr
Scenario: PDFA1
# Same flow as the "pdfa" conversion check, but requests outputFormat "pdfa-1":
# a generated 3-page random-text PDF must come back as a ".pdf" file with
# HTTP 200 and a size greater than 100 (presumably bytes; confirm in the size step).
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages with random text
And the request data includes
| parameter | value |
| outputFormat | pdfa-1 |
When I send the API request to the endpoint "/api/v1/convert/pdf/pdfa"
Then the response status code should be 200
And the response file should have extension ".pdf"
And the response file should have size greater than 100
@compress @ghostscript @positive @compress @ghostscript @positive
Scenario: Compress Scenario: Compress

View file

@ -1,7 +1,8 @@
@general @general
Feature: API Validation Feature: API Validation
@split-pdf-by-sections
@split-pdf-by-sections @positive
Scenario Outline: split-pdf-by-sections with different parameters Scenario Outline: split-pdf-by-sections with different parameters
Given I generate a PDF file as "fileInput" Given I generate a PDF file as "fileInput"
And the pdf contains 2 pages And the pdf contains 2 pages
@ -23,7 +24,7 @@ Feature: API Validation
| 1 | 2 | 12 | | 1 | 2 | 12 |
| 2 | 2 | 18 | | 2 | 2 | 18 |
@split-pdf-by-sections @split-pdf-by-sections @positive
Scenario Outline: split-pdf-by-sections with different parameters Scenario Outline: split-pdf-by-sections with different parameters
Given I generate a PDF file as "fileInput" Given I generate a PDF file as "fileInput"
And the pdf contains 2 pages And the pdf contains 2 pages
@ -47,7 +48,7 @@ Feature: API Validation
@split-pdf-by-pages @split-pdf-by-pages @positive
Scenario Outline: split-pdf-by-pages with different parameters Scenario Outline: split-pdf-by-pages with different parameters
Given I generate a PDF file as "fileInput" Given I generate a PDF file as "fileInput"
And the pdf contains 20 pages And the pdf contains 20 pages
@ -70,7 +71,7 @@ Feature: API Validation
@split-pdf-by-size-or-count @split-pdf-by-size-or-count @positive
Scenario Outline: split-pdf-by-size-or-count with different parameters Scenario Outline: split-pdf-by-size-or-count with different parameters
Given I generate a PDF file as "fileInput" Given I generate a PDF file as "fileInput"
And the pdf contains 20 pages And the pdf contains 20 pages

View file

@ -66,6 +66,59 @@ def step_pdf_contains_blank_pages(context, page_count):
context.files[context.param_name].close() context.files[context.param_name].close()
context.files[context.param_name] = open(context.file_name, 'rb') context.files[context.param_name] = open(context.file_name, 'rb')
def create_black_box_image(file_name, size):
    """Write a one-page PDF whose page is entirely covered by a black rectangle.

    Args:
        file_name: Target handed to ``canvas.Canvas`` for the generated PDF.
        size: ``(width, height)`` pair used both as the page size and as the
            dimensions of the filled rectangle.
    """
    pdf = canvas.Canvas(file_name, pagesize=size)
    page_w, page_h = size
    pdf.setFillColorRGB(0, 0, 0)
    # Rectangle starts at (0, 0) and spans the full page width and height.
    pdf.rect(0, 0, page_w, page_h, fill=1)
    pdf.showPage()
    pdf.save()
def create_pdf_with_black_boxes(file_name, image_count, page_count):
    """Write a PDF of ``page_count`` pages carrying ``image_count`` black boxes.

    Each page holds at most ``ceil(image_count / page_count)`` boxes. Pages are
    filled in order, so trailing pages may receive fewer boxes or none.
    Boxes are 72 x 72 units and are laid out edge-to-edge, left to right and
    top to bottom, starting at the top-left corner of the page.

    Args:
        file_name: Path of the PDF file to write.
        image_count: Total number of boxes to draw across all pages.
        page_count: Number of pages to generate.

    Raises:
        ZeroDivisionError: If ``page_count`` is 0.
    """
    page_width, page_height = letter
    side = 72  # 1 inch by 1 inch black box
    columns = page_width // side  # how many boxes fit across one row

    # Ceiling division: round up whenever the boxes do not divide evenly.
    whole, leftover = divmod(image_count, page_count)
    per_page = whole + (1 if leftover != 0 else 0)

    output = PdfWriter()
    drawn = 0
    for _ in range(page_count):
        buffer = io.BytesIO()
        page_canvas = canvas.Canvas(buffer, pagesize=letter)
        # Cap this page at the number of boxes still outstanding overall;
        # a non-positive remainder yields an empty range (blank page).
        for slot in range(min(per_page, image_count - drawn)):
            row, col = divmod(slot, columns)
            left = col * side
            # y is the lower edge of the box, measured down from the page top.
            bottom = page_height - ((row + 1) * side)
            page_canvas.setFillColorRGB(0, 0, 0)
            page_canvas.rect(left, bottom, side, side, fill=1)
            drawn += 1
        page_canvas.showPage()
        page_canvas.save()

        # Re-read the rendered single page and append it to the output document.
        buffer.seek(0)
        rendered = PdfReader(buffer)
        output.add_page(rendered.pages[0])

    with open(file_name, 'wb') as out_file:
        output.write(out_file)
@given('the pdf contains {image_count:d} images on {page_count:d} pages')
def step_pdf_contains_images(context, image_count, page_count):
    """Regenerate the scenario's PDF with black-box images and reopen it.

    Falls back to a default parameter name and file name when no earlier step
    has set ``context.param_name``. Any handle already registered under that
    parameter name is closed before the new file is opened for binary reading
    and stored in ``context.files``.

    Args:
        context: behave scenario context shared between steps.
        image_count: Number of black boxes to draw in the PDF.
        page_count: Number of pages the PDF should contain.
    """
    if not hasattr(context, 'param_name'):
        context.param_name = "default"
        context.file_name = "genericNonCustomisableName.pdf"

    create_pdf_with_black_boxes(context.file_name, image_count, page_count)

    if not hasattr(context, 'files'):
        context.files = {}

    key = context.param_name
    open_files = context.files
    # Release the previous handle first so the rewritten file is read fresh.
    if key in open_files:
        open_files[key].close()
    open_files[key] = open(context.file_name, 'rb')
@given('the pdf contains {page_count:d} pages with random text') @given('the pdf contains {page_count:d} pages with random text')
def step_pdf_contains_pages_with_random_text(context, page_count): def step_pdf_contains_pages_with_random_text(context, page_count):
buffer = io.BytesIO() buffer = io.BytesIO()