From 3d78e01559d4fb849ae698a4879cd8cd9a8a30b0 Mon Sep 17 00:00:00 2001 From: Anthony Stirling <77850077+Frooodle@users.noreply.github.com.> Date: Mon, 27 May 2024 17:53:33 +0100 Subject: [PATCH] cuke --- cucumber/features/external.feature | 48 ++++++++++++++++++- cucumber/features/general.feature | 9 ++-- cucumber/features/steps/step_definitions.py | 53 +++++++++++++++++++++ 3 files changed, 104 insertions(+), 6 deletions(-) diff --git a/cucumber/features/external.feature b/cucumber/features/external.feature index bf6ca9dd..1d503135 100644 --- a/cucumber/features/external.feature +++ b/cucumber/features/external.feature @@ -28,7 +28,28 @@ Feature: API Validation And the response file should have size greater than 0 And the response status code should be 200 + @ocr @positive + Scenario: Extract Image Scans + Given I generate a PDF file as "fileInput" + And the pdf contains 3 images on 2 pages + And the request data includes + | parameter | value | + | angleThreshold | 5 | + | tolerance | 20 | + | minArea | 8000 | + | minContourArea | 500 | + | borderSize | 1 | + When I send the API request to the endpoint "/api/v1/misc/extract-image-scans" + Then the response content type should be "application/octet-stream" + And the response file should have extension ".zip" + And the response ZIP should contain 2 files + And the response file should have size greater than 0 + And the response status code should be 200 + + + + @ocr @negative Scenario: Process PDF with text and OCR with type normal Given I generate a PDF file as "fileInput" And the pdf contains 3 pages with random text @@ -79,6 +100,7 @@ Feature: API Validation When I send the API request to the endpoint "/api/v1/misc/ocr-pdf" Then the response content type should be "application/octet-stream" And the response file should have extension ".zip" + And the response ZIP should contain 2 files And the response file should have size greater than 0 And the response status code should be 200 @@ -101,8 +123,30 @@ Feature: 
API Validation | odt | .odt | | doc | .doc | - - + @ocr + Scenario: PDFA + Given I generate a PDF file as "fileInput" + And the pdf contains 3 pages with random text + And the request data includes + | parameter | value | + | outputFormat | pdfa | + When I send the API request to the endpoint "/api/v1/convert/pdf/pdfa" + Then the response status code should be 200 + And the response file should have extension ".pdf" + And the response file should have size greater than 100 + + @ocr + Scenario: PDFA1 + Given I generate a PDF file as "fileInput" + And the pdf contains 3 pages with random text + And the request data includes + | parameter | value | + | outputFormat | pdfa-1 | + When I send the API request to the endpoint "/api/v1/convert/pdf/pdfa" + Then the response status code should be 200 + And the response file should have extension ".pdf" + And the response file should have size greater than 100 + @compress @ghostscript @positive Scenario: Compress Given I generate a PDF file as "fileInput" diff --git a/cucumber/features/general.feature b/cucumber/features/general.feature index 81b801be..229d37b5 100644 --- a/cucumber/features/general.feature +++ b/cucumber/features/general.feature @@ -1,7 +1,8 @@ @general Feature: API Validation - @split-pdf-by-sections + + @split-pdf-by-sections @positive Scenario Outline: split-pdf-by-sections with different parameters Given I generate a PDF file as "fileInput" And the pdf contains 2 pages @@ -23,7 +24,7 @@ Feature: API Validation | 1 | 2 | 12 | | 2 | 2 | 18 | - @split-pdf-by-sections + @split-pdf-by-sections @positive Scenario Outline: split-pdf-by-sections with different parameters Given I generate a PDF file as "fileInput" And the pdf contains 2 pages @@ -47,7 +48,7 @@ Feature: API Validation - @split-pdf-by-pages + @split-pdf-by-pages @positive Scenario Outline: split-pdf-by-pages with different parameters Given I generate a PDF file as "fileInput" And the pdf contains 20 pages @@ -70,7 +71,7 @@ Feature: API Validation - 
@split-pdf-by-size-or-count + @split-pdf-by-size-or-count @positive Scenario Outline: split-pdf-by-size-or-count with different parameters Given I generate a PDF file as "fileInput" And the pdf contains 20 pages
diff --git a/cucumber/features/steps/step_definitions.py b/cucumber/features/steps/step_definitions.py
index 40ddd22b..60479b6e 100644
--- a/cucumber/features/steps/step_definitions.py
+++ b/cucumber/features/steps/step_definitions.py
@@ -66,6 +66,88 @@ def step_pdf_contains_blank_pages(context, page_count):
         context.files[context.param_name].close()
     context.files[context.param_name] = open(context.file_name, 'rb')
 
+
+def create_black_box_image(file_name, size):
+    """Write a one-page PDF whose whole page is filled with black.
+
+    file_name is handed to canvas.Canvas unchanged; size is the
+    (width, height) of the page and of the filled rectangle.
+    """
+    can = canvas.Canvas(file_name, pagesize=size)
+    width, height = size
+    can.setFillColorRGB(0, 0, 0)
+    can.rect(0, 0, width, height, fill=1)
+    can.showPage()
+    can.save()
+
+
+def create_pdf_with_black_boxes(file_name, image_count, page_count):
+    """Write a PDF with image_count black squares spread over page_count pages.
+
+    Squares have a side of 72 units and fill each letter-sized page row by
+    row from the top-left corner. Every page takes ceil(image_count /
+    page_count) squares until image_count is reached, so later pages may hold fewer.
+
+    Raises ValueError when page_count is below 1; without the check a
+    page_count of 0 fails with an unexplained ZeroDivisionError.
+    """
+    if page_count < 1:
+        raise ValueError(
+            "page_count must be at least 1, got {}".format(page_count))
+
+    page_width, page_height = letter
+    box_size = 72  # 1 inch by 1 inch black box
+    # Ceiling division so every requested box fits on some page.
+    boxes_per_page = (image_count + page_count - 1) // page_count
+    boxes_per_row = page_width // box_size  # loop-invariant, computed once
+
+    writer = PdfWriter()
+    box_counter = 0
+
+    for _ in range(page_count):
+        packet = io.BytesIO()
+        can = canvas.Canvas(packet, pagesize=letter)
+
+        for i in range(boxes_per_page):
+            if box_counter >= image_count:
+                break
+            x = (i % boxes_per_row) * box_size
+            y = page_height - ((i // boxes_per_row + 1) * box_size)
+            can.setFillColorRGB(0, 0, 0)
+            can.rect(x, y, box_size, box_size, fill=1)
+            box_counter += 1
+
+        can.showPage()
+        can.save()
+        packet.seek(0)
+        # Each page is rendered as its own one-page PDF, then merged.
+        new_pdf = PdfReader(packet)
+        writer.add_page(new_pdf.pages[0])
+
+    with open(file_name, 'wb') as f:
+        writer.write(f)
+
+
+@given('the pdf contains {image_count:d} images on {page_count:d} pages')
+def step_pdf_contains_images(context, image_count, page_count):
+    """Regenerate the scenario PDF with black boxes and reopen it for reading.
+
+    When no earlier step set context.param_name, the parameter name
+    "default" and a fixed file name are used. A handle already stored for
+    the parameter is closed before the new one replaces it.
+    """
+    if not hasattr(context, 'param_name'):
+        context.param_name = "default"
+        context.file_name = "genericNonCustomisableName.pdf"
+    create_pdf_with_black_boxes(context.file_name, image_count, page_count)
+    if not hasattr(context, 'files'):
+        context.files = {}
+    if context.param_name in context.files:
+        context.files[context.param_name].close()
+    # Deliberately left open: the handle is kept on the context, not used here.
+    context.files[context.param_name] = open(context.file_name, 'rb')
+
+
 @given('the pdf contains {page_count:d} pages with random text')
 def step_pdf_contains_pages_with_random_text(context, page_count):
     buffer = io.BytesIO()