From 3d78e01559d4fb849ae698a4879cd8cd9a8a30b0 Mon Sep 17 00:00:00 2001
From: Anthony Stirling <77850077+Frooodle@users.noreply.github.com>
Date: Mon, 27 May 2024 17:53:33 +0100
Subject: [PATCH] cucumber: add extract-image-scans and PDF/A scenarios

---
 cucumber/features/external.feature          | 48 ++++++++++++++++++-
 cucumber/features/general.feature           |  9 ++--
 cucumber/features/steps/step_definitions.py | 53 +++++++++++++++++++++
 3 files changed, 104 insertions(+), 6 deletions(-)

diff --git a/cucumber/features/external.feature b/cucumber/features/external.feature
index bf6ca9dd..1d503135 100644
--- a/cucumber/features/external.feature
+++ b/cucumber/features/external.feature
@@ -28,7 +28,28 @@ Feature: API Validation
     And the response file should have size greater than 0
 	And the response status code should be 200
 
+
   @ocr @positive
+  Scenario: Extract Image Scans
+    Given I generate a PDF file as "fileInput"
+	And the pdf contains 3 images on 2 pages
+    And the request data includes
+      | parameter      | value |
+      | angleThreshold | 5     |
+      | tolerance      | 20    |
+      | minArea        | 8000  |
+      | minContourArea | 500   |
+      | borderSize     | 1     |
+    When I send the API request to the endpoint "/api/v1/misc/extract-image-scans"
+    Then the response content type should be "application/octet-stream"
+	And the response file should have extension ".zip"
+	And the response ZIP should contain 2 files
+    And the response file should have size greater than 0
+	And the response status code should be 200
+	
+	
+	
+  @ocr @negative
   Scenario: Process PDF with text and OCR with type normal 
     Given I generate a PDF file as "fileInput"
     And the pdf contains 3 pages with random text
@@ -79,6 +100,7 @@ Feature: API Validation
     When I send the API request to the endpoint "/api/v1/misc/ocr-pdf"
     Then the response content type should be "application/octet-stream"
 	And the response file should have extension ".zip"
+	And the response ZIP should contain 2 files
     And the response file should have size greater than 0
 	And the response status code should be 200
 
@@ -101,8 +123,30 @@ Feature: API Validation
     | odt    | .odt      |
     | doc    | .doc      |
 
-
-
+  @ocr
+  Scenario: PDFA
+    Given I generate a PDF file as "fileInput"
+    And the pdf contains 3 pages with random text
+	And the request data includes
+      | parameter        | value     |
+      | outputFormat     | pdfa       |
+    When I send the API request to the endpoint "/api/v1/convert/pdf/pdfa"
+	Then the response status code should be 200
+    And the response file should have extension ".pdf"
+    And the response file should have size greater than 100
+	
+  @ocr
+  Scenario: PDFA1
+    Given I generate a PDF file as "fileInput"
+    And the pdf contains 3 pages with random text
+	And the request data includes
+      | parameter        | value     |
+      | outputFormat     | pdfa-1       |
+    When I send the API request to the endpoint "/api/v1/convert/pdf/pdfa"
+	Then the response status code should be 200
+    And the response file should have extension ".pdf"
+    And the response file should have size greater than 100
+	
   @compress @ghostscript @positive
   Scenario: Compress
     Given I generate a PDF file as "fileInput"
diff --git a/cucumber/features/general.feature b/cucumber/features/general.feature
index 81b801be..229d37b5 100644
--- a/cucumber/features/general.feature
+++ b/cucumber/features/general.feature
@@ -1,7 +1,8 @@
 @general
 Feature: API Validation
 
-  @split-pdf-by-sections
+	
+  @split-pdf-by-sections @positive
   Scenario Outline: split-pdf-by-sections with different parameters
     Given I generate a PDF file as "fileInput"
     And the pdf contains 2 pages
@@ -23,7 +24,7 @@ Feature: API Validation
     | 1                   | 2                 | 12          |
     | 2                   | 2                 | 18          |
 
-  @split-pdf-by-sections
+  @split-pdf-by-sections @positive
   Scenario Outline: split-pdf-by-sections with different parameters
     Given I generate a PDF file as "fileInput"
     And the pdf contains 2 pages
@@ -47,7 +48,7 @@ Feature: API Validation
 
 
 
-  @split-pdf-by-pages
+  @split-pdf-by-pages @positive
   Scenario Outline: split-pdf-by-pages with different parameters
   Given I generate a PDF file as "fileInput"
   And the pdf contains 20 pages
@@ -70,7 +71,7 @@ Feature: API Validation
 
 
 
-  @split-pdf-by-size-or-count
+  @split-pdf-by-size-or-count @positive
   Scenario Outline: split-pdf-by-size-or-count with different parameters
   Given I generate a PDF file as "fileInput"
   And the pdf contains 20 pages
diff --git a/cucumber/features/steps/step_definitions.py b/cucumber/features/steps/step_definitions.py
index 40ddd22b..60479b6e 100644
--- a/cucumber/features/steps/step_definitions.py
+++ b/cucumber/features/steps/step_definitions.py
@@ -66,6 +66,59 @@ def step_pdf_contains_blank_pages(context, page_count):
     context.files[context.param_name].close()
     context.files[context.param_name] = open(context.file_name, 'rb')
 
+
+def create_black_box_image(file_name, size):
+    """Write a one-page PDF to ``file_name`` whose whole page is filled black."""
+    page_w, page_h = size
+    pdf = canvas.Canvas(file_name, pagesize=size)
+    pdf.setFillColorRGB(0, 0, 0)
+    pdf.rect(0, 0, page_w, page_h, fill=1)
+    pdf.showPage()
+    pdf.save()
+
+def create_pdf_with_black_boxes(file_name, image_count, page_count):
+    """Write a ``page_count``-page letter PDF holding ``image_count`` black squares.
+
+    Squares are 1 inch (72 pt) per side and tiled edge to edge, left to right then
+    top to bottom, with at most ceil(image_count / page_count) on any one page.
+    Raises ValueError when ``page_count`` is below 1.
+    """
+    if page_count < 1:
+        raise ValueError(f"page_count must be at least 1, got {page_count}")
+    page_width, page_height = letter
+    box_size = 72  # 1 inch by 1 inch black box
+    boxes_per_row = int(page_width // box_size)  # loop-invariant, computed once
+    boxes_per_page = -(-image_count // page_count)  # ceiling division
+    writer = PdfWriter()
+    for page_index in range(page_count):
+        packet = io.BytesIO()
+        can = canvas.Canvas(packet, pagesize=letter)
+        can.setFillColorRGB(0, 0, 0)
+        left = image_count - page_index * boxes_per_page  # still to draw; may be <= 0
+        for i in range(min(boxes_per_page, left)):
+            row, col = divmod(i, boxes_per_row)
+            y = page_height - (row + 1) * box_size  # rows step down from the top
+            can.rect(col * box_size, y, box_size, box_size, fill=1)
+        can.showPage()
+        can.save()
+        packet.seek(0)
+        writer.add_page(PdfReader(packet).pages[0])
+    with open(file_name, 'wb') as f:
+        writer.write(f)
+
+@given('the pdf contains {image_count:d} images on {page_count:d} pages')
+def step_pdf_contains_images(context, image_count, page_count):
+    """Create the black-box PDF and keep it open for reading in ``context.files``."""
+    if not hasattr(context, 'param_name'):
+        context.param_name = "default"  # key used for the entry in context.files
+    context.file_name = pdf_path = "genericNonCustomisableName.pdf"
+    create_pdf_with_black_boxes(pdf_path, image_count, page_count)
+    if not hasattr(context, 'files'):
+        context.files = {}
+    if context.param_name in context.files:
+        context.files[context.param_name].close()  # release the handle being replaced
+    context.files[context.param_name] = open(pdf_path, 'rb')  # not closed in this step
+    
 @given('the pdf contains {page_count:d} pages with random text')
 def step_pdf_contains_pages_with_random_text(context, page_count):
     buffer = io.BytesIO()