cucumber

2024-05-26 15:58:33 +01:00 · 2024-05-26 15:58:33 +01:00 · 3ae891c62e
commit 3ae891c62e
parent 48bd060d6e
5 changed files with 473 additions and 1 deletions
--- a/.gitignore
+++ b/.gitignore
@ -125,3 +125,6 @@ watchedFolders/
 # Ignore Mac DS_Store files
 .DS_Store
 **/.DS_Store
 #cucumber
 /cucumber/reports/**
--- a/cucumber/features/environment.py
+++ b/cucumber/features/environment.py
@ -0,0 +1,16 @@
 import os
 def before_all(context):
     """Seed the behave context with empty request/response state.

     Sets ``endpoint``, ``request_data`` and ``response`` to ``None`` and
     ``files`` to a new empty dict so later steps can rely on the
     attributes existing.
     """
     initial_state = (
         ('endpoint', None),
         ('request_data', None),
         ('files', {}),
         ('response', None),
     )
     for attribute, value in initial_state:
         setattr(context, attribute, value)
 def after_scenario(context, scenario):
     """Release the resources a scenario left behind.

     Closes every handle stored in ``context.files`` and empties the mapping,
     then deletes ``response_file`` and the generated input file
     (``context.file_name``) when they exist on disk.

     Args:
         context: behave context; ``files`` and ``file_name`` are optional.
         scenario: the scenario that just finished (unused).
     """
     open_files = getattr(context, 'files', None)
     if open_files:
         for handle in open_files.values():
             handle.close()
         # The dict is created once in before_all and only mutated by the
         # steps, so without clearing it the closed handles would still be
         # registered (and re-sent) in the next scenario.
         open_files.clear()

     leftovers = ['response_file']
     generated = getattr(context, 'file_name', None)
     if generated:  # skip a missing/None name instead of passing it to os.path
         leftovers.append(generated)
     for path in leftovers:
         if os.path.exists(path):
             os.remove(path)
--- a/cucumber/features/examples.feature
+++ b/cucumber/features/examples.feature
@ -0,0 +1,242 @@
 Feature: API Validation
  Scenario: Remove password 
    Given I generate a PDF file as "fileInput"
    And the pdf contains 3 pages
    And the pdf is encrypted with password "password123"
    And the request data includes
      | parameter | value       |
      | password  | password123 |
    When I send the API request to the endpoint "/api/v1/security/remove-password"
    Then the response content type should be "application/pdf"
    And the response file should have size greater than 0
    And the response PDF is not passworded
 	And the response status code should be 200
  Scenario: Remove password wrong password
    Given I generate a PDF file as "fileInput"
    And the pdf contains 3 pages
    And the pdf is encrypted with password "password123"
    And the request data includes
      | parameter | value       |
      | password  | wrongPassword |
    When I send the API request to the endpoint "/api/v1/security/remove-password"
    Then the response status code should be 500
    And the response should contain error message "Internal Server Error"
  Scenario: Get info
    Given I generate a PDF file as "fileInput"
    When I send the API request to the endpoint "/api/v1/security/get-info-on-pdf"
    Then the response content type should be "application/json"
    And the response file should have size greater than 100
 	And the response status code should be 200
  Scenario: Add password
    Given I generate a PDF file as "fileInput"
    And the pdf contains 3 pages
    And the request data includes
      | parameter | value       |
      | password  | password123 |
    When I send the API request to the endpoint "/api/v1/security/add-password"
    Then the response content type should be "application/pdf"
    And the response file should have size greater than 100
    And the response PDF is passworded
 	And the response status code should be 200
  Scenario: Add password with other params 
    Given I generate a PDF file as "fileInput"
    And the pdf contains 3 pages
    And the request data includes
      | parameter      | value       |
      | ownerPassword  | ownerPass   |
      | password       | password123 |
      | keyLength      | 256         |
      | canPrint       | true        |
      | canModify      | false       |
    When I send the API request to the endpoint "/api/v1/security/add-password"
    Then the response content type should be "application/pdf"
    And the response file should have size greater than 100
    And the response PDF is passworded
 	And the response status code should be 200
  Scenario: Add watermark
    Given I generate a PDF file as "fileInput"
    And the pdf contains 3 pages
    And the request data includes
      | parameter     | value            |
      | watermarkType | text             |
      | watermarkText | Sample Watermark |
      | fontSize      | 30               |
      | rotation      | 45               |
      | opacity       | 0.5              |
      | widthSpacer   | 50               |
      | heightSpacer  | 50               |
    When I send the API request to the endpoint "/api/v1/security/add-watermark"
    Then the response content type should be "application/pdf"
    And the response file should have size greater than 100
 	And the response status code should be 200
  Scenario: Repair PDF
    Given I generate a PDF file as "fileInput"
    When I send the API request to the endpoint "/api/v1/misc/repair"
    Then the response content type should be "application/pdf"
    And the response file should have size greater than 0
 	And the response status code should be 200
  Scenario: Remove blank pages
    Given I generate a PDF file as "fileInput"
 	And the pdf contains 3 blank pages
    And the request data includes
      | parameter    | value       |
      | threshold    | 90          |
      | whitePercent | 99.9        |
    When I send the API request to the endpoint "/api/v1/misc/remove-blanks"
    Then the response content type should be "application/pdf"
    And the response file should have size greater than 0
    And the response PDF should contain 0 pages
 	And the response status code should be 200
  @ocr
  Scenario: Process PDF with OCR
    Given I generate a PDF file as "fileInput"
    And the request data includes
      | parameter        | value       |
      | languages        | eng         |
      | sidecar          | false        |
      | deskew           | true        |
      | clean            | true        |
      | cleanFinal       | true        |
      | ocrType          | Normal      |
      | ocrRenderType    | hocr        |
      | removeImagesAfter| false       |
    When I send the API request to the endpoint "/api/v1/misc/ocr-pdf"
    Then the response content type should be "application/pdf"
    And the response file should have size greater than 0
 	And the response status code should be 200
  @ocr
  Scenario: Process PDF with text and OCR with type normal 
    Given I generate a PDF file as "fileInput"
    And the pdf contains 3 pages with random text
    And the request data includes
      | parameter        | value       |
      | languages        | eng         |
      | sidecar          | false        |
      | deskew           | true        |
      | clean            | true        |
      | cleanFinal       | true        |
      | ocrType          | Normal      |
      | ocrRenderType    | hocr        |
      | removeImagesAfter| false       |
    When I send the API request to the endpoint "/api/v1/misc/ocr-pdf"
 	Then the response status code should be 500
  @ocr
  Scenario: Process PDF with OCR with type Force
    Given I generate a PDF file as "fileInput"
    And the request data includes
      | parameter        | value       |
      | languages        | eng         |
      | sidecar          | false        |
      | deskew           | true        |
      | clean            | true        |
      | cleanFinal       | true        |
      | ocrType          | Force      |
      | ocrRenderType    | hocr        |
      | removeImagesAfter| false       |
    When I send the API request to the endpoint "/api/v1/misc/ocr-pdf"
    Then the response content type should be "application/pdf"
    And the response file should have size greater than 0
 	And the response status code should be 200
  @ocr
  Scenario: Process PDF with OCR with sidecar
    Given I generate a PDF file as "fileInput"
    And the request data includes
      | parameter        | value       |
      | languages        | eng         |
      | sidecar          | true        |
      | deskew           | true        |
      | clean            | true        |
      | cleanFinal       | true        |
      | ocrType          | Force      |
      | ocrRenderType    | hocr        |
      | removeImagesAfter| false       |
    When I send the API request to the endpoint "/api/v1/misc/ocr-pdf"
    Then the response content type should be "application/octet-stream"
 	And the response file should have extension ".zip"
    And the response file should have size greater than 0
 	And the response status code should be 200
  Scenario: Flatten PDF
    Given I generate a PDF file as "fileInput"
    And the request data includes
      | parameter         | value   |
      | flattenOnlyForms  | false    |
    When I send the API request to the endpoint "/api/v1/misc/flatten"
    Then the response content type should be "application/pdf"
    And the response file should have size greater than 0
 	And the response status code should be 200
  Scenario: Update metadata
    Given I generate a PDF file as "fileInput"
    And the request data includes
      | parameter        | value             |
      | author           | John Doe          |
      | title            | Sample Title      |
      | subject          | Sample Subject    |
      | keywords         | sample, test      |
      | producer         | Test Producer     |
    When I send the API request to the endpoint "/api/v1/misc/update-metadata"
    Then the response content type should be "application/pdf"
    And the response file should have size greater than 0
    And the response PDF metadata should include "Author" as "John Doe"
 	And the response PDF metadata should include "Keywords" as "sample, test"
 	And the response PDF metadata should include "Subject" as "Sample Subject"
 	And the response PDF metadata should include "Title" as "Sample Title"
 	And the response status code should be 200
  @libre
  Scenario: Convert PDF to DOCX
    Given I generate a PDF file as "fileInput"
    And the pdf contains 3 pages with random text
 	And the request data includes
      | parameter        | value      |
      | outputFormat     | docx       |
    When I send the API request to the endpoint "/api/v1/convert/pdf/word"
 	Then the response status code should be 200
    And the response file should have size greater than 100
    And the response file should have extension ".docx"
 #    And the response DOCX should contain 3 pages
  @libre
  Scenario: Convert PDF to ODT
    Given I generate a PDF file as "fileInput"
    And the pdf contains 3 pages with random text
 	And the request data includes
      | parameter        | value     |
      | outputFormat     | odt       |
    When I send the API request to the endpoint "/api/v1/convert/pdf/word"
 	Then the response status code should be 200
    And the response file should have size greater than 100
    And the response file should have extension ".odt"
 #   And the response ODT should contain 3 pages
  @libre
  Scenario: Convert PDF to DOC
    Given I generate a PDF file as "fileInput"
    And the pdf contains 3 pages with random text
 	And the request data includes
      | parameter        | value     |
      | outputFormat     | doc       |
    When I send the API request to the endpoint "/api/v1/convert/pdf/word"
 	Then the response status code should be 200
    And the response file should have extension ".doc"
    And the response file should have size greater than 100
 #    And the response DOC should contain 3 pages
--- a/cucumber/features/steps/step_definitions.py
+++ b/cucumber/features/steps/step_definitions.py
@ -0,0 +1,207 @@
 import os
 import requests
 from behave import given, when, then
 from PyPDF2 import PdfWriter, PdfReader
 import io
 import random
 import string
 from reportlab.lib.pagesizes import letter
 from reportlab.pdfgen import canvas
 import mimetypes
 import requests
 #########
 # GIVEN #
 #########
@given('I generate a PDF file as "{fileInput}"')
def step_generate_pdf(context, fileInput):
    """Write a one-page blank PDF to disk and register it as an upload.

    The file is always written to ``genericNonCustomisableName.pdf``;
    ``fileInput`` is the key under which the open read handle is stored in
    ``context.files`` (and remembered as ``context.param_name``).
    """
    context.param_name = fileInput
    context.file_name = "genericNonCustomisableName.pdf"

    pdf = PdfWriter()
    pdf.add_blank_page(width=72, height=72)  # a single small blank page
    with open(context.file_name, 'wb') as out:
        pdf.write(out)

    if not hasattr(context, 'files'):
        context.files = {}
    # Deliberately left open: later steps read from (and replace) this handle.
    upload = open(context.file_name, 'rb')
    context.files[context.param_name] = upload
def _write_blank_pdf(context, page_count):
    """Overwrite the generated PDF with ``page_count`` blank pages and reopen it."""
    writer = PdfWriter()
    for _ in range(page_count):
        writer.add_blank_page(width=72, height=72)
    with open(context.file_name, 'wb') as f:
        writer.write(f)
    # The registered handle was opened before the rewrite; swap it for a
    # fresh one positioned at the start of the new content.
    context.files[context.param_name].close()
    context.files[context.param_name] = open(context.file_name, 'rb')


@given('the pdf contains {page_count:d} pages')
def step_pdf_contains_pages(context, page_count):
    """Replace the generated PDF with ``page_count`` blank pages.

    Requires a previously generated file (``context.file_name`` and an entry
    for ``context.param_name`` in ``context.files``).
    """
    _write_blank_pdf(context, page_count)


@given('the pdf contains {page_count:d} blank pages')
def step_pdf_contains_blank_pages(context, page_count):
    """Alias of the "contains N pages" step; the pages are blank either way."""
    _write_blank_pdf(context, page_count)
@given('the pdf contains {page_count:d} pages with random text')
def step_pdf_contains_pages_with_random_text(context, page_count):
    """Replace the generated PDF with pages that each carry random text.

    Every page gets its own 100-character string of ASCII letters and digits,
    drawn at x=100 and 100 units below the letter page height. The upload
    handle in ``context.files`` is reopened on the rewritten file.
    """
    alphabet = string.ascii_letters + string.digits
    _, page_height = letter

    pdf_bytes = io.BytesIO()
    pdf = canvas.Canvas(pdf_bytes, pagesize=letter)
    for _page in range(page_count):
        line = ''.join(random.choices(alphabet, k=100))
        pdf.drawString(100, page_height - 100, line)
        pdf.showPage()
    pdf.save()

    with open(context.file_name, 'wb') as out:
        out.write(pdf_bytes.getvalue())

    context.files[context.param_name].close()
    context.files[context.param_name] = open(context.file_name, 'rb')
@given('the pdf pages all contain the text "{text}"')
def step_pdf_pages_contain_text(context, text):
    """Regenerate the PDF with ``text`` drawn on every page.

    Only the page count of the existing file is kept; the pages themselves
    are rebuilt as letter-sized pages containing the given text.
    """
    existing_page_count = len(PdfReader(context.file_name).pages)
    _, page_height = letter

    pdf_bytes = io.BytesIO()
    pdf = canvas.Canvas(pdf_bytes, pagesize=letter)
    for _page in range(existing_page_count):
        pdf.drawString(100, page_height - 100, text)
        pdf.showPage()
    pdf.save()

    with open(context.file_name, 'wb') as out:
        out.write(pdf_bytes.getvalue())

    context.files[context.param_name].close()
    context.files[context.param_name] = open(context.file_name, 'rb')
@given('the pdf is encrypted with password "{password}"')
def step_encrypt_pdf(context, password):
    """Re-write the generated PDF encrypted with ``password``.

    All pages of the current file are copied into a new writer, which is
    encrypted and written back over the same path; the upload handle is
    then reopened on the encrypted file.
    """
    source = PdfReader(context.file_name)
    encrypted = PdfWriter()
    for page in source.pages:
        encrypted.add_page(page)
    encrypted.encrypt(password)

    with open(context.file_name, 'wb') as out:
        encrypted.write(out)

    context.files[context.param_name].close()
    context.files[context.param_name] = open(context.file_name, 'rb')
@given('the request data is')
def step_request_data(context):
    """Set ``context.request_data`` from the step's doc-string.

    The doc-string is evaluated as a Python expression and must produce a
    mapping, because the request step iterates over its ``.items()``.

    Raises:
        ValueError: if the step has no doc-string or the expression does not
            evaluate to a mapping.
    """
    if not context.text:
        raise ValueError('Step "the request data is" needs a doc-string containing a dict')
    # SECURITY NOTE(review): eval() runs arbitrary code taken from the feature
    # file. Only tolerable while feature files are trusted repository content;
    # if the text is always a plain literal, ast.literal_eval or json.loads
    # would be the safer choice.
    request_data = eval(context.text)
    if not hasattr(request_data, 'items'):
        raise ValueError(
            f"Request data must evaluate to a dict but got {type(request_data).__name__}"
        )
    context.request_data = request_data
@given('the request data includes')
def step_request_data_table(context):
    """Build ``context.request_data`` from the step's table.

    Each table row contributes one entry: its ``parameter`` cell is the key
    and its ``value`` cell the value (later rows win on duplicate keys).
    """
    params = {}
    for row in context.table:
        params[row['parameter']] = row['value']
    context.request_data = params
@given('save the generated PDF file as "{filename}" for debugging')
def save_generated_pdf(context, filename):
    """Copy the currently registered upload file to ``filename`` for inspection.

    The copy is taken from the open handle in ``context.files``. The handle
    is rewound before reading and put back at its previous position
    afterwards, so this debugging step does not leave it at end-of-file for
    the request step that uploads from the same handle.
    """
    source = context.files[context.param_name]
    previous_position = source.tell()
    source.seek(0)
    try:
        payload = source.read()
    finally:
        source.seek(previous_position)
    with open(filename, 'wb') as f:
        f.write(payload)
    print(f"Saved generated PDF content to {filename}")
 ########
 # WHEN #
 ########
@when('I send the API request to the endpoint "{endpoint}"')
def step_send_api_request(context, endpoint):
    """POST the registered files and request data to ``endpoint`` as multipart form data.

    The target host defaults to ``http://localhost:8080`` and can be
    overridden with behave userdata (``behave -D base_url=http://host:port``).
    The response is stored in ``context.response``.

    Args:
        context: behave context providing ``files`` (field name -> open file)
            and ``request_data`` (field name -> value); both may be missing.
        endpoint: path appended to the base URL, e.g. ``/api/v1/misc/repair``.
    """
    userdata = getattr(getattr(context, 'config', None), 'userdata', None) or {}
    base_url = userdata.get('base_url', 'http://localhost:8080').rstrip('/')
    url = f"{base_url}{endpoint}"

    files = getattr(context, 'files', None) or {}
    if getattr(context, 'request_data', None) is None:
        context.request_data = {}

    # A (None, value) tuple makes requests send the entry as a plain form
    # field (no filename) inside the multipart body.
    form_data = [(key, (None, value)) for key, value in context.request_data.items()]
    for key, file in files.items():
        # An earlier step may have read from the handle; always upload from the start.
        file.seek(0)
        mime_type, _ = mimetypes.guess_type(file.name)
        mime_type = mime_type or 'application/octet-stream'
        print("form_data " + file.name + " with " + mime_type)
        form_data.append((key, (file.name, file, mime_type)))

    context.response = requests.post(url, files=form_data)
 ########
 # THEN #
 ########
@then('the response content type should be "{content_type}"')
def step_check_response_content_type(context, content_type):
    """Assert that the response ``Content-Type`` header starts with ``content_type``.

    A prefix match is used, so parameters after the media type (such as a
    charset) do not cause a failure. A missing header is treated as ''.
    """
    headers = context.response.headers
    actual_content_type = headers.get('Content-Type', '')
    prefix_matches = actual_content_type.startswith(content_type)
    assert prefix_matches, (
        f"Expected {content_type} but got {actual_content_type}. "
        f"Response content: {context.response.content}"
    )
@then('the response file should have size greater than {size:d}')
def step_check_response_file_size(context, size):
    """Assert that the response body is strictly larger than ``size`` bytes."""
    actual_size = len(context.response.content)
    assert actual_size > size, (
        f"Expected response size greater than {size} bytes but got {actual_size} bytes"
    )
@then('the response PDF is not passworded')
def step_check_response_pdf_not_passworded(context):
    """Assert that the PDF in the response body is not encrypted."""
    pdf = PdfReader(io.BytesIO(context.response.content))
    assert not pdf.is_encrypted
@then('the response PDF is passworded')
def step_check_response_pdf_passworded(context):
    """Assert that the PDF in the response body is encrypted.

    Raises:
        AssertionError: if the body cannot be parsed as a PDF (the message
            includes the raw response content) or the PDF is not encrypted.
    """
    # Imported here because the module header does not import PdfReadError;
    # without it the ``except`` clause below fails with a NameError.
    from PyPDF2.errors import PdfReadError

    response_file = io.BytesIO(context.response.content)
    try:
        is_encrypted = PdfReader(response_file).is_encrypted
    except PdfReadError as e:
        raise AssertionError(f"Failed to read PDF: {str(e)}. Response content: {context.response.content}") from e
    except Exception as e:
        raise AssertionError(f"An error occurred: {str(e)}. Response content: {context.response.content}") from e
    # Checked outside the try block so a failed expectation is reported as
    # such instead of being re-wrapped as "An error occurred".
    assert is_encrypted, "Expected the response PDF to be password protected but it is not encrypted"
@then('the response status code should be {status_code:d}')
def step_check_response_status_code(context, status_code):
    """Assert that the HTTP status of the stored response equals ``status_code``."""
    actual_status = context.response.status_code
    assert actual_status == status_code, (
        f"Expected status code {status_code} but got {actual_status}"
    )
@then('the response should contain error message "{message}"')
def step_check_response_error_message(context, message):
    """Assert that the JSON response body has an ``error`` field equal to ``message``."""
    payload = context.response.json()
    actual_error = payload.get('error')
    assert actual_error == message, (
        f"Expected error message '{message}' but got '{actual_error}'"
    )
@then('the response PDF should contain {page_count:d} pages')
def step_check_response_pdf_page_count(context, page_count):
    """Assert that the PDF in the response body has exactly ``page_count`` pages."""
    pdf = PdfReader(io.BytesIO(context.response.content))
    actual_pages = len(pdf.pages)
    assert actual_pages == page_count, (
        f"Expected {page_count} pages but got {actual_pages} pages"
    )
@then('the response PDF metadata should include "{metadata_key}" as "{metadata_value}"')
def step_check_response_pdf_metadata(context, metadata_key, metadata_value):
    """Assert that a metadata entry of the response PDF has the expected value.

    Args:
        context: behave context holding the response.
        metadata_key: entry name without the leading slash, e.g. ``Author``.
        metadata_value: expected value of that entry.
    """
    reader = PdfReader(io.BytesIO(context.response.content))
    # metadata can be None when the PDF carries no document information.
    metadata = reader.metadata or {}
    # The lookup uses the "/"-prefixed key; report that same value on failure
    # rather than looking the key up again without the prefix.
    actual_value = metadata.get("/" + metadata_key)
    assert actual_value == metadata_value, (
        f"Expected {metadata_key} to be '{metadata_value}' but got '{actual_value}'"
    )
@then('the response file should have extension "{extension}"')
def step_check_response_file_extension(context, extension):
    """Assert that the filename in ``Content-Disposition`` ends with ``extension``.

    The first ``filename...=`` parameter of the header is used. A missing
    header or missing filename parameter is treated as an empty filename,
    which fails the assertion for any non-empty extension.
    """
    content_disposition = context.response.headers.get('Content-Disposition', '')
    filename = ""
    for part in content_disposition.split(';'):
        # Split on the first '=' only, so a filename that itself contains '='
        # is kept whole; a parameter without '=' is skipped instead of raising.
        name, separator, value = part.strip().partition('=')
        if separator and name.startswith('filename'):
            filename = value.strip().strip('"')
            break
    assert filename.endswith(extension), f"Expected file extension {extension} but got {filename}. Response content: {context.response.content}"
@then('save the response file as "{filename}" for debugging')
def step_save_response_file(context, filename):
    """Dump the raw response body to ``filename`` and print a confirmation."""
    with open(filename, 'wb') as out:
        body = context.response.content
        out.write(body)
    print(f"Saved response content to {filename}")
--- a/cucumber/requirements.txt
+++ b/cucumber/requirements.txt
@ -0,0 +1,4 @@
 behave
 requests
 PyPDF2
 reportlab