From 56afd35c823c863f7c936d037017b6d8c705d354 Mon Sep 17 00:00:00 2001
From: Peter Dave Hello <hsu@peterdavehello.org>
Date: Thu, 4 Jan 2024 20:14:07 +0800
Subject: [PATCH 1/3] Refactor DockerfileBase to clean up apt cache after
 package installation

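Changes include:
- Removing the apt package lists by adding `rm -rf /var/lib/apt/lists/*`
  after each package installation within the same RUN statement, so the
  lists are deleted in the layer that downloaded them.
- Ensuring the Docker image size is minimized by removing unnecessary
  files immediately after use. The `rm -rf /var/lib/apt/lists/*` in the
  final cleanup RUN is dropped, because deleting files in a later layer
  does not shrink the layers that already contain them.

These adjustments will result in a more space-efficient Docker image.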
---
 DockerfileBase | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/DockerfileBase b/DockerfileBase
index c913635f..4e41d3e8 100644
--- a/DockerfileBase
+++ b/DockerfileBase
@@ -6,7 +6,8 @@ FROM ubuntu:latest AS base
 # JDK for app
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
-    	openjdk-17-jre
+    	openjdk-17-jre && \
+    rm -rf /var/lib/apt/lists/*
     	
 # Doc conversion
 RUN apt-get update && \
@@ -18,7 +19,8 @@ RUN apt-get update && \
         libreoffice-impress \
 		python3-uno \
 		curl \
-		unoconv
+		unoconv && \
+    rm -rf /var/lib/apt/lists/*
 		
 		
 # OCR MY PDF (unpaper for descew and other advanced featues)
@@ -30,6 +32,7 @@ apt-get update && \
         python3-pip \
         ocrmypdf \
         unpaper && \
+    rm -rf /var/lib/apt/lists/* && \
     pip install --upgrade pip && \
     pip install --no-cache-dir --upgrade ocrmypdf && \
     pip install --no-cache-dir --upgrade pillow==10.0.1 reportlab==3.6.13 wheel==0.38.1 setuptools==65.5.1 pyjwt==2.4.0 cryptography==39.0.1
@@ -40,8 +43,7 @@ RUN pip install --no-cache-dir opencv-python-headless WeasyPrint
 
 
 # cleanup and etc
-RUN rm -rf /var/lib/apt/lists/* && \
-    mkdir /usr/share/tesseract-ocr-original && \
+RUN mkdir /usr/share/tesseract-ocr-original && \
     cp -r /usr/share/tesseract-ocr/* /usr/share/tesseract-ocr-original && \
     rm -rf /usr/share/tesseract-ocr
 

From 846ebe6dda3b5b907231f214ce96e74d300e6dd2 Mon Sep 17 00:00:00 2001
From: Peter Dave Hello <hsu@peterdavehello.org>
Date: Thu, 4 Jan 2024 20:30:55 +0800
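Subject: [PATCH 2/3] Refine Tesseract-OCR file backup process in
 DockerfileBase

Replace the separate mkdir/cp/rm cleanup RUN with a single `mv` of
/usr/share/tesseract-ocr to /usr/share/tesseract-ocr-original inside the
RUN statement that installs the OCR packages. Moving the directory in the
same layer avoids storing a second copy of the files in a later layer.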
---
 DockerfileBase | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/DockerfileBase b/DockerfileBase
index 4e41d3e8..ffc8a89e 100644
--- a/DockerfileBase
+++ b/DockerfileBase
@@ -33,6 +33,7 @@ apt-get update && \
         ocrmypdf \
         unpaper && \
     rm -rf /var/lib/apt/lists/* && \
+    mv /usr/share/tesseract-ocr /usr/share/tesseract-ocr-original && \
     pip install --upgrade pip && \
     pip install --no-cache-dir --upgrade ocrmypdf && \
     pip install --no-cache-dir --upgrade pillow==10.0.1 reportlab==3.6.13 wheel==0.38.1 setuptools==65.5.1 pyjwt==2.4.0 cryptography==39.0.1
@@ -40,13 +41,3 @@ apt-get update && \
     
 #CV and HTML   
 RUN pip install --no-cache-dir opencv-python-headless WeasyPrint 
-
-
-# cleanup and etc
-RUN mkdir /usr/share/tesseract-ocr-original && \
-    cp -r /usr/share/tesseract-ocr/* /usr/share/tesseract-ocr-original && \
-    rm -rf /usr/share/tesseract-ocr
-
-
-
-    
\ No newline at end of file

From ecb62e0c94bc6711ed6afdb71ccbdd575081988e Mon Sep 17 00:00:00 2001
From: Peter Dave Hello <hsu@peterdavehello.org>
Date: Thu, 4 Jan 2024 20:56:24 +0800
Subject: [PATCH 3/3] Apply `--no-cache-dir` to pip upgrade in DockerfileBase

Align the `pip install --upgrade pip` command with the other pip
invocations by adding the `--no-cache-dir` flag, so that pip's download
cache is not written into the image layer.
---
 DockerfileBase | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DockerfileBase b/DockerfileBase
index ffc8a89e..b37050c7 100644
--- a/DockerfileBase
+++ b/DockerfileBase
@@ -34,7 +34,7 @@ apt-get update && \
         unpaper && \
     rm -rf /var/lib/apt/lists/* && \
     mv /usr/share/tesseract-ocr /usr/share/tesseract-ocr-original && \
-    pip install --upgrade pip && \
+    pip install --no-cache-dir --upgrade pip && \
     pip install --no-cache-dir --upgrade ocrmypdf && \
     pip install --no-cache-dir --upgrade pillow==10.0.1 reportlab==3.6.13 wheel==0.38.1 setuptools==65.5.1 pyjwt==2.4.0 cryptography==39.0.1