py311 precommit

Sid Mohan · Sid Mohan · commit 7cda940316c1 · 2024-08-05T16:25:25.000-07:00
diff --git a/.github/workflows/py-version-cicd.yml b/.github/workflows/py-version-cicd.yml
@@ -1,15 +1,15 @@
 name: py-version-cicd
 
 on:
-    workflow_dispatch:
-        inputs:
-          version:
-            description: "Python version to lint build"
-            required: true
-          confirm_tests:
-            description: "Confirm all tests have passed"
-            type: boolean
-            required: true
+  workflow_dispatch:
+    inputs:
+      version:
+        description: "Python version to lint build"
+        required: true
+      confirm_tests:
+        description: "Confirm all tests have passed"
+        type: boolean
+        required: true
 
 jobs:
   lint:
diff --git a/README.md b/README.md
@@ -84,7 +84,8 @@ loop.run_until_complete(run_ocr_pipeline_demo())
 Note: The DataFog library uses asynchronous programming for OCR, so make sure to use the `async`/`await` syntax when calling the appropriate methods.
 
 ## Examples
-TODO: Update README.  switch installation method to install requirements-dev.txt and then -e 
+
+TODO: Update README. Switch the installation method to install requirements-dev.txt first and then the package itself in editable mode (`-e`).
 
 For more detailed examples, check out our Jupyter notebooks in the `examples/` directory:
 
diff --git a/datafog/processing/image_processing/donut_processor.py b/datafog/processing/image_processing/donut_processor.py
@@ -5,9 +5,9 @@
 import sys
 from io import BytesIO
 
+import numpy as np
 import requests
 from PIL import Image
-import numpy as np
 
 from .image_downloader import ImageDownloader
 
@@ -38,24 +38,24 @@ def ensure_installed(self, package_name):
 
     def preprocess_image(self, image: Image.Image) -> np.ndarray:
         # Convert to RGB if the image is not already in RGB mode
-        if image.mode != 'RGB':
-            image = image.convert('RGB')
-        
+        if image.mode != "RGB":
+            image = image.convert("RGB")
+
         # Convert to numpy array
         image_np = np.array(image)
-        
+
         # Ensure the image is 3D (height, width, channels)
         if image_np.ndim == 2:
             image_np = np.expand_dims(image_np, axis=-1)
             image_np = np.repeat(image_np, 3, axis=-1)
-        
+
         return image_np
 
     async def parse_image(self, image: Image.Image) -> str:
         """Process w/ DonutProcessor and VisionEncoderDecoderModel"""
         # Preprocess the image
         image_np = self.preprocess_image(image)
-        
+
         task_prompt = "<s_cord-v2>"
         decoder_input_ids = self.processor.tokenizer(
             task_prompt, add_special_tokens=False, return_tensors="pt"
@@ -93,4 +93,4 @@ def download_image(self, url: str) -> Image.Image:
         """Download an image from URL."""
         response = requests.get(url)
         image = Image.open(BytesIO(response.content))
-        return image
+        return image
diff --git a/datafog/services/image_service.py b/datafog/services/image_service.py
@@ -1,31 +1,34 @@
 import asyncio
+import io
+import ssl
 from typing import List
 
+import aiohttp
+import certifi
 from PIL import Image
 
 from datafog.processing.image_processing.donut_processor import DonutProcessor
+
 # from datafog.processing.image_processing.image_downloader import ImageDownloader
 from datafog.processing.image_processing.pytesseract_processor import (
     PytesseractProcessor,
 )
-import aiohttp
-from PIL import Image
-import io
-import ssl
-import certifi
+
 
 class ImageDownloader:
     async def download_image(self, url: str) -> Image.Image:
         ssl_context = ssl.create_default_context(cafile=certifi.where())
-        async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl=ssl_context)) as session:
+        async with aiohttp.ClientSession(
+            connector=aiohttp.TCPConnector(ssl=ssl_context)
+        ) as session:
             async with session.get(url) as response:
                 if response.status == 200:
                     image_data = await response.read()
                     return Image.open(io.BytesIO(image_data))
                 else:
-                    raise Exception(f"Failed to download image. Status code: {response.status}")
-
-
+                    raise Exception(
+                        f"Failed to download image. Status code: {response.status}"
+                    )
 
 
 class ImageService:
@@ -44,7 +47,7 @@ def __init__(self, use_donut: bool = False, use_tesseract: bool = True):
 
     #     tasks = [asyncio.create_task(download_image(url)) for url in urls]
     #     return await asyncio.gather(*tasks)
-    
+
     async def download_images(self, urls: List[str]) -> List[Image.Image]:
         async def download_image(url: str) -> Image.Image:
             return await self.downloader.download_image(url)
diff --git a/tests/test_image_service.py b/tests/test_image_service.py
@@ -10,8 +10,9 @@
 # use_tesseract selects pytesseract processor for OCR
 
 
-import pytest
 import asyncio
+
+import pytest
 from PIL import Image
 
 from datafog.services.image_service import ImageService
@@ -38,6 +39,7 @@ async def test_download_images():
     finally:
         await asyncio.sleep(0)  # Allow pending callbacks to run
 
+
 @pytest.mark.asyncio
 async def test_ocr_extract_with_tesseract():
     image_service2 = ImageService(use_tesseract=True, use_donut=False)