Skip to content

Commit 7cda940

Browse files
Sid Mohan authored and committed
py311 precommit
1 parent 57eede2 commit 7cda940

File tree

5 files changed: +35 -29 lines changed

.github/workflows/py-version-cicd.yml

+9-9
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,15 @@
11
name: py-version-cicd
22

33
on:
4-
workflow_dispatch:
5-
inputs:
6-
version:
7-
description: "Python version to lint build"
8-
required: true
9-
confirm_tests:
10-
description: "Confirm all tests have passed"
11-
type: boolean
12-
required: true
4+
workflow_dispatch:
5+
inputs:
6+
version:
7+
description: "Python version to lint build"
8+
required: true
9+
confirm_tests:
10+
description: "Confirm all tests have passed"
11+
type: boolean
12+
required: true
1313

1414
jobs:
1515
lint:

README.md

+2-1
Original file line number | Diff line number | Diff line change
@@ -84,7 +84,8 @@ loop.run_until_complete(run_ocr_pipeline_demo())
8484
Note: The DataFog library uses asynchronous programming for OCR, so make sure to use the `async`/`await` syntax when calling the appropriate methods.
8585

8686
## Examples
87-
TODO: Update README. switch installation method to install requirements-dev.txt and then -e
87+
88+
TODO: Update README. switch installation method to install requirements-dev.txt and then -e
8889

8990
For more detailed examples, check out our Jupyter notebooks in the `examples/` directory:
9091

datafog/processing/image_processing/donut_processor.py

+8-8
Original file line number | Diff line number | Diff line change
@@ -5,9 +5,9 @@
55
import sys
66
from io import BytesIO
77

8+
import numpy as np
89
import requests
910
from PIL import Image
10-
import numpy as np
1111

1212
from .image_downloader import ImageDownloader
1313

@@ -38,24 +38,24 @@ def ensure_installed(self, package_name):
3838

3939
def preprocess_image(self, image: Image.Image) -> np.ndarray:
4040
# Convert to RGB if the image is not already in RGB mode
41-
if image.mode != 'RGB':
42-
image = image.convert('RGB')
43-
41+
if image.mode != "RGB":
42+
image = image.convert("RGB")
43+
4444
# Convert to numpy array
4545
image_np = np.array(image)
46-
46+
4747
# Ensure the image is 3D (height, width, channels)
4848
if image_np.ndim == 2:
4949
image_np = np.expand_dims(image_np, axis=-1)
5050
image_np = np.repeat(image_np, 3, axis=-1)
51-
51+
5252
return image_np
5353

5454
async def parse_image(self, image: Image.Image) -> str:
5555
"""Process w/ DonutProcessor and VisionEncoderDecoderModel"""
5656
# Preprocess the image
5757
image_np = self.preprocess_image(image)
58-
58+
5959
task_prompt = "<s_cord-v2>"
6060
decoder_input_ids = self.processor.tokenizer(
6161
task_prompt, add_special_tokens=False, return_tensors="pt"
@@ -93,4 +93,4 @@ def download_image(self, url: str) -> Image.Image:
9393
"""Download an image from URL."""
9494
response = requests.get(url)
9595
image = Image.open(BytesIO(response.content))
96-
return image
96+
return image

datafog/services/image_service.py

+13-10
Original file line number | Diff line number | Diff line change
@@ -1,31 +1,34 @@
11
import asyncio
2+
import io
3+
import ssl
24
from typing import List
35

6+
import aiohttp
7+
import certifi
48
from PIL import Image
59

610
from datafog.processing.image_processing.donut_processor import DonutProcessor
11+
712
# from datafog.processing.image_processing.image_downloader import ImageDownloader
813
from datafog.processing.image_processing.pytesseract_processor import (
914
PytesseractProcessor,
1015
)
11-
import aiohttp
12-
from PIL import Image
13-
import io
14-
import ssl
15-
import certifi
16+
1617

1718
class ImageDownloader:
1819
async def download_image(self, url: str) -> Image.Image:
1920
ssl_context = ssl.create_default_context(cafile=certifi.where())
20-
async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl=ssl_context)) as session:
21+
async with aiohttp.ClientSession(
22+
connector=aiohttp.TCPConnector(ssl=ssl_context)
23+
) as session:
2124
async with session.get(url) as response:
2225
if response.status == 200:
2326
image_data = await response.read()
2427
return Image.open(io.BytesIO(image_data))
2528
else:
26-
raise Exception(f"Failed to download image. Status code: {response.status}")
27-
28-
29+
raise Exception(
30+
f"Failed to download image. Status code: {response.status}"
31+
)
2932

3033

3134
class ImageService:
@@ -44,7 +47,7 @@ def __init__(self, use_donut: bool = False, use_tesseract: bool = True):
4447

4548
# tasks = [asyncio.create_task(download_image(url)) for url in urls]
4649
# return await asyncio.gather(*tasks)
47-
50+
4851
async def download_images(self, urls: List[str]) -> List[Image.Image]:
4952
async def download_image(url: str) -> Image.Image:
5053
return await self.downloader.download_image(url)

tests/test_image_service.py

+3-1
Original file line number | Diff line number | Diff line change
@@ -10,8 +10,9 @@
1010
# use_tesseract selects pytesseract processor for OCR
1111

1212

13-
import pytest
1413
import asyncio
14+
15+
import pytest
1516
from PIL import Image
1617

1718
from datafog.services.image_service import ImageService
@@ -38,6 +39,7 @@ async def test_download_images():
3839
finally:
3940
await asyncio.sleep(0) # Allow pending callbacks to run
4041

42+
4143
@pytest.mark.asyncio
4244
async def test_ocr_extract_with_tesseract():
4345
image_service2 = ImageService(use_tesseract=True, use_donut=False)

0 commit comments

Comments
 (0)