Skip to content

Commit cf36dd6

Browse files
Sid Mohan
authored and committed
v3.2.1 release
1 parent f56db58 commit cf36dd6

File tree

5 files changed

+74
-152
lines changed

5 files changed

+74
-152
lines changed

datafog/__about__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
__version__ = "3.2.1b3"
1+
__version__ = "3.2.1"

examples/getting_started.ipynb

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@
4747
}
4848
],
4949
"source": [
50-
"!pip install --upgrade datafog --pre --quiet"
50+
"!pip install --upgrade datafog --quiet"
5151
]
5252
},
5353
{

setup.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66

77

88
def __version__():
9-
return "3.2.1b9"
9+
return "3.2.1"
1010

1111

1212
project_urls = {

tests/beta_pypi_test.ipynb

Lines changed: 9 additions & 87 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
"cells": [
33
{
44
"cell_type": "code",
5-
"execution_count": 18,
5+
"execution_count": 57,
66
"metadata": {},
77
"outputs": [],
88
"source": [
@@ -19,7 +19,7 @@
1919
},
2020
{
2121
"cell_type": "code",
22-
"execution_count": 19,
22+
"execution_count": 58,
2323
"metadata": {},
2424
"outputs": [],
2525
"source": [
@@ -32,7 +32,7 @@
3232
},
3333
{
3434
"cell_type": "code",
35-
"execution_count": 20,
35+
"execution_count": 59,
3636
"metadata": {},
3737
"outputs": [],
3838
"source": [
@@ -41,95 +41,17 @@
4141
},
4242
{
4343
"cell_type": "code",
44-
"execution_count": 21,
44+
"execution_count": 60,
4545
"metadata": {},
46-
"outputs": [
47-
{
48-
"name": "stdout",
49-
"output_type": "stream",
50-
"text": [
51-
"Obtaining file:///Users/sidmohan/Desktop/datafog_local/datafog-python/datafog-python\n",
52-
" Installing build dependencies ... \u001b[?25ldone\n",
53-
"\u001b[?25h Checking if build backend supports build_editable ... \u001b[?25ldone\n",
54-
"\u001b[?25h Getting requirements to build editable ... \u001b[?25ldone\n",
55-
"\u001b[?25h Preparing editable metadata (pyproject.toml) ... \u001b[?25ldone\n",
56-
"\u001b[?25hRequirement already satisfied: pandas in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from datafog==3.2.1b3) (2.2.2)\n",
57-
"Requirement already satisfied: Requests==2.31.0 in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from datafog==3.2.1b3) (2.31.0)\n",
58-
"Requirement already satisfied: spacy==3.4.4 in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from datafog==3.2.1b3) (3.4.4)\n",
59-
"Requirement already satisfied: en-spacy-pii-fast==0.0.0 in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from datafog==3.2.1b3) (0.0.0)\n",
60-
"Requirement already satisfied: pydantic==1.10.15 in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from datafog==3.2.1b3) (1.10.15)\n",
61-
"Requirement already satisfied: Pillow in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from datafog==3.2.1b3) (10.3.0)\n",
62-
"Requirement already satisfied: sentencepiece in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from datafog==3.2.1b3) (0.2.0)\n",
63-
"Requirement already satisfied: protobuf in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from datafog==3.2.1b3) (4.25.3)\n",
64-
"Requirement already satisfied: pytesseract in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from datafog==3.2.1b3) (0.3.10)\n",
65-
"Requirement already satisfied: aiohttp in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from datafog==3.2.1b3) (3.9.5)\n",
66-
"Requirement already satisfied: pytest-asyncio in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from datafog==3.2.1b3) (0.23.6)\n",
67-
"Requirement already satisfied: typing-extensions>=4.2.0 in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from pydantic==1.10.15->datafog==3.2.1b3) (4.11.0)\n",
68-
"Requirement already satisfied: charset-normalizer<4,>=2 in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from Requests==2.31.0->datafog==3.2.1b3) (3.3.2)\n",
69-
"Requirement already satisfied: idna<4,>=2.5 in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from Requests==2.31.0->datafog==3.2.1b3) (3.7)\n",
70-
"Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from Requests==2.31.0->datafog==3.2.1b3) (2.2.1)\n",
71-
"Requirement already satisfied: certifi>=2017.4.17 in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from Requests==2.31.0->datafog==3.2.1b3) (2024.2.2)\n",
72-
"Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.10 in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from spacy==3.4.4->datafog==3.2.1b3) (3.0.12)\n",
73-
"Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from spacy==3.4.4->datafog==3.2.1b3) (1.0.5)\n",
74-
"Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from spacy==3.4.4->datafog==3.2.1b3) (1.0.10)\n",
75-
"Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from spacy==3.4.4->datafog==3.2.1b3) (2.0.8)\n",
76-
"Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from spacy==3.4.4->datafog==3.2.1b3) (3.0.9)\n",
77-
"Requirement already satisfied: thinc<8.2.0,>=8.1.0 in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from spacy==3.4.4->datafog==3.2.1b3) (8.1.12)\n",
78-
"Requirement already satisfied: wasabi<1.1.0,>=0.9.1 in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from spacy==3.4.4->datafog==3.2.1b3) (0.10.1)\n",
79-
"Requirement already satisfied: srsly<3.0.0,>=2.4.3 in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from spacy==3.4.4->datafog==3.2.1b3) (2.4.8)\n",
80-
"Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from spacy==3.4.4->datafog==3.2.1b3) (2.0.10)\n",
81-
"Requirement already satisfied: typer<0.8.0,>=0.3.0 in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from spacy==3.4.4->datafog==3.2.1b3) (0.7.0)\n",
82-
"Requirement already satisfied: pathy>=0.3.5 in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from spacy==3.4.4->datafog==3.2.1b3) (0.11.0)\n",
83-
"Requirement already satisfied: smart-open<7.0.0,>=5.2.1 in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from spacy==3.4.4->datafog==3.2.1b3) (5.2.1)\n",
84-
"Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from spacy==3.4.4->datafog==3.2.1b3) (4.66.4)\n",
85-
"Requirement already satisfied: numpy>=1.15.0 in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from spacy==3.4.4->datafog==3.2.1b3) (1.26.4)\n",
86-
"Requirement already satisfied: jinja2 in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from spacy==3.4.4->datafog==3.2.1b3) (3.1.4)\n",
87-
"Requirement already satisfied: setuptools in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from spacy==3.4.4->datafog==3.2.1b3) (58.1.0)\n",
88-
"Requirement already satisfied: packaging>=20.0 in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from spacy==3.4.4->datafog==3.2.1b3) (23.2)\n",
89-
"Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from spacy==3.4.4->datafog==3.2.1b3) (3.3.0)\n",
90-
"Requirement already satisfied: aiosignal>=1.1.2 in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from aiohttp->datafog==3.2.1b3) (1.3.1)\n",
91-
"Requirement already satisfied: attrs>=17.3.0 in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from aiohttp->datafog==3.2.1b3) (23.2.0)\n",
92-
"Requirement already satisfied: frozenlist>=1.1.1 in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from aiohttp->datafog==3.2.1b3) (1.4.1)\n",
93-
"Requirement already satisfied: multidict<7.0,>=4.5 in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from aiohttp->datafog==3.2.1b3) (6.0.5)\n",
94-
"Requirement already satisfied: yarl<2.0,>=1.0 in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from aiohttp->datafog==3.2.1b3) (1.9.4)\n",
95-
"Requirement already satisfied: python-dateutil>=2.8.2 in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from pandas->datafog==3.2.1b3) (2.9.0.post0)\n",
96-
"Requirement already satisfied: pytz>=2020.1 in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from pandas->datafog==3.2.1b3) (2024.1)\n",
97-
"Requirement already satisfied: tzdata>=2022.7 in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from pandas->datafog==3.2.1b3) (2024.1)\n",
98-
"Requirement already satisfied: pytest<9,>=7.0.0 in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from pytest-asyncio->datafog==3.2.1b3) (8.0.2)\n",
99-
"Requirement already satisfied: pathlib-abc==0.1.1 in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from pathy>=0.3.5->spacy==3.4.4->datafog==3.2.1b3) (0.1.1)\n",
100-
"Requirement already satisfied: iniconfig in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from pytest<9,>=7.0.0->pytest-asyncio->datafog==3.2.1b3) (2.0.0)\n",
101-
"Requirement already satisfied: pluggy<2.0,>=1.3.0 in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from pytest<9,>=7.0.0->pytest-asyncio->datafog==3.2.1b3) (1.4.0)\n",
102-
"Requirement already satisfied: six>=1.5 in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from python-dateutil>=2.8.2->pandas->datafog==3.2.1b3) (1.16.0)\n",
103-
"Requirement already satisfied: blis<0.8.0,>=0.7.8 in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from thinc<8.2.0,>=8.1.0->spacy==3.4.4->datafog==3.2.1b3) (0.7.11)\n",
104-
"Requirement already satisfied: confection<1.0.0,>=0.0.1 in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from thinc<8.2.0,>=8.1.0->spacy==3.4.4->datafog==3.2.1b3) (0.1.4)\n",
105-
"Requirement already satisfied: click<9.0.0,>=7.1.1 in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from typer<0.8.0,>=0.3.0->spacy==3.4.4->datafog==3.2.1b3) (8.1.7)\n",
106-
"Requirement already satisfied: MarkupSafe>=2.0 in /Users/sidmohan/.pyenv/versions/3.11.7/envs/2.2.0b1/lib/python3.11/site-packages (from jinja2->spacy==3.4.4->datafog==3.2.1b3) (2.1.5)\n",
107-
"Building wheels for collected packages: datafog\n",
108-
" Building editable for datafog (pyproject.toml) ... \u001b[?25ldone\n",
109-
"\u001b[?25h Created wheel for datafog: filename=datafog-3.2.1b3-0.editable-py3-none-any.whl size=6942 sha256=19c91592c093edff7f3ab1c073aa39e545cde656fe889f2b8534be5012121915\n",
110-
" Stored in directory: /private/var/folders/8r/bfx45hqn6lg5cbjnxjt2jdtm0000gn/T/pip-ephem-wheel-cache-ke4aeo6_/wheels/d0/c3/69/badf24afcbfead2b7a600b5b58c7ad0f020fd82029bbcf198e\n",
111-
"Successfully built datafog\n",
112-
"Installing collected packages: datafog\n",
113-
" Attempting uninstall: datafog\n",
114-
" Found existing installation: datafog 3.2.1b3\n",
115-
" Uninstalling datafog-3.2.1b3:\n",
116-
" Successfully uninstalled datafog-3.2.1b3\n",
117-
"Successfully installed datafog-3.2.1b3\n",
118-
"\n",
119-
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n",
120-
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
121-
]
122-
}
123-
],
46+
"outputs": [],
12447
"source": [
12548
"\n",
126-
"# !pip install --pre --upgrade datafog \n",
127-
"!pip install -e ../\n"
49+
"!pip install --pre --upgrade datafog -q\n"
12850
]
12951
},
13052
{
13153
"cell_type": "code",
132-
"execution_count": 22,
54+
"execution_count": 61,
13355
"metadata": {},
13456
"outputs": [],
13557
"source": [
@@ -147,7 +69,7 @@
14769
},
14870
{
14971
"cell_type": "code",
150-
"execution_count": 17,
72+
"execution_count": 62,
15173
"metadata": {},
15274
"outputs": [],
15375
"source": [
@@ -156,7 +78,7 @@
15678
},
15779
{
15880
"cell_type": "code",
159-
"execution_count": 8,
81+
"execution_count": 63,
16082
"metadata": {},
16183
"outputs": [
16284
{

tests/test_image_service.py

Lines changed: 62 additions & 62 deletions
Original file line number | Diff line number | Diff line change
@@ -1,62 +1,62 @@
1-
# # Pytest test cases for the image_service.py module
2-
# # The test cases are written to test the image_service.py module
3-
# # The test cases will test the ImageService class
4-
# # The ImageService class is responsible for downloading images and extracting text from the images
5-
# # The ImageService class uses the ImageDownloader, DonutProcessor, and PytesseractProcessor classes
6-
# # The ImageService class has two methods: download_images and ocr_extract
7-
# # The download_images method is responsible for downloading the images from the given URLs
8-
# # The ocr_extract method is responsible for extracting the text from the images
9-
# # The ocr_extract method has two optional parameters: use_donut and use_tesseract
10-
# # The use_donut parameter is used to select the donut processor for the OCR
11-
# # The use_tesseract parameter is used to select the pytesseract processor for the OCR
12-
13-
14-
# import pytest
15-
# from PIL import Image
16-
17-
# from datafog.services.image_service import ImageService
18-
19-
# urls = [
20-
# "https://www.pdffiller.com/preview/101/35/101035394.png",
21-
# "https://www.pdffiller.com/preview/435/972/435972694.png",
22-
# ]
23-
24-
25-
# @pytest.mark.asyncio
26-
# async def test_download_images():
27-
# image_service1 = ImageService()
28-
# images = await image_service1.download_images(urls)
29-
# assert len(images) == 2
30-
# assert all(isinstance(image, Image.Image) for image in images)
31-
32-
33-
# @pytest.mark.asyncio
34-
# async def test_ocr_extract_with_tesseract():
35-
# image_service2 = ImageService(use_tesseract=True, use_donut=False)
36-
# texts = await image_service2.ocr_extract(urls)
37-
# assert isinstance(texts, list)
38-
# assert all(isinstance(text, str) for text in texts)
39-
40-
41-
# @pytest.mark.asyncio
42-
# async def test_ocr_extract_with_both():
43-
# image_service3 = ImageService(use_tesseract=True, use_donut=True)
44-
# with pytest.raises(
45-
# ValueError, match="Both OCR processors cannot be selected simultaneously"
46-
# ):
47-
# await image_service3.ocr_extract(urls)
48-
49-
50-
# @pytest.mark.asyncio
51-
# async def test_ocr_extract_with_donut():
52-
# image_service4 = ImageService(use_donut=True, use_tesseract=False)
53-
# texts = await image_service4.ocr_extract(urls)
54-
# assert isinstance(texts, list)
55-
# assert all(isinstance(text, str) for text in texts)
56-
57-
58-
# @pytest.mark.asyncio
59-
# async def test_ocr_extract_no_processor_selected():
60-
# image_service5 = ImageService(use_tesseract=False, use_donut=False)
61-
# with pytest.raises(ValueError, match="No OCR processor selected"):
62-
# await image_service5.ocr_extract(urls)
1+
# Pytest test cases for the image_service.py module
2+
# The test cases are written to test the image_service.py module
3+
# The test cases will test the ImageService class
4+
# The ImageService class is responsible for downloading images and extracting text from the images
5+
# The ImageService class uses the ImageDownloader, DonutProcessor, and PytesseractProcessor classes
6+
# The ImageService class has two methods: download_images and ocr_extract
7+
# The download_images method is responsible for downloading the images from the given URLs
8+
# The ocr_extract method is responsible for extracting the text from the images
9+
# The ocr_extract method has two optional parameters: use_donut and use_tesseract
10+
# The use_donut parameter is used to select the donut processor for the OCR
11+
# The use_tesseract parameter is used to select the pytesseract processor for the OCR
12+
13+
14+
import pytest
15+
from PIL import Image
16+
17+
from datafog.services.image_service import ImageService
18+
19+
urls = [
20+
"https://www.pdffiller.com/preview/101/35/101035394.png",
21+
"https://www.pdffiller.com/preview/435/972/435972694.png",
22+
]
23+
24+
25+
@pytest.mark.asyncio
26+
async def test_download_images():
27+
image_service1 = ImageService()
28+
images = await image_service1.download_images(urls)
29+
assert len(images) == 2
30+
assert all(isinstance(image, Image.Image) for image in images)
31+
32+
33+
@pytest.mark.asyncio
34+
async def test_ocr_extract_with_tesseract():
35+
image_service2 = ImageService(use_tesseract=True, use_donut=False)
36+
texts = await image_service2.ocr_extract(urls)
37+
assert isinstance(texts, list)
38+
assert all(isinstance(text, str) for text in texts)
39+
40+
41+
@pytest.mark.asyncio
42+
async def test_ocr_extract_with_both():
43+
image_service3 = ImageService(use_tesseract=True, use_donut=True)
44+
with pytest.raises(
45+
ValueError, match="Both OCR processors cannot be selected simultaneously"
46+
):
47+
await image_service3.ocr_extract(urls)
48+
49+
50+
@pytest.mark.asyncio
51+
async def test_ocr_extract_with_donut():
52+
image_service4 = ImageService(use_donut=True, use_tesseract=False)
53+
texts = await image_service4.ocr_extract(urls)
54+
assert isinstance(texts, list)
55+
assert all(isinstance(text, str) for text in texts)
56+
57+
58+
@pytest.mark.asyncio
59+
async def test_ocr_extract_no_processor_selected():
60+
image_service5 = ImageService(use_tesseract=False, use_donut=False)
61+
with pytest.raises(ValueError, match="No OCR processor selected"):
62+
await image_service5.ocr_extract(urls)

0 commit comments

Comments
 (0)