Skip to content

Commit 6e88412

Browse files
Sid Mohan
authored and committed
response to pirana comments
1 parent 6534097 commit 6e88412

File tree

6 files changed

+14
-22
lines changed

6 files changed

+14
-22
lines changed

README.md

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -85,8 +85,6 @@ Note: The DataFog library uses asynchronous programming for OCR, so make sure to
8585

8686
## Examples
8787

88-
TODO: Update README. switch installation method to install requirements-dev.txt and then -e
89-
9088
For more detailed examples, check out our Jupyter notebooks in the `examples/` directory:
9189

9290
- `text_annotation_example.ipynb`: Demonstrates text PII annotation
@@ -118,7 +116,7 @@ For local development:
118116
```
119117
5. Install the package in editable mode:
120118
```
121-
pip install -e .
119+
pip install -r requirements-dev.txt
122120
```
123121
6. Set up the project:
124122
```

datafog/processing/text_processing/spacy_pii_annotator.py

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,16 @@ def create(cls) -> "SpacyPIIAnnotator":
3939
import subprocess
4040

4141
subprocess.run(
42-
["python", "-m", "spacy", "download", "en_core_web_lg"], check=True
42+
[
43+
"python",
44+
"-m",
45+
"pip",
46+
"install",
47+
"--no-deps",
48+
"--no-cache-dir",
49+
"https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.7.1/en_core_web_lg-3.7.1-py3-none-any.whl",
50+
],
51+
check=True,
4352
)
4453
nlp = spacy.load("en_core_web_lg")
4554

datafog/services/image_service.py

Lines changed: 0 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -8,8 +8,6 @@
88
from PIL import Image
99

1010
from datafog.processing.image_processing.donut_processor import DonutProcessor
11-
12-
# from datafog.processing.image_processing.image_downloader import ImageDownloader
1311
from datafog.processing.image_processing.pytesseract_processor import (
1412
PytesseractProcessor,
1513
)
@@ -41,13 +39,6 @@ def __init__(self, use_donut: bool = False, use_tesseract: bool = True):
4139
PytesseractProcessor() if self.use_tesseract else None
4240
)
4341

44-
# async def download_images(self, urls: List[str]) -> List[Image.Image]:
45-
# async def download_image(url: str) -> Image.Image:
46-
# return await self.downloader.download_image(url)
47-
48-
# tasks = [asyncio.create_task(download_image(url)) for url in urls]
49-
# return await asyncio.gather(*tasks)
50-
5142
async def download_images(self, urls: List[str]) -> List[Image.Image]:
5243
async def download_image(url: str) -> Image.Image:
5344
return await self.downloader.download_image(url)

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
pandas
2-
Requests
2+
requests==2.32.3
33
spacy==3.7.5
44
pydantic>=2.8.2,<3.0.0
55
Pillow

setup.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
long_description = f.read()
66

77
# Use a single source of truth for the version
8-
__version__ = "3.4.0b1"
8+
__version__ = "3.4.0"
99

1010
project_urls = {
1111
"Homepage": "https://datafog.ai",
@@ -26,7 +26,7 @@
2626
packages=find_packages(),
2727
install_requires=[
2828
"pandas",
29-
"Requests",
29+
"requests==2.32.3",
3030
"spacy==3.7.5",
3131
"pydantic",
3232
"Pillow",

tests/test_image_service.py

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -23,12 +23,6 @@
2323
]
2424

2525

26-
# @pytest.mark.asyncio
27-
# async def test_download_images():
28-
# image_service1 = ImageService()
29-
# images = await image_service1.download_images(urls)
30-
# assert len(images) == 2
31-
# assert all(isinstance(image, Image.Image) for image in images)
3226
@pytest.mark.asyncio
3327
async def test_download_images():
3428
image_service = ImageService()

0 commit comments

Comments
 (0)