Skip to content

Commit 0691864

Browse files
Sid Mohan authored and committed
precommit
1 parent 9c9d908 commit 0691864

24 files changed

+727
-24
lines changed

.gitignore

+4-1
Original file line number | Diff line number | Diff line change
@@ -32,4 +32,7 @@ node_modules/
3232
.DS_Store
3333
.venv
3434
examples/venv/
35-
error_log.txt
35+
error_log.txt
36+
docs/*
37+
!docs/*.rst
38+
!docs/conf.py

README.md

+1-1
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
</p>
44

55
<p align="center">
6-
<b>Open-source DevSecOps for Generative AI Systems</b>. <br />
6+
<b>Open-source PII Detection & Anonymization</b>. <br />
77
</p>
88

99
<p align="center">

datafog/client.py

+64-9
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,8 @@
1-
# client.py
1+
"""
2+
Client module for DataFog.
3+
4+
Provides CLI commands for scanning images and text using DataFog's OCR and PII detection capabilities.
5+
"""
26

37
import asyncio
48
import logging
@@ -25,7 +29,18 @@ def scan_image(
2529
),
2630
operations: str = typer.Option("annotate_pii", help="Operation to perform"),
2731
):
28-
"""Extract text from images."""
32+
"""
33+
Scan images for text and PII.
34+
35+
Extracts text from images using OCR, then detects PII entities.
36+
Handles both remote URLs and local file paths.
37+
38+
Args:
39+
image_urls: List of image URLs or file paths
40+
operations: Pipeline operations to run (default: annotate_pii)
41+
42+
Prints results or exits with error on failure.
43+
"""
2944
if not image_urls:
3045
typer.echo("No image URLs or file paths provided. Please provide at least one.")
3146
raise typer.Exit(code=1)
@@ -48,7 +63,17 @@ def scan_text(
4863
),
4964
operations: str = typer.Option("annotate_pii", help="Operation to perform"),
5065
):
51-
"""Annotate texts to detect PII entities."""
66+
"""
67+
Scan texts for PII.
68+
69+
Detects PII entities in a list of input texts.
70+
71+
Args:
72+
str_list: List of texts to analyze
73+
operations: Pipeline operations to run (default: annotate_pii)
74+
75+
Prints results or exits with error on failure.
76+
"""
5277
if not str_list:
5378
typer.echo("No texts provided.")
5479
raise typer.Exit(code=1)
@@ -66,19 +91,34 @@ def scan_text(
6691

6792
@app.command()
6893
def health():
69-
"""Check DataFog service health."""
94+
"""
95+
Check DataFog service health.
96+
97+
Prints a message indicating that DataFog is running.
98+
"""
7099
typer.echo("DataFog is running.")
71100

72101

73102
@app.command()
74103
def show_config():
75-
"""Show current configuration."""
104+
"""
105+
Show current configuration.
106+
107+
Prints the current DataFog configuration.
108+
"""
76109
typer.echo(get_config())
77110

78111

79112
@app.command()
80113
def download_model(model_name: str = typer.Argument(..., help="Model to download")):
81-
"""Download a model."""
114+
"""
115+
Download a spaCy model.
116+
117+
Args:
118+
model_name: Name of the model to download.
119+
120+
Prints a confirmation message after downloading.
121+
"""
82122
SpacyAnnotator.download_model(model_name)
83123
typer.echo(f"Model {model_name} downloaded.")
84124

@@ -87,21 +127,36 @@ def download_model(model_name: str = typer.Argument(..., help="Model to download
87127
def show_spacy_model_directory(
88128
model_name: str = typer.Argument(..., help="Model to check")
89129
):
90-
"""Show model path."""
130+
"""
131+
Show the directory path for a spaCy model.
132+
133+
Args:
134+
model_name: Name of the model to check.
135+
136+
Prints the directory path of the specified model.
137+
"""
91138
annotator = SpacyAnnotator(model_name)
92139
typer.echo(annotator.show_model_path())
93140

94141

95142
@app.command()
96143
def list_spacy_models():
97-
"""List available models."""
144+
"""
145+
List available spaCy models.
146+
147+
Prints a list of all available spaCy models.
148+
"""
98149
annotator = SpacyAnnotator()
99150
typer.echo(annotator.list_models())
100151

101152

102153
@app.command()
103154
def list_entities():
104-
"""List available entities."""
155+
"""
156+
List available entities.
157+
158+
Prints a list of all available entities that can be recognized.
159+
"""
105160
annotator = SpacyAnnotator()
106161
typer.echo(annotator.list_entities())
107162

datafog/config.py

+27-1
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,10 @@
1+
"""
2+
Configuration module for DataFog.
3+
4+
Defines settings and provides a global config instance.
5+
Includes API keys, URLs, timeouts, and other options.
6+
"""
7+
18
import os
29
from enum import Enum
310
from typing import Optional
@@ -7,7 +14,17 @@
714

815

916
class DataFogConfig(BaseSettings):
10-
"""Configuration settings for DataFog SDK"""
17+
"""
18+
Configuration settings for DataFog SDK.
19+
20+
This class defines all the configuration options used throughout the DataFog SDK.
21+
It includes settings for API authentication, service URLs, timeouts, retries,
22+
rate limiting, and logging. The configuration can be updated at runtime using
23+
environment variables or programmatically via the update method.
24+
25+
All settings have default values that can be overridden as needed. The class
26+
uses Pydantic for data validation and settings management.
27+
"""
1128

1229
# API Keys and Authentication
1330
api_key: str = os.environ.get("DATAFOG_API_KEY", "")
@@ -60,6 +77,15 @@ def configure(**kwargs):
6077

6178

6279
class OperationType(str, Enum):
80+
"""
81+
Enum for supported DataFog operations.
82+
83+
ANNOTATE_PII: Detect and annotate PII in text
84+
EXTRACT_TEXT: Extract text from images
85+
REDACT_PII: Remove PII from text
86+
ANONYMIZE_PII: Replace PII with fake data
87+
"""
88+
6389
ANNOTATE_PII = "annotate_pii"
6490
EXTRACT_TEXT = "extract_text"
6591
REDACT_PII = "redact_pii"

datafog/exceptions.py

+52-5
Original file line number | Diff line number | Diff line change
@@ -1,31 +1,78 @@
1-
# exceptions.py
1+
"""
2+
Exceptions module for DataFog SDK.
3+
4+
This module defines custom exceptions and utility functions for error handling in the DataFog SDK.
5+
"""
26

37

48
class DataFogException(Exception):
5-
"""Base exception for DataFog SDK"""
9+
"""
10+
Base exception for DataFog SDK.
11+
12+
Attributes:
13+
message (str): The error message.
14+
status_code (int, optional): The HTTP status code associated with the error.
15+
"""
616

717
def __init__(self, message: str, status_code: int = None):
18+
"""
19+
Initialize a DataFogException.
20+
21+
Args:
22+
message (str): The error message.
23+
status_code (int, optional): The HTTP status code associated with the error.
24+
"""
825
self.message = message
926
self.status_code = status_code
1027
super().__init__(self.message)
1128

1229

1330
class BadRequestError(DataFogException):
14-
"""Exception raised for 400 Bad Request errors"""
31+
"""
32+
Exception raised for 400 Bad Request errors.
33+
34+
Inherits from DataFogException and sets the status code to 400.
35+
"""
1536

1637
def __init__(self, message: str):
38+
"""
39+
Initialize a BadRequestError.
40+
41+
Args:
42+
message (str): The error message.
43+
"""
1744
super().__init__(message, status_code=400)
1845

1946

2047
class UnprocessableEntityError(DataFogException):
21-
"""Exception raised for 422 Unprocessable Entity errors"""
48+
"""
49+
Exception raised for 422 Unprocessable Entity errors.
50+
51+
Inherits from DataFogException and sets the status code to 422.
52+
"""
2253

2354
def __init__(self, message: str):
55+
"""
56+
Initialize an UnprocessableEntityError.
57+
58+
Args:
59+
message (str): The error message.
60+
"""
2461
super().__init__(message, status_code=422)
2562

2663

2764
def raise_for_status_code(status_code: int, error_message: str):
28-
"""Raise the appropriate exception based on the status code"""
65+
"""
66+
Raise the appropriate exception based on the status code.
67+
68+
Args:
69+
status_code (int): The HTTP status code.
70+
error_message (str): The error message to include in the exception.
71+
72+
Raises:
73+
BadRequestError: If the status code is 400.
74+
UnprocessableEntityError: If the status code is 422.
75+
"""
2976
if status_code == 400:
3077
raise BadRequestError(error_message)
3178
elif status_code == 422:

0 commit comments

Comments
 (0)