Skip to content

Commit 45e73ea

Browse files
committed
Code cleanup and fix for the image orientation function, as suggested in PR
1 parent f2f5246 commit 45e73ea

File tree

2 files changed

+23
-26
lines changed

2 files changed

+23
-26
lines changed

app_doctr.py

+6-9
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ def create_ocr():
5050
doctr_ocr = DocTR(detect_language=False)
5151
return ocr_model, doctr_ocr
5252

53-
@st.cache_data
53+
@st.cache_data(show_spinner=False)
5454
def get_uploaded_images(tally_sheet):
5555
"""
5656
List of images uploaded by user as docTR DocumentFiles
@@ -64,7 +64,7 @@ def get_uploaded_images(tally_sheet):
6464
res.append(DocumentFile.from_images(image))
6565
return res
6666

67-
@st.cache_data
67+
@st.cache_data(show_spinner=False)
6868
def get_results(uploaded_images):
6969
return [doctr_ocr_functions.get_word_level_content(ocr_model, doc) for doc in uploaded_images]
7070

@@ -297,15 +297,11 @@ def clean_up(table_dfs):
297297
_List_: List of table data frames
298298
"""
299299
for table in table_dfs:
300-
print(table)
301300
for row in range(table.shape[0]):
302301
for col in range(table.shape[1]):
303302
cell_value = table.iloc[row][col]
304-
print(cell_value)
305303
if cell_value is None or cell_value=="None":
306-
table.iloc[row][col] = ""
307-
print(table.iloc[row][col])
308-
print(table_dfs)
304+
table.iloc[row][col] = ""
309305
return table_dfs
310306

311307
# Initializing session state variables that only need to be set on startup
@@ -455,8 +451,9 @@ def authenticate():
455451
# ***************************************
456452

457453
# Populate streamlit with data recognized from tally sheets
458-
uploaded_images = get_uploaded_images(tally_sheet_images)
459-
results = get_results(uploaded_images)
454+
with st.spinner("Running image recognition..."):
455+
uploaded_images = get_uploaded_images(tally_sheet_images)
456+
results = get_results(uploaded_images)
460457

461458
# Spinner for data upload. If it's going to be on screen for long, make it bespoke
462459
table_dfs, page_nums_to_display = [], []

src/msfocr/doctr/ocr_functions.py

+17-17
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,6 @@ def get_tabular_content(model, image, confidence_dict=None):
114114

115115
return table_df, confidence_df
116116

117-
118117
def get_sheet_type(res):
119118
"""
120119
Finds the type of the tally sheet (dataSet, orgUnit, period) from the result of OCR model, where
@@ -208,22 +207,23 @@ def correct_image_orientation(image_path):
208207
:param image_path: The path to the image file.
209208
:return: PIL.Image.Image: The image with corrected orientation.
210209
"""
211-
with Image.open(image_path) as image:
212-
orientation = None
213-
try:
214-
for orientation in ExifTags.TAGS.keys():
215-
if ExifTags.TAGS[orientation] == 'Orientation':
216-
break
217-
exif = dict(image.getexif().items())
218-
if exif.get(orientation) == 3:
219-
image = image.rotate(180, expand=True)
220-
elif exif.get(orientation) == 6:
221-
image = image.rotate(270, expand=True)
222-
elif exif.get(orientation) == 8:
223-
image = image.rotate(90, expand=True)
224-
except (AttributeError, KeyError, IndexError):
225-
pass
226-
return image.copy()
210+
with Image.open(image_path) as image:
211+
image.load()
212+
orientation = None
213+
try:
214+
for orientation in ExifTags.TAGS.keys():
215+
if ExifTags.TAGS[orientation] == 'Orientation':
216+
break
217+
exif = dict(image.getexif().items())
218+
if exif.get(orientation) == 3:
219+
image = image.rotate(180, expand=True)
220+
elif exif.get(orientation) == 6:
221+
image = image.rotate(270, expand=True)
222+
elif exif.get(orientation) == 8:
223+
image = image.rotate(90, expand=True)
224+
except (AttributeError, KeyError, IndexError):
225+
pass
226+
return image
227227

228228
# ocr_model = ocr_predictor(det_arch='db_resnet50', reco_arch='crnn_vgg16_bn', pretrained=True)
229229
# document = DocumentFile.from_images("IMG_20240514_090947.jpg")

0 commit comments

Comments
 (0)