Skip to content

Commit a81a4fc

Browse files
committed
refactoring
1 parent eb7e60f commit a81a4fc

File tree

3 files changed

+52
-48
lines changed

3 files changed

+52
-48
lines changed

extract_words.py

+38-34
Original file line numberDiff line numberDiff line change
@@ -52,21 +52,50 @@ def ocrOutput(self, img_name, img, bbox):
5252
Saves the text outputs in JSON format.
5353
"""
5454
crop_img_names = cropRoi(img, bbox, self.denoise)
55+
print("crop img names:", crop_img_names)
5556
id_infos= ["Tc", "Surname", "Name", "DateofBirth"]
5657
jsonData = JsonData()
57-
58+
text_output = {"Tc":"", "Surname":"", "Name":"", "DateofBirth":""}
5859
for info, img in zip(id_infos, crop_img_names):
5960
result = self.reader.readtext(img)
6061
if(len(result)):
6162
box, text, prob = result[0]
62-
jsonData.text_output[info] = text.upper()
63-
jsonData.text_output["DateofBirth"] = getonlyDigits(jsonData.text_output["DateofBirth"])
63+
text_output[info] = text.upper()
64+
65+
text_output["DateofBirth"] = getonlyDigits(text_output["DateofBirth"])
6466

65-
CardInfo[img_name] = jsonData.text_output
67+
CardInfo[img_name] = text_output
6668
jsonData.saveDict(CardInfo)
6769

68-
return jsonData.text_output
70+
return text_output
6971

72+
class TesseractOcr:
73+
74+
def __init__(self, border_thresh, denoise,) -> None:
75+
self.denoise = denoise
76+
self.BORDER_THRSH = border_thresh
77+
78+
79+
80+
def ocrOutput(self, img_name, img, bbox):
81+
"""
82+
Saves the text outputs in JSON format.
83+
"""
84+
crop_img_names = cropRoi(img, bbox, self.denoise)
85+
id_infos= ["Tc", "Surname", "Name", "DateofBirth"]
86+
jsonData = JsonData()
87+
text_output = {"Tc":"", "Surname":"", "Name":"", "DateofBirth":""}
88+
for info, img in zip(id_infos, crop_img_names):
89+
text = pytesseract.image_to_string(img)
90+
91+
text_output[info] = text.upper()
92+
93+
text_output["DateofBirth"] = getonlyDigits(text_output["DateofBirth"])
94+
95+
CardInfo[img_name] = text_output
96+
jsonData.saveDict(CardInfo)
97+
98+
return text_output
7099

71100
def cropRoi(img, bbox, denoise):
72101

@@ -84,9 +113,9 @@ def cropRoi(img, bbox, denoise):
84113

85114
if not os.path.exists("outputs/target_crops/"):
86115
os.makedirs("outputs/target_crops/")
87-
crop_name = "outputs/target_crops/" + str(info) +".jpg"
88-
plt.imsave(crop_name, crop_img)
89-
crop_img_names.append(crop_name)
116+
crop_name = "outputs/target_crops/" + str(info) +".jpg"
117+
plt.imsave(crop_name, crop_img)
118+
crop_img_names.append(crop_name)
90119

91120
return crop_img_names
92121

@@ -116,32 +145,7 @@ def denoiseImage(img):
116145

117146

118147

119-
class TesseractOcr:
120-
121-
def __init__(self, border_thresh, denoise,) -> None:
122-
self.denoise = denoise
123-
self.BORDER_THRSH = border_thresh
124-
125-
126-
def ocrOutput(self, img_name, img, bbox):
127-
"""
128-
it saves the txt outputs as a json format
129-
"""
130-
crop_img_names = cropRoi(img, bbox, self.denoise)
131-
id_infos= ["Tc", "Surname", "Name", "DateofBirth"]
132-
jsonData = JsonData()
133-
134-
for info, img in zip(id_infos, crop_img_names):
135-
text = pytesseract.image_to_string(img)
136-
137-
jsonData.text_output[info] = text.upper()
138-
139-
jsonData.text_output["DateofBirth"] = getonlyDigits(jsonData.text_output["DateofBirth"])
140-
141-
CardInfo[img_name] = jsonData.text_output
142-
jsonData.saveDict(CardInfo)
143-
144-
return jsonData.text_output
148+
145149

146150
def factory(ocr_method = "EasyOcr", border_thresh = 3, denoise = False):
147151
ocr_factory = {"EasyOcr": EasyOcr,

main.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,7 @@ def getBoxRegions(regions):
205205

206206
plt.title("final_img")
207207
plt.imshow(final_img)
208-
plt.imsave("final_imgp.jpg",final_img)
208+
#plt.imsave("final_imgp.jpg",final_img)
209209
#cv2.imwrite("predicted_mask.jpg", final_img)
210210
plt.show()
211211

utlis.py

+13-13
Original file line numberDiff line numberDiff line change
@@ -150,21 +150,21 @@ def correctPerspective(img):
150150

151151
warped_img = warpImg(img, approx , width_q, heigth_q)
152152

153-
plt.title("original image")
154-
plt.imshow(img)
155-
plt.show()
156-
plt.imsave("ori_img.jpg", img)
153+
#plt.title("original image")
154+
#plt.imshow(img)
155+
#plt.show()
156+
#plt.imsave("ori_img.jpg", img)
157157

158158

159-
plt.title("processed image")
160-
plt.imshow(img_erosion)
161-
plt.show()
162-
plt.imsave("img_erosion.jpg", img_erosion)
163-
164-
plt.title("warped image")
165-
plt.imshow(warped_img)
166-
plt.show()
167-
plt.imsave("warped_img.jpg", warped_img)
159+
#plt.title("processed image")
160+
#plt.imshow(img_erosion)
161+
#plt.show()
162+
#plt.imsave("img_erosion.jpg", img_erosion)
163+
164+
#plt.title("warped image")
165+
#plt.imshow(warped_img)
166+
#plt.show()
167+
#plt.imsave("warped_img.jpg", warped_img)
168168

169169

170170
return warped_img

0 commit comments

Comments
 (0)