Skip to content

Commit e98d7e1

Browse files
committed
Add license plate recognition using tesseract OCR
1 parent 98e5188 commit e98d7e1

14 files changed

+187
-10
lines changed

conda-cpu.yml

+2-1
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,5 @@ dependencies:
1212
- tensorflow==2.3.0rc0
1313
- absl-py
1414
- easydict
15-
- pillow
15+
- pillow
16+
- pytesseract

conda-gpu.yml

+1
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,4 @@ dependencies:
1515
- absl-py
1616
- easydict
1717
- pillow
18+
- pytesseract

core/functions.py

+30-2
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,9 @@
33
import random
44
import numpy as np
55
import tensorflow as tf
6-
from core.config import cfg
6+
import pytesseract
77
from core.utils import read_class_names
8+
from core.config import cfg
89

910
# function to count objects, can return total classes or count per class
1011
def count_objects(data, by_class = False, allowed_classes = list(read_class_names(cfg.YOLO.CLASSES).values())):
@@ -55,4 +56,31 @@ def crop_objects(img, data, path, allowed_classes):
5556
# save image
5657
cv2.imwrite(img_path, cropped_img)
5758
else:
58-
continue
59+
continue
60+
61+
# function to run general Tesseract OCR on any detections
62+
def ocr(img, data):
    """Run general-purpose Tesseract OCR on each detection and print the text.

    Args:
        img: image array indexed as ``img[row, col]``. It is converted with
            ``cv2.COLOR_RGB2GRAY``, so RGB channel order is assumed.
        data: tuple ``(boxes, scores, classes, num_objects)``; every
            ``boxes[i]`` unpacks to ``xmin, ymin, xmax, ymax``.

    Returns:
        None. One line is printed per detection, either the extracted text
        or the reason OCR failed.
    """
    boxes, scores, classes, num_objects = data
    class_names = read_class_names(cfg.YOLO.CLASSES)
    img_h, img_w = img.shape[:2]
    margin = 5  # extra pixels taken on each side of the bounding box

    for i in range(num_objects):
        # get class name for detection
        class_name = class_names[int(classes[i])]
        # separate coordinates from box
        xmin, ymin, xmax, ymax = boxes[i]

        # Clamp the padded box to the image. A negative index would otherwise
        # wrap around to the opposite edge and give an empty or wrong crop.
        top = min(max(int(ymin) - margin, 0), img_h)
        bottom = min(max(int(ymax) + margin, 0), img_h)
        left = min(max(int(xmin) - margin, 0), img_w)
        right = min(max(int(xmax) + margin, 0), img_w)
        box = img[top:bottom, left:right]
        if box.size == 0:
            # nothing of this detection lies inside the image
            continue

        # grayscale region within bounding box
        gray = cv2.cvtColor(box, cv2.COLOR_RGB2GRAY)
        # threshold the image using Otsu's method to preprocess for tesseract
        thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
        # perform a median blur to smooth image slightly
        blur = cv2.medianBlur(thresh, 3)
        # double the size, as tesseract does better with larger text
        blur = cv2.resize(blur, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)

        # run tesseract and convert image text to string
        try:
            text = pytesseract.image_to_string(blur, config='--psm 11 --oem 3')
        except Exception as err:
            # Stay best-effort (keep going with the next detection) but report
            # the failure instead of hiding it, e.g. a missing tesseract binary.
            print("Class: {}, OCR failed: {}".format(class_name, err))
        else:
            print("Class: {}, Text Extracted: {}".format(class_name, text))

core/utils.py

+82-3
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,80 @@
33
import colorsys
44
import numpy as np
55
import tensorflow as tf
6+
import pytesseract
67
from core.config import cfg
8+
import re
9+
10+
# function to recognize license plate numbers using Tesseract OCR
11+
def _crop_with_margin(img, xmin, ymin, xmax, ymax, margin=5):
    """Return the box region of img padded by margin pixels, clamped to the image."""
    img_h, img_w = img.shape[:2]
    # Clamp every edge: a negative index would wrap around to the opposite
    # side of the array and yield an empty or wrong region.
    top = min(max(int(ymin) - margin, 0), img_h)
    bottom = min(max(int(ymax) + margin, 0), img_h)
    left = min(max(int(xmin) - margin, 0), img_w)
    right = min(max(int(xmax) + margin, 0), img_w)
    return img[top:bottom, left:right]


def recognize_plate(img, coords):
    """Recognize a license plate number inside a bounding box using Tesseract.

    The plate region is cropped, upscaled, blurred and binarized with Otsu's
    method. Contours that look like characters are then OCR'd one at a time,
    left to right.

    Args:
        img: image array indexed as ``img[row, col]``. It is converted with
            ``cv2.COLOR_RGB2GRAY``, so RGB channel order is assumed.
        coords: sequence that unpacks to ``xmin, ymin, xmax, ymax``.

    Returns:
        str: the concatenated characters that were read. Empty if the box
        lies outside the image or no character could be read.
    """
    # separate coordinates from box
    xmin, ymin, xmax, ymax = coords
    # bounded region plus an additional 5 pixels on each side
    box = _crop_with_margin(img, xmin, ymin, xmax, ymax)
    if box.size == 0:
        return ""

    # grayscale region within bounding box
    gray = cv2.cvtColor(box, cv2.COLOR_RGB2GRAY)
    # resize image to three times as large as original for better readability
    gray = cv2.resize(gray, None, fx=3, fy=3, interpolation=cv2.INTER_CUBIC)
    # perform gaussian blur to smoothen image
    blur = cv2.GaussianBlur(gray, (5, 5), 0)
    # Threshold the *blurred* image using Otsu's method. The blur used to be
    # computed and then ignored; smoothing first gives Otsu a cleaner histogram.
    ret, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)
    # create rectangular kernel for dilation
    rect_kern = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    # apply dilation to make regions more clear
    dilation = cv2.dilate(thresh, rect_kern, iterations=1)
    # findContours returns 2 values on OpenCV 2.x/4.x and 3 values on 3.x;
    # the contour list is second-to-last in both cases.
    contours = cv2.findContours(dilation, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
    # sort contours left-to-right
    sorted_contours = sorted(contours, key=lambda ctr: cv2.boundingRect(ctr)[0])

    height, width = gray.shape
    non_alnum = re.compile(r'[\W_]+')
    # string to hold license plate number
    plate_num = ""
    # loop through contours and find individual letters and numbers
    for cnt in sorted_contours:
        x, y, w, h = cv2.boundingRect(cnt)
        # skip if the box is less than 1/6 of the plate height
        if height / float(h) > 6:
            continue
        # skip if the height to width ratio is less than 1.5
        if h / float(w) < 1.5:
            continue
        # skip if the box is less than 1/15 of the plate width
        if width / float(w) > 15:
            continue
        # skip if the area is less than 100 pixels
        if h * w < 100:
            continue

        # grab character region of image, padded by 5 pixels and clamped
        roi = _crop_with_margin(thresh, x, y, x + w, y + h)
        # perform bitwise not to flip image to black text on white background
        roi = cv2.bitwise_not(roi)
        # perform another blur on character region
        roi = cv2.medianBlur(roi, 5)
        try:
            text = pytesseract.image_to_string(roi, config='-c tessedit_char_whitelist=0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ --psm 8 --oem 3')
        except Exception as err:
            # Stay best-effort per character, but report the failure instead
            # of hiding it, e.g. a missing tesseract binary.
            print("License plate OCR failed: {}".format(err))
            continue
        # clean tesseract text by removing whitespace and other non-alphanumerics
        plate_num += non_alnum.sub('', text)

    print("License Plate #: ", plate_num)
    return plate_num
780

881
def load_freeze_layer(model='yolov4', tiny=False):
982
if tiny:
@@ -103,7 +176,6 @@ def get_anchors(anchors_path, tiny=False):
103176
return anchors.reshape(3, 3, 2)
104177

105178
def image_preprocess(image, target_size, gt_boxes=None):
106-
107179
ih, iw = target_size
108180
h, w, _ = image.shape
109181

@@ -134,7 +206,7 @@ def format_boxes(bboxes, image_height, image_width):
134206
box[0], box[1], box[2], box[3] = xmin, ymin, xmax, ymax
135207
return bboxes
136208

137-
def draw_bbox(image, bboxes, info = False, counted_classes = None, show_label=True, allowed_classes=list(read_class_names(cfg.YOLO.CLASSES).values())):
209+
def draw_bbox(image, bboxes, info = False, counted_classes = None, show_label=True, allowed_classes=list(read_class_names(cfg.YOLO.CLASSES).values()), read_plate = False):
138210
classes = read_class_names(cfg.YOLO.CLASSES)
139211
num_classes = len(classes)
140212
image_h, image_w, _ = image.shape
@@ -157,6 +229,13 @@ def draw_bbox(image, bboxes, info = False, counted_classes = None, show_label=Tr
157229
if class_name not in allowed_classes:
158230
continue
159231
else:
232+
if read_plate:
233+
height_ratio = int(image_h / 25)
234+
plate_number = recognize_plate(image, coor)
235+
if plate_number != None:
236+
cv2.putText(image, plate_number, (int(coor[0]), int(coor[1]-height_ratio)),
237+
cv2.FONT_HERSHEY_SIMPLEX, 1.25, (255,255,0), 2)
238+
160239
bbox_color = colors[class_ind]
161240
bbox_thick = int(0.6 * (image_h + image_w) / 600)
162241
c1, c2 = (coor[0], coor[1]), (coor[2], coor[3])
@@ -172,7 +251,7 @@ def draw_bbox(image, bboxes, info = False, counted_classes = None, show_label=Tr
172251
cv2.rectangle(image, c1, (np.float32(c3[0]), np.float32(c3[1])), bbox_color, -1) #filled
173252

174253
cv2.putText(image, bbox_mess, (c1[0], np.float32(c1[1] - 2)), cv2.FONT_HERSHEY_SIMPLEX,
175-
fontScale, (0, 0, 0), bbox_thick // 2, lineType=cv2.LINE_AA)
254+
fontScale, (0, 0, 0), bbox_thick // 2, lineType=cv2.LINE_AA)
176255

177256
if counted_classes != None:
178257
height_ratio = int(image_h / 25)

data/images/car.jpg

110 KB
Loading

data/images/car2.jpg

149 KB
Loading

data/images/car3.jpg

262 KB
Loading

data/images/car4.jpg

53.4 KB
Loading

data/video/license_plate.mp4

2.27 MB
Binary file not shown.

detect.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131
flags.DEFINE_boolean('dont_show', False, 'dont show image output')
3232
flags.DEFINE_boolean('info', False, 'print info on detections')
3333
flags.DEFINE_boolean('crop', False, 'crop detections from images')
34+
flags.DEFINE_boolean('ocr', False, 'perform generic OCR on detection regions')
35+
flags.DEFINE_boolean('plate', False, 'perform license plate recognition')
3436

3537
def main(_argv):
3638
config = ConfigProto()
@@ -118,15 +120,20 @@ def main(_argv):
118120
pass
119121
crop_objects(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB), pred_bbox, crop_path, allowed_classes)
120122

123+
# if ocr flag is enabled, perform general text extraction using Tesseract OCR on object detection bounding box
124+
if FLAGS.ocr:
125+
ocr(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB), pred_bbox)
126+
127+
# if count flag is enabled, perform counting of objects
121128
if FLAGS.count:
122129
# count objects found
123130
counted_classes = count_objects(pred_bbox, by_class = False, allowed_classes=allowed_classes)
124131
# loop through dict and print
125132
for key, value in counted_classes.items():
126133
print("Number of {}s: {}".format(key, value))
127-
image = utils.draw_bbox(original_image, pred_bbox, FLAGS.info, counted_classes, allowed_classes=allowed_classes)
134+
image = utils.draw_bbox(original_image, pred_bbox, FLAGS.info, counted_classes, allowed_classes=allowed_classes, read_plate = FLAGS.plate)
128135
else:
129-
image = utils.draw_bbox(original_image, pred_bbox, FLAGS.info, allowed_classes=allowed_classes)
136+
image = utils.draw_bbox(original_image, pred_bbox, FLAGS.info, allowed_classes=allowed_classes, read_plate = FLAGS.plate)
130137

131138
image = Image.fromarray(image.astype(np.uint8))
132139
if not FLAGS.dont_show:

detect_video.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
flags.DEFINE_boolean('dont_show', False, 'dont show video output')
3434
flags.DEFINE_boolean('info', False, 'print info on detections')
3535
flags.DEFINE_boolean('crop', False, 'crop detections from images')
36+
flags.DEFINE_boolean('plate', False, 'perform license plate recognition')
3637

3738
def main(_argv):
3839
config = ConfigProto()
@@ -154,9 +155,9 @@ def main(_argv):
154155
# loop through dict and print
155156
for key, value in counted_classes.items():
156157
print("Number of {}s: {}".format(key, value))
157-
image = utils.draw_bbox(frame, pred_bbox, FLAGS.info, counted_classes, allowed_classes=allowed_classes)
158+
image = utils.draw_bbox(frame, pred_bbox, FLAGS.info, counted_classes, allowed_classes=allowed_classes, read_plate=FLAGS.plate)
158159
else:
159-
image = utils.draw_bbox(frame, pred_bbox, FLAGS.info, allowed_classes=allowed_classes)
160+
image = utils.draw_bbox(frame, pred_bbox, FLAGS.info, allowed_classes=allowed_classes, read_plate=FLAGS.plate)
160161

161162
fps = 1.0 / (time.time() - start_time)
162163
print("FPS: %.2f" % fps)

license_plate_recognizer.py

+58
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
# test file if you want to quickly try tesseract on a license plate image
2+
import pytesseract
3+
import cv2
4+
import os
5+
import numpy as np
6+
7+
# point to license plate image (works well with custom crop function)
8+
plate_path = "./detections/crop/car3/license_plate_.png"
# read as grayscale (flag 0)
gray = cv2.imread(plate_path, 0)
if gray is None:
    # cv2.imread returns None instead of raising when the file cannot be read
    raise FileNotFoundError("Could not read image: {}".format(plate_path))
gray = cv2.resize(gray, None, fx=3, fy=3, interpolation=cv2.INTER_CUBIC)
blur = cv2.GaussianBlur(gray, (5, 5), 0)
gray = cv2.medianBlur(gray, 3)
# perform otsu thresh (using binary inverse since opencv contours work better with white text)
ret, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)
cv2.imshow("Otsu", thresh)
cv2.waitKey(0)
rect_kern = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))

# apply dilation
dilation = cv2.dilate(thresh, rect_kern, iterations=1)
# find contours; findContours returns 2 values on OpenCV 2.x/4.x and 3 on 3.x,
# and the contour list is second-to-last in both cases
contours = cv2.findContours(dilation, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
# sort contours left-to-right
sorted_contours = sorted(contours, key=lambda ctr: cv2.boundingRect(ctr)[0])

# create copy of image to draw the accepted character boxes on
im2 = gray.copy()
height, width = im2.shape

plate_num = ""
# loop through contours and find letters in license plate
for cnt in sorted_contours:
    x, y, w, h = cv2.boundingRect(cnt)

    # skip if the box is less than 1/6 of the total height
    if height / float(h) > 6:
        continue
    # skip if the height to width ratio is less than 1.5
    if h / float(w) < 1.5:
        continue
    # skip if the box is less than 1/15 of the total width
    if width / float(w) > 15:
        continue
    # skip if the area is less than 100 pixels
    if h * w < 100:
        continue

    # draw the rectangle
    cv2.rectangle(im2, (x, y), (x + w, y + h), (0, 255, 0), 2)
    # Take the character with a 5 pixel margin. The start indices are clamped
    # at 0 because a negative start would wrap around and give an empty region.
    roi = thresh[max(y - 5, 0):y + h + 5, max(x - 5, 0):x + w + 5]
    # flip to black text on white background
    roi = cv2.bitwise_not(roi)
    roi = cv2.medianBlur(roi, 5)
    text = pytesseract.image_to_string(roi, config='-c tessedit_char_whitelist=0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ --psm 8 --oem 3')
    # keep only the recognized characters; tesseract output carries
    # trailing whitespace and form feeds
    plate_num += "".join(ch for ch in text if ch.isalnum())

print(plate_num)
cv2.imshow("Character's Segmented", im2)
cv2.waitKey(0)
cv2.destroyAllWindows()

requirements-gpu.txt

+1
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,4 @@ absl-py
66
matplotlib
77
easydict
88
pillow
9+
pytesseract

requirements.txt

+1
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,4 @@ absl-py
66
easydict
77
matplotlib
88
pillow
9+
pytesseract

0 commit comments

Comments
 (0)