theAIGuysCode
diff --git a/‎conda-cpu.yml
+2-1 b/‎conda-cpu.yml
+2-1
diff --git a/‎conda-gpu.yml
+1 b/‎conda-gpu.yml
+1
diff --git a/‎core/functions.py
+30-2 b/‎core/functions.py
+30-2
diff --git a/‎core/utils.py
+82-3 b/‎core/utils.py
+82-3
diff --git a/‎data/images/car.jpg
110 KB b/‎data/images/car.jpg
110 KB
diff --git a/‎data/images/car2.jpg
149 KB b/‎data/images/car2.jpg
149 KB
diff --git a/‎data/images/car3.jpg
262 KB b/‎data/images/car3.jpg
262 KB
diff --git a/‎data/images/car4.jpg
53.4 KB b/‎data/images/car4.jpg
53.4 KB
diff --git a/‎data/video/license_plate.mp4
2.27 MB b/‎data/video/license_plate.mp4
2.27 MB
diff --git a/‎detect.py
+9-2 b/‎detect.py
+9-2
diff --git a/‎detect_video.py
+3-2 b/‎detect_video.py
+3-2
diff --git a/‎license_plate_recognizer.py
+58 b/‎license_plate_recognizer.py
+58
diff --git a/‎requirements-gpu.txt
+1 b/‎requirements-gpu.txt
+1
diff --git a/‎requirements.txt
+1 b/‎requirements.txt
+1
@@ -12,4 +12,5 @@ dependencies:
     - tensorflow==2.3.0rc0
     - absl-py
     - easydict
-    - pillow
+    - pillow
+    - pytesseract
@@ -15,3 +15,4 @@ dependencies:
     - absl-py
     - easydict
     - pillow
+    - pytesseract
@@ -3,8 +3,9 @@
 import random
 import numpy as np
 import tensorflow as tf
-from core.config import cfg
+import pytesseract
 from core.utils import read_class_names
+from core.config import cfg
 
 # function to count objects, can return total classes or count per class
 def count_objects(data, by_class = False, allowed_classes = list(read_class_names(cfg.YOLO.CLASSES).values())):
@@ -55,4 +56,31 @@ def crop_objects(img, data, path, allowed_classes):
             # save image
             cv2.imwrite(img_path, cropped_img)
         else:
-            continue
+            continue
+        
+# function to run general Tesseract OCR on any detections 
+def ocr(img, data):
+    """Run general Tesseract OCR on every detection and print the extracted text.
+
+    Args:
+        img: image array indexed as [row, column, channel]; it is converted
+            with cv2.COLOR_RGB2GRAY, so RGB channel order is expected.
+        data: tuple (boxes, scores, classes, num_objects); each box is
+            (xmin, ymin, xmax, ymax) in pixel coordinates of img.
+
+    Returns:
+        None. Results are printed, one line per detection.
+    """
+    boxes, scores, classes, num_objects = data
+    class_names = read_class_names(cfg.YOLO.CLASSES)
+    # extra pixels taken on each side of a bounding box
+    pad = 5
+    for i in range(num_objects):
+        # get class name for detection
+        class_name = class_names[int(classes[i])]
+        # separate coordinates from box
+        xmin, ymin, xmax, ymax = boxes[i]
+        # pad the region, clamping the start at 0: a negative slice start would
+        # wrap around to the opposite edge and give an empty or wrong crop
+        top = max(int(ymin) - pad, 0)
+        left = max(int(xmin) - pad, 0)
+        box = img[top:int(ymax) + pad, left:int(xmax) + pad]
+        if box.size == 0:
+            # degenerate box: nothing to read, and cv2.cvtColor raises on an empty image
+            continue
+        # grayscale region within bounding box
+        gray = cv2.cvtColor(box, cv2.COLOR_RGB2GRAY)
+        # threshold the image using Otsu's method to preprocess for tesseract
+        thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
+        # perform a median blur to smooth image slightly
+        blur = cv2.medianBlur(thresh, 3)
+        # resize image to double the original size as tesseract does better with certain text size
+        blur = cv2.resize(blur, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
+        # run tesseract and convert image text to string
+        try:
+            text = pytesseract.image_to_string(blur, config='--psm 11 --oem 3')
+        except Exception as err:
+            # OCR is best-effort: report the failure and move on to the next detection
+            print("Class: {}, OCR failed: {}".format(class_name, err))
+            continue
+        print("Class: {}, Text Extracted: {}".format(class_name, text))
@@ -3,7 +3,80 @@
 import colorsys
 import numpy as np
 import tensorflow as tf
+import pytesseract
 from core.config import cfg
+import re
+
+# function to recognize license plate numbers using Tesseract OCR
+def recognize_plate(img, coords):
+    """Read a license plate number from a detected region using Tesseract OCR.
+
+    The padded region is grayscaled, enlarged, blurred and Otsu-thresholded;
+    character-shaped contours are then run through Tesseract one at a time,
+    left to right.
+
+    Args:
+        img: image array indexed as [row, column, channel]; it is converted
+            with cv2.COLOR_RGB2GRAY.
+        coords: (xmin, ymin, xmax, ymax) of the plate in pixel coordinates.
+
+    Returns:
+        str: the recognized characters concatenated left to right; an empty
+        string when nothing could be read.
+    """
+    # separate coordinates from box
+    xmin, ymin, xmax, ymax = coords
+    # extra pixels taken on each side of the plate and of each character
+    pad = 5
+    # create blank string to hold license plate number
+    plate_num = ""
+    # pad the region, clamping the start at 0: a negative slice start would
+    # wrap around to the opposite edge and give an empty or wrong crop
+    top = max(int(ymin) - pad, 0)
+    left = max(int(xmin) - pad, 0)
+    box = img[top:int(ymax) + pad, left:int(xmax) + pad]
+    if box.size == 0:
+        # degenerate box: cv2.cvtColor raises on an empty image
+        return plate_num
+    # grayscale region within bounding box
+    gray = cv2.cvtColor(box, cv2.COLOR_RGB2GRAY)
+    # resize image to three times as large as original for better readability
+    gray = cv2.resize(gray, None, fx=3, fy=3, interpolation=cv2.INTER_CUBIC)
+    # perform gaussian blur to smoothen image
+    blur = cv2.GaussianBlur(gray, (5, 5), 0)
+    # threshold the blurred image using Otsu's method to preprocess for tesseract
+    # (inverse binary so the characters come out white for contour detection)
+    ret, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)
+    # create rectangular kernel for dilation
+    rect_kern = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
+    # apply dilation to make regions more clear
+    dilation = cv2.dilate(thresh, rect_kern, iterations=1)
+    # find contours of regions of interest within license plate
+    contours, hierarchy = cv2.findContours(dilation, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
+    # sort contours left-to-right
+    sorted_contours = sorted(contours, key=lambda ctr: cv2.boundingRect(ctr)[0])
+    # copy of the gray image that receives the accepted character boxes (debug aid)
+    im2 = gray.copy()
+    height, width = im2.shape
+    # restrict tesseract to upper-case letters and digits, one word per region
+    ocr_config = '-c tessedit_char_whitelist=0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ --psm 8 --oem 3'
+    # loop through contours and find individual letters and numbers in license plate
+    for cnt in sorted_contours:
+        x, y, w, h = cv2.boundingRect(cnt)
+        # skip boxes shorter than one sixth of the plate height
+        if height / float(h) > 6:
+            continue
+        # skip boxes whose height to width ratio is less than 1.5
+        if h / float(w) < 1.5:
+            continue
+        # skip boxes narrower than one fifteenth of the plate width
+        if width / float(w) > 15:
+            continue
+        # skip boxes with an area of less than 100 pixels
+        if h * w < 100:
+            continue
+
+        # draw the rectangle
+        cv2.rectangle(im2, (x, y), (x + w, y + h), (0, 255, 0), 2)
+        # grab character region of image, clamping the padded start at 0
+        roi = thresh[max(y - pad, 0):y + h + pad, max(x - pad, 0):x + w + pad]
+        # perform bitwise not to flip image to black text on white background
+        roi = cv2.bitwise_not(roi)
+        # perform another blur on character region
+        roi = cv2.medianBlur(roi, 5)
+        try:
+            text = pytesseract.image_to_string(roi, config=ocr_config)
+        except Exception as err:
+            # OCR is best-effort: report the failure and keep the characters read so far
+            print("License plate OCR failed: {}".format(err))
+            continue
+        # clean tesseract text by removing whitespace and any other non-alphanumeric characters
+        plate_num += re.sub(r'[\W_]+', '', text)
+    if plate_num:
+        print("License Plate #: ", plate_num)
+    #cv2.imshow("Character's Segmented", im2)
+    #cv2.waitKey(0)
+    return plate_num
 
 def load_freeze_layer(model='yolov4', tiny=False):
     if tiny:
@@ -103,7 +176,6 @@ def get_anchors(anchors_path, tiny=False):
         return anchors.reshape(3, 3, 2)
 
 def image_preprocess(image, target_size, gt_boxes=None):
-
     ih, iw    = target_size
     h,  w, _  = image.shape
 
@@ -134,7 +206,7 @@ def format_boxes(bboxes, image_height, image_width):
         box[0], box[1], box[2], box[3] = xmin, ymin, xmax, ymax
     return bboxes
 
-def draw_bbox(image, bboxes, info = False, counted_classes = None, show_label=True, allowed_classes=list(read_class_names(cfg.YOLO.CLASSES).values())):
+def draw_bbox(image, bboxes, info = False, counted_classes = None, show_label=True, allowed_classes=list(read_class_names(cfg.YOLO.CLASSES).values()), read_plate = False):
     classes = read_class_names(cfg.YOLO.CLASSES)
     num_classes = len(classes)
     image_h, image_w, _ = image.shape
@@ -157,6 +229,13 @@ def draw_bbox(image, bboxes, info = False, counted_classes = None, show_label=Tr
         if class_name not in allowed_classes:
             continue
         else:
+            if read_plate:
+                height_ratio = int(image_h / 25)
+                plate_number = recognize_plate(image, coor)
+                if plate_number != None:
+                    cv2.putText(image, plate_number, (int(coor[0]), int(coor[1]-height_ratio)), 
+                            cv2.FONT_HERSHEY_SIMPLEX, 1.25, (255,255,0), 2)
+
             bbox_color = colors[class_ind]
             bbox_thick = int(0.6 * (image_h + image_w) / 600)
             c1, c2 = (coor[0], coor[1]), (coor[2], coor[3])
@@ -172,7 +251,7 @@ def draw_bbox(image, bboxes, info = False, counted_classes = None, show_label=Tr
                 cv2.rectangle(image, c1, (np.float32(c3[0]), np.float32(c3[1])), bbox_color, -1) #filled
 
                 cv2.putText(image, bbox_mess, (c1[0], np.float32(c1[1] - 2)), cv2.FONT_HERSHEY_SIMPLEX,
-                            fontScale, (0, 0, 0), bbox_thick // 2, lineType=cv2.LINE_AA)
+                        fontScale, (0, 0, 0), bbox_thick // 2, lineType=cv2.LINE_AA)
 
             if counted_classes != None:
                 height_ratio = int(image_h / 25)
 
@@ -31,6 +31,8 @@
 flags.DEFINE_boolean('dont_show', False, 'dont show image output')
 flags.DEFINE_boolean('info', False, 'print info on detections')
 flags.DEFINE_boolean('crop', False, 'crop detections from images')
+flags.DEFINE_boolean('ocr', False, 'perform generic OCR on detection regions')
+flags.DEFINE_boolean('plate', False, 'perform license plate recognition')
 
 def main(_argv):
     config = ConfigProto()
@@ -118,15 +120,20 @@ def main(_argv):
                 pass
             crop_objects(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB), pred_bbox, crop_path, allowed_classes)
 
+        # if ocr flag is enabled, perform general text extraction using Tesseract OCR on object detection bounding box
+        if FLAGS.ocr:
+            ocr(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB), pred_bbox)
+
+        # if count flag is enabled, perform counting of objects
         if FLAGS.count:
             # count objects found
             counted_classes = count_objects(pred_bbox, by_class = False, allowed_classes=allowed_classes)
             # loop through dict and print
             for key, value in counted_classes.items():
                 print("Number of {}s: {}".format(key, value))
-            image = utils.draw_bbox(original_image, pred_bbox, FLAGS.info, counted_classes, allowed_classes=allowed_classes)
+            image = utils.draw_bbox(original_image, pred_bbox, FLAGS.info, counted_classes, allowed_classes=allowed_classes, read_plate = FLAGS.plate)
         else:
-            image = utils.draw_bbox(original_image, pred_bbox, FLAGS.info, allowed_classes=allowed_classes)
+            image = utils.draw_bbox(original_image, pred_bbox, FLAGS.info, allowed_classes=allowed_classes, read_plate = FLAGS.plate)
 
         image = Image.fromarray(image.astype(np.uint8))
         if not FLAGS.dont_show:
 
@@ -33,6 +33,7 @@
 flags.DEFINE_boolean('dont_show', False, 'dont show video output')
 flags.DEFINE_boolean('info', False, 'print info on detections')
 flags.DEFINE_boolean('crop', False, 'crop detections from images')
+flags.DEFINE_boolean('plate', False, 'perform license plate recognition')
 
 def main(_argv):
     config = ConfigProto()
@@ -154,9 +155,9 @@ def main(_argv):
             # loop through dict and print
             for key, value in counted_classes.items():
                 print("Number of {}s: {}".format(key, value))
-            image = utils.draw_bbox(frame, pred_bbox, FLAGS.info, counted_classes, allowed_classes=allowed_classes)
+            image = utils.draw_bbox(frame, pred_bbox, FLAGS.info, counted_classes, allowed_classes=allowed_classes, read_plate=FLAGS.plate)
         else:
-            image = utils.draw_bbox(frame, pred_bbox, FLAGS.info, allowed_classes=allowed_classes)
+            image = utils.draw_bbox(frame, pred_bbox, FLAGS.info, allowed_classes=allowed_classes, read_plate=FLAGS.plate)
 
         fps = 1.0 / (time.time() - start_time)
         print("FPS: %.2f" % fps)
 
@@ -0,0 +1,58 @@
+# test file if you want to quickly try tesseract on a license plate image
+import os
+import re
+
+import cv2
+import numpy as np
+import pytesseract
+
+# point to license plate image (works well with custom crop function)
+image_path = "./detections/crop/car3/license_plate_.png"
+gray = cv2.imread(image_path, 0)
+# cv2.imread returns None instead of raising when the file cannot be read
+if gray is None:
+    raise SystemExit("Could not read image: {}".format(image_path))
+# enlarge the plate to three times its size for better readability
+gray = cv2.resize(gray, None, fx=3, fy=3, interpolation=cv2.INTER_CUBIC)
+blur = cv2.GaussianBlur(gray, (5, 5), 0)
+# smooth the gray image too; below it is only used to draw the character boxes on
+gray = cv2.medianBlur(gray, 3)
+# perform otsu thresh (using binary inverse since opencv contours work better with white text)
+ret, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)
+cv2.imshow("Otsu", thresh)
+cv2.waitKey(0)
+rect_kern = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
+
+# apply dilation
+dilation = cv2.dilate(thresh, rect_kern, iterations=1)
+#cv2.imshow("dilation", dilation)
+#cv2.waitKey(0)
+# find contours and sort them left-to-right
+contours, hierarchy = cv2.findContours(dilation, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
+sorted_contours = sorted(contours, key=lambda ctr: cv2.boundingRect(ctr)[0])
+
+# create copy of image
+im2 = gray.copy()
+height, width = im2.shape
+# extra pixels taken on each side of a character region
+pad = 5
+
+plate_num = ""
+# loop through contours and find letters in license plate
+for cnt in sorted_contours:
+    x, y, w, h = cv2.boundingRect(cnt)
+
+    # skip boxes shorter than one sixth of the total height
+    if height / float(h) > 6:
+        continue
+    # skip boxes whose height to width ratio is less than 1.5
+    if h / float(w) < 1.5:
+        continue
+    # skip boxes narrower than one fifteenth of the total width
+    if width / float(w) > 15:
+        continue
+    # skip boxes with an area of less than 100 pixels
+    if h * w < 100:
+        continue
+    # draw the rectangle
+    cv2.rectangle(im2, (x, y), (x + w, y + h), (0, 255, 0), 2)
+    # clamp the padded start at 0 so a negative index cannot wrap to the far edge
+    roi = thresh[max(y - pad, 0):y + h + pad, max(x - pad, 0):x + w + pad]
+    roi = cv2.bitwise_not(roi)
+    roi = cv2.medianBlur(roi, 5)
+    #cv2.imshow("ROI", roi)
+    #cv2.waitKey(0)
+    text = pytesseract.image_to_string(roi, config='-c tessedit_char_whitelist=0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ --psm 8 --oem 3')
+    #print(text)
+    # drop the whitespace and other non-alphanumeric characters tesseract appends
+    plate_num += re.sub(r'[\W_]+', '', text)
+print(plate_num)
+cv2.imshow("Character's Segmented", im2)
+cv2.waitKey(0)
+cv2.destroyAllWindows()
@@ -6,3 +6,4 @@ absl-py
 matplotlib
 easydict
 pillow
+pytesseract
@@ -6,3 +6,4 @@ absl-py
 easydict
 matplotlib
 pillow
+pytesseract