import io
import time
import traceback
from PIL import Image

# Import CodeProject.AI SDK
from codeproject_ai_sdk import LogVerbosity, ModuleRunner, JSON

from options import Options
from paddleocr import PaddleOCR

ocr           = None
no_text_found = 'Text Not Found'

def init_detect_ocr(opts: Options) -> None:

    global ocr

    # See notes at the end of this file for options.
    ocr = PaddleOCR(lang                = opts.language,
                    use_gpu             = opts.use_gpu,
                    show_log            = opts.log_verbosity == LogVerbosity.Loud,
                    det_db_unclip_ratio = opts.det_db_unclip_ratio,
                    det_db_box_thresh   = opts.box_detect_threshold,
                    drop_score          = opts.char_detect_threshold,
                    rec_algorithm       = opts.algorithm,
                    cls_model_dir       = opts.cls_model_dir,
                    det_model_dir       = opts.det_model_dir,
                    rec_model_dir       = opts.rec_model_dir)

def read_text(module_runner: ModuleRunner, image: Image.Image, rotate_deg: int = 0) -> JSON:

    outputs = []

    # Rotate the image if needed
    working_image = image
    if rotate_deg != 0:
        working_image = image.rotate(rotate_deg, expand=True, resample=Image.BICUBIC)

    # Possibly run it through a super-resolution module to improve readability
    # working_image = enhance_image(working_image)
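    #
    # A possible enhance_image helper is sketched below. It is hypothetical (not
    # part of this module or the SDK) and simply upscales with Lanczos resampling
    # as a stand-in for a real super-resolution step:
    #
    #   def enhance_image(img: Image.Image, scale: int = 2) -> Image.Image:
    #       return img.resize((img.width * scale, img.height * scale), Image.LANCZOS)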

    # Read text
    inferenceTimeMs = 0
    try:
        # Convert the image to a bytes array
        with io.BytesIO() as image_buffer:
            working_image.save(image_buffer, format='JPEG')
            img_byte_arr = image_buffer.getvalue()

        start_time = time.perf_counter()
        ocr_response = ocr.ocr(img_byte_arr, cls=True)
        inferenceTimeMs = int((time.perf_counter() - start_time) * 1000)

        # Note that ocr_response[0][0][0][0] could be a float with value 0 ('false'), or in
        # some other universe maybe it's a string. To be really careful we would have a test
        # like:
        #   if hasattr(ocr_response[0][0][0][0], '__len__') and \
        #      not isinstance(ocr_response[0][0][0][0], str)
        if not ocr_response or not ocr_response[0] or not ocr_response[0][0] or not ocr_response[0][0][0]:
            return { "success": False, "error": "No OCR response received", "inferenceMs": inferenceTimeMs }

        # Seems that different versions of paddle return different structures, OR paddle
        # returns different structures depending on its mood. We're expecting:
        #   ocr_response = array of a single set of detections
        #     -> detections = array of detection
        #       -> detection = array of [bounding box, classification]
        #         -> bounding box = array of [x,y] points, classification = [label, confidence]
        # so ocr_response[0][0][0][0][0] = first 'x' of the bounding boxes, which is a float.
        # However, the first "array of a single set of detections" isn't always there, so first
        # check whether ocr_response[0][0][0][0] is a float.
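        #
        # For illustration only (the numbers and text below are made up, not real
        # output), the fully nested form would look like:
        #   [ [ [ [[10.0, 20.0], [110.0, 20.0], [110.0, 45.0], [10.0, 45.0]],
        #         ('ABC123', 0.98) ] ] ]
        # while the un-nested form starts one level down, at the list of detections.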

        detections = ocr_response if isinstance(ocr_response[0][0][0][0], float) else ocr_response[0]

        for detection in detections:
            bounding_box   = detection[0]   # [ top left, top right, bottom right, bottom left ], each an [x,y]
            classification = detection[1]

            label      = classification[0]
            confidence = classification[1]

            if label and confidence:

                # Obviously some optimisation can be done here, but is it worth
                # it? The trivial optimisation is to assume the order of the
                # points, but that's dangerous.

                output = {
                    "confidence": confidence,
                    "label":      label,
                    "x_min": int(min(point[0] for point in bounding_box)),   # int(bounding_box[0][0]),
                    "y_min": int(min(point[1] for point in bounding_box)),   # int(bounding_box[0][1]),
                    "x_max": int(max(point[0] for point in bounding_box)),   # int(bounding_box[3][0]),
                    "y_max": int(max(point[1] for point in bounding_box)),   # int(bounding_box[3][1]),
                }
                outputs.append(output)

        # The operation was successfully completed. There just wasn't any text:
        # if not outputs:
        #     return { "success": False, "predictions": None, "inferenceMs": inferenceTimeMs }

        return { "success": True, "predictions": outputs, "inferenceMs": inferenceTimeMs }

    except Exception as ex:
        module_runner.report_error(ex, __file__)

        message = "".join(traceback.TracebackException.from_exception(ex).format())
        return { "success": False, "error": message, "inferenceMs": inferenceTimeMs }

109+ """
110+ Options for the PaddleOCR object:
111+
112+ Parameter Default Description
113+ -------------------------------------------------------------------------------
114+ use_gpu TRUE use GPU or not
115+ gpu_mem 8000M GPU memory size used for initialization
116+ image_dir The images path or folder path for predicting
117+ when used by the command line
118+ det_algorithm DB Type of detection algorithm selected (DB = Differentiable Binarization)
119+ det_model_dir None the text detection inference model folder. There
120+ are two ways to transfer parameters, 1. None: Automatically download
121+ the built-in model to ~/.paddleocr/det;
122+ 2. The path of the inference model converted by yourself, the model
123+ and params files must be included in the model path
124+ det_max_side_len 960 The maximum size of the long side of the image. When the long side
125+ exceeds this value, the long side will be resized to this size, and
126+ the short side will be scaled proportionally
127+ det_db_thresh 0.3 Binarization threshold value of DB output map
128+ det_db_box_thresh 0.5 The threshold value of the DB output box. Boxes score lower than
129+ this value will be discarded
130+ det_db_unclip_ratio 2 The expanded ratio of DB output box
131+ det_east_score_thresh 0.8 Binarization threshold value of EAST output map
132+ det_east_cover_thresh 0.1 The threshold value of the EAST output box. Boxes score lower than
133+ this value will be discarded
134+ det_east_nms_thresh 0.2 The NMS threshold value of EAST model output box
135+ rec_algorithm CRNN Type of recognition algorithm selected
136+ rec_model_dir None the text recognition inference model folder. There are two ways to
137+ transfer parameters, 1. None: Automatically download the built-in
138+ model to ~/.paddleocr/rec; 2. The path of the inference model
139+ converted by yourself, the model and params files must be included
140+ in the model path
141+ rec_image_shape "3,32,320" image shape of recognition algorithm
142+ rec_char_type ch Character type of recognition algorithm, Chinese (ch) or English (en)
143+ rec_batch_num 30 When performing recognition, the batchsize of forward images
144+ max_text_length 25 The maximum text length that the recognition algorithm can recognize
145+ rec_char_dict_path ./ppocr/utils/ppocr_keys_v1.txt the alphabet path which needs to be modified to
146+ your own path when rec_model_Name use mode 2
147+ use_space_char TRUE Whether to recognize spaces
148+ use_angle_cls FALSE Whether to load classification model
149+ cls_model_dir None the classification inference model folder. There are two ways to
150+ transfer parameters, 1. None: Automatically download the built-in model
151+ to ~/.paddleocr/cls; 2. The path of the inference model converted
152+ by yourself, the model and params files must be included in the
153+ model path
154+ cls_image_shape "3,48,192" image shape of classification algorithm
155+ label_list ['0','180'] label list of classification algorithm
156+ cls_batch_num 30 When performing classification, the batchsize of forward images
157+ enable_mkldnn FALSE Whether to enable mkldnn
158+ use_zero_copy_run FALSE Whether to forward by zero_copy_run
159+ lang ch The support language, Only Chinese (ch), English (en), French (french),
160+ German (german), Korean (korean), Japanese (japan) are supported
161+ det TRUE Enable detection when ppocr.ocr func exec
162+ rec TRUE Enable recognition when ppocr.ocr func exec
163+ cls FALSE Enable classification when ppocr.ocr func exec. This parameter only
164+ exists in code usage mode
165+ """