Skip to content

Commit 376fcb4

Browse files
author
Chris Maunder
committed
Initial Commit
0 parents  commit 376fcb4

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+3040
-0
lines changed

OCR.py

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
import io
2+
import time
3+
import traceback
4+
from PIL import Image
5+
6+
# Import CodeProject.AI SDK
7+
from codeproject_ai_sdk import LogVerbosity, ModuleRunner, JSON
8+
9+
from options import Options
10+
from paddleocr import PaddleOCR
11+
12+
# Shared PaddleOCR engine. Stays None until init_detect_ocr() assigns it;
# read_text() calls ocr.ocr(...) on whatever is stored here.
ocr = None
13+
# Placeholder label for "nothing recognised". Not referenced by the functions
# in this file, so presumably consumed by an importing module -- TODO confirm.
no_text_found = 'Text Not Found'
14+
15+
def init_detect_ocr(opts: Options) -> None:
    """Create the PaddleOCR engine and store it in the module-level ``ocr``.

    Args:
        opts: Module options supplying the language, GPU flag, log verbosity,
            detection/recognition thresholds, recognition algorithm and the
            three model directories handed to PaddleOCR.
    """
    global ocr

    # See notes at the end of this file for options.
    engine_settings = {
        "lang": opts.language,
        "use_gpu": opts.use_gpu,
        # PaddleOCR's own logging is only switched on at the loudest verbosity.
        "show_log": opts.log_verbosity == LogVerbosity.Loud,
        "det_db_unclip_ratio": opts.det_db_unclip_ratio,
        "det_db_box_thresh": opts.box_detect_threshold,
        "drop_score": opts.char_detect_threshold,
        "rec_algorithm": opts.algorithm,
        "cls_model_dir": opts.cls_model_dir,
        "det_model_dir": opts.det_model_dir,
        "rec_model_dir": opts.rec_model_dir,
    }
    ocr = PaddleOCR(**engine_settings)
30+
31+
def read_text(module_runner: ModuleRunner, image: Image.Image, rotate_deg: int = 0) -> JSON:
    """Run OCR over ``image`` and return the recognised text regions.

    Args:
        module_runner: Runner used to report any exception raised during OCR.
        image: The PIL image to read.
        rotate_deg: Degrees to rotate the image by before reading; 0 leaves
            the image untouched.

    Returns:
        On success: ``{"success": True, "predictions": [...], "inferenceMs": int}``
        where each prediction holds ``confidence``, ``label`` and the integer
        bounds ``x_min``, ``y_min``, ``x_max``, ``y_max`` of its bounding box.
        On failure: ``{"success": False, "error": str, "inferenceMs": int}``.
    """
    import numbers  # local import so this function does not rely on a new file-level import

    # Modes Pillow's JPEG encoder can write directly. Anything else (RGBA, LA,
    # P, ...) makes Image.save raise OSError, so it is converted to RGB first.
    jpeg_writable_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

    outputs = []

    # rotate image if needed
    working_image = image
    if rotate_deg != 0:
        working_image = image.rotate(rotate_deg, expand=True, resample=Image.BICUBIC)

    # Possibly run it through a super-resolution module to improve readability
    # working_image = enhance_image(working_image)

    # Read text
    inference_ms = 0
    try:
        if working_image.mode not in jpeg_writable_modes:
            working_image = working_image.convert("RGB")

        # Convert the image to a bytes array
        with io.BytesIO() as image_buffer:
            working_image.save(image_buffer, format='JPEG')
            img_byte_arr = image_buffer.getvalue()

        start_time = time.perf_counter()
        ocr_response = ocr.ocr(img_byte_arr, cls=True)
        inference_ms = int((time.perf_counter() - start_time) * 1000)

        # Walk down to the third nesting level, bailing out as soon as any
        # level is missing or empty (e.g. nothing was detected).
        if not ocr_response or not ocr_response[0] or not ocr_response[0][0] or not ocr_response[0][0][0]:
            return { "success": False, "error": "No OCR response received", "inferenceMs" : inference_ms }

        # Different versions of paddle return different structures. We expect
        #   ocr_response = array holding a single set of detections
        #     -> detections   = array of detection
        #     -> detection    = [bounding box, classification]
        #     -> bounding box = array of [x, y]; classification = [label, confidence]
        # However the outer "single set of detections" wrapper isn't always
        # there. When it is missing, ocr_response[0][0][0][0] is the first 'x'
        # coordinate (a number) rather than an [x, y] point. numbers.Real is
        # used instead of float so integer and NumPy scalar coordinates are
        # recognised as well.
        probe = ocr_response[0][0][0][0]
        detections = ocr_response if isinstance(probe, numbers.Real) else ocr_response[0]

        for detection in detections:
            bounding_box   = detection[0]   # [ topleft, topright, bottom right, bottom left ], each is [x,y]
            classification = detection[1]

            label      = classification[0]
            confidence = classification[1]

            if not (label and confidence):
                continue

            # Take min/max over all points rather than assuming the order of
            # the corners, which would be cheaper but dangerous.
            xs = [point[0] for point in bounding_box]
            ys = [point[1] for point in bounding_box]

            outputs.append({
                "confidence": confidence,
                "label": label,
                "x_min": int(min(xs)),
                "y_min": int(min(ys)),
                "x_max": int(max(xs)),
                "y_max": int(max(ys)),
            })

        # An empty prediction list is still a success: the operation completed,
        # there just wasn't any usable text.
        return { "success": True, "predictions" : outputs, "inferenceMs" : inference_ms }

    except Exception as ex:
        module_runner.report_error(ex, __file__)

        message = "".join(traceback.TracebackException.from_exception(ex).format())
        return { "success": False, "error": message, "inferenceMs" : inference_ms }
107+
108+
109+
"""
110+
Options for the PaddleOCR object:
111+
112+
Parameter Default Description
113+
-------------------------------------------------------------------------------
114+
use_gpu TRUE use GPU or not
115+
gpu_mem 8000M GPU memory size used for initialization
116+
image_dir The images path or folder path for predicting
117+
when used by the command line
118+
det_algorithm DB Type of detection algorithm selected (DB = Differentiable Binarization)
119+
det_model_dir None the text detection inference model folder. There
120+
are two ways to transfer parameters, 1. None: Automatically download
121+
the built-in model to ~/.paddleocr/det;
122+
2. The path of the inference model converted by yourself, the model
123+
and params files must be included in the model path
124+
det_max_side_len 960 The maximum size of the long side of the image. When the long side
125+
exceeds this value, the long side will be resized to this size, and
126+
the short side will be scaled proportionally
127+
det_db_thresh 0.3 Binarization threshold value of DB output map
128+
det_db_box_thresh 0.5 The threshold value of the DB output box. Boxes scoring lower than
129+
this value will be discarded
130+
det_db_unclip_ratio 2 The expanded ratio of DB output box
131+
det_east_score_thresh 0.8 Binarization threshold value of EAST output map
132+
det_east_cover_thresh 0.1 The threshold value of the EAST output box. Boxes scoring lower than
133+
this value will be discarded
134+
det_east_nms_thresh 0.2 The NMS threshold value of EAST model output box
135+
rec_algorithm CRNN Type of recognition algorithm selected
136+
rec_model_dir None the text recognition inference model folder. There are two ways to
137+
transfer parameters, 1. None: Automatically download the built-in
138+
model to ~/.paddleocr/rec; 2. The path of the inference model
139+
converted by yourself, the model and params files must be included
140+
in the model path
141+
rec_image_shape "3,32,320" image shape of recognition algorithm
142+
rec_char_type ch Character type of recognition algorithm, Chinese (ch) or English (en)
143+
rec_batch_num 30 When performing recognition, the batchsize of forward images
144+
max_text_length 25 The maximum text length that the recognition algorithm can recognize
145+
rec_char_dict_path ./ppocr/utils/ppocr_keys_v1.txt The path of the character dictionary (alphabet). Change it to
146+
your own path when rec_model_dir uses way 2 (a model you converted yourself)
147+
use_space_char TRUE Whether to recognize spaces
148+
use_angle_cls FALSE Whether to load classification model
149+
cls_model_dir None the classification inference model folder. There are two ways to
150+
transfer parameters, 1. None: Automatically download the built-in model
151+
to ~/.paddleocr/cls; 2. The path of the inference model converted
152+
by yourself, the model and params files must be included in the
153+
model path
154+
cls_image_shape "3,48,192" image shape of classification algorithm
155+
label_list ['0','180'] label list of classification algorithm
156+
cls_batch_num 30 When performing classification, the batchsize of forward images
157+
enable_mkldnn FALSE Whether to enable mkldnn
158+
use_zero_copy_run FALSE Whether to forward by zero_copy_run
159+
lang ch The language to recognise. Only Chinese (ch), English (en), French (french),
160+
German (german), Korean (korean) and Japanese (japan) are supported
161+
det TRUE Enable detection when ppocr.ocr func exec
162+
rec TRUE Enable recognition when ppocr.ocr func exec
163+
cls FALSE Enable classification when ppocr.ocr func exec. This parameter only
164+
exists in code usage mode
165+
"""

OCR.pyproj

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003" ToolsVersion="4.0">
2+
<PropertyGroup>
3+
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
4+
<SchemaVersion>2.0</SchemaVersion>
5+
<ProjectGuid>{0690d5f7-864f-4347-8e20-fa9903ce56eb}</ProjectGuid>
6+
<ProjectHome>.</ProjectHome>
7+
<StartupFile>OCR_adapter.py</StartupFile>
8+
<SearchPath>..\SDK\Python</SearchPath>
9+
<WorkingDirectory>.</WorkingDirectory>
10+
<OutputPath>.</OutputPath>
11+
<Name>OCR</Name>
12+
<RootNamespace>OCR</RootNamespace>
13+
<IsWindowsApplication>False</IsWindowsApplication>
14+
<InterpreterId>MSBuild|venv|$(MSBuildProjectFullPath)</InterpreterId>
15+
<LaunchProvider>Standard Python launcher</LaunchProvider>
16+
<EnableNativeCodeDebugging>False</EnableNativeCodeDebugging>
17+
<EnableUnmanagedDebugging>false</EnableUnmanagedDebugging>
18+
</PropertyGroup>
19+
<PropertyGroup Condition=" '$(Configuration)' == 'Debug' ">
20+
<DebugSymbols>true</DebugSymbols>
21+
</PropertyGroup>
22+
<PropertyGroup Condition=" '$(Configuration)' == 'Release' ">
23+
</PropertyGroup>
24+
<ItemGroup>
25+
<Compile Include="OCR.py" />
26+
<Compile Include="options.py" />
27+
<Compile Include="OCR_adapter.py" />
28+
<Compile Include="patch\paddlepaddle-2.4.2\image.py" />
29+
<Compile Include="patch\paddleocr-2.6.0.1\db_postprocess.py" />
30+
<Compile Include="__init__.py" />
31+
<Content Include="explore.html" />
32+
<Content Include="install.bat" />
33+
<Content Include="install.sh" />
34+
<Content Include="package.bat" />
35+
<Content Include="package.sh" />
36+
<Content Include="paddleocr\ch_ppocr_mobile_v2.0_cls_infer\inference.pdiparams" />
37+
<Content Include="paddleocr\ch_ppocr_mobile_v2.0_cls_infer\inference.pdiparams.info" />
38+
<Content Include="paddleocr\ch_ppocr_mobile_v2.0_cls_infer\inference.pdmodel" />
39+
<Content Include="paddleocr\en_PP-OCRv3_det_infer\en_PP-OCRv3_det_infer.tar" />
40+
<Content Include="paddleocr\en_PP-OCRv3_det_infer\inference.pdiparams" />
41+
<Content Include="paddleocr\en_PP-OCRv3_det_infer\inference.pdiparams.info" />
42+
<Content Include="paddleocr\en_PP-OCRv3_det_infer\inference.pdmodel" />
43+
<Content Include="paddleocr\en_PP-OCRv3_rec_infer\inference.pdiparams" />
44+
<Content Include="paddleocr\en_PP-OCRv3_rec_infer\inference.pdiparams.info" />
45+
<Content Include="paddleocr\en_PP-OCRv3_rec_infer\inference.pdmodel" />
46+
<Content Include="post_install.sh" />
47+
</ItemGroup>
48+
<ItemGroup>
49+
<Content Include="modulesettings.json" />
50+
<Content Include="modulesettings.orangepi.json">
51+
<DependentUpon>modulesettings.json</DependentUpon>
52+
</Content>
53+
<Content Include="modulesettings.radxarock.json">
54+
<DependentUpon>modulesettings.json</DependentUpon>
55+
</Content>
56+
<Content Include="modulesettings.jetson.json">
57+
<DependentUpon>modulesettings.json</DependentUpon>
58+
</Content>
59+
<Content Include="modulesettings.raspberrypi.json">
60+
<DependentUpon>modulesettings.json</DependentUpon>
61+
</Content>
62+
<Content Include="modulesettings.linux.json">
63+
<DependentUpon>modulesettings.json</DependentUpon>
64+
</Content>
65+
<Content Include="modulesettings.macos.json">
66+
<DependentUpon>modulesettings.json</DependentUpon>
67+
</Content>
68+
<Content Include="modulesettings.windows.json">
69+
<DependentUpon>modulesettings.json</DependentUpon>
70+
</Content>
71+
</ItemGroup>
72+
<ItemGroup>
73+
<Content Include="requirements.txt" />
74+
<Content Include="requirements.linux.cuda11.txt">
75+
<DependentUpon>requirements.txt</DependentUpon>
76+
</Content>
77+
<Content Include="requirements.raspberrypi.txt">
78+
<DependentUpon>requirements.txt</DependentUpon>
79+
</Content>
80+
<Content Include="requirements.orangepi.txt">
81+
<DependentUpon>requirements.txt</DependentUpon>
82+
</Content>
83+
<Content Include="requirements.radxarock.txt">
84+
<DependentUpon>requirements.txt</DependentUpon>
85+
</Content>
86+
<Content Include="requirements.linux.txt">
87+
<DependentUpon>requirements.txt</DependentUpon>
88+
</Content>
89+
<Content Include="requirements.windows.cuda.txt">
90+
<DependentUpon>requirements.txt</DependentUpon>
91+
</Content>
92+
<Content Include="requirements.linux.arm64.txt">
93+
<DependentUpon>requirements.txt</DependentUpon>
94+
</Content>
95+
<Content Include="requirements.macos.arm64.txt">
96+
<DependentUpon>requirements.txt</DependentUpon>
97+
</Content>
98+
<Content Include="requirements.macos.txt">
99+
<DependentUpon>requirements.txt</DependentUpon>
100+
</Content>
101+
<Content Include="requirements.linux.cuda10.txt">
102+
<DependentUpon>requirements.txt</DependentUpon>
103+
</Content>
104+
<Content Include="requirements.linux.cuda11_6.txt">
105+
<DependentUpon>requirements.txt</DependentUpon>
106+
</Content>
107+
<Content Include="requirements.linux.cuda11_7.txt">
108+
<DependentUpon>requirements.txt</DependentUpon>
109+
</Content>
110+
<Content Include="requirements.linux.cuda11_8.txt">
111+
<DependentUpon>requirements.txt</DependentUpon>
112+
</Content>
113+
<Content Include="requirements.linux.cuda12.txt">
114+
<DependentUpon>requirements.txt</DependentUpon>
115+
</Content>
116+
<Content Include="requirements.windows.cuda10.txt">
117+
<DependentUpon>requirements.txt</DependentUpon>
118+
</Content>
119+
<Content Include="requirements.windows.cuda11.txt">
120+
<DependentUpon>requirements.txt</DependentUpon>
121+
</Content>
122+
<Content Include="requirements.windows.cuda11_6.txt">
123+
<DependentUpon>requirements.txt</DependentUpon>
124+
</Content>
125+
<Content Include="requirements.windows.cuda11_7.txt">
126+
<DependentUpon>requirements.txt</DependentUpon>
127+
</Content>
128+
<Content Include="requirements.windows.cuda11_8.txt">
129+
<DependentUpon>requirements.txt</DependentUpon>
130+
</Content>
131+
<Content Include="requirements.windows.cuda12.txt">
132+
<DependentUpon>requirements.txt</DependentUpon>
133+
</Content>
134+
</ItemGroup>
135+
<ItemGroup>
136+
<Folder Include="paddleocr\" />
137+
<Folder Include="paddleocr\ch_ppocr_mobile_v2.0_cls_infer\" />
138+
<Folder Include="paddleocr\en_PP-OCRv3_det_infer\" />
139+
<Folder Include="paddleocr\en_PP-OCRv3_rec_infer\" />
140+
<Folder Include="patch\" />
141+
<Folder Include="patch\paddlepaddle-2.4.2\" />
142+
<Folder Include="patch\paddlepaddle-2.4.0rc0\" />
143+
<Folder Include="patch\paddleocr-2.6.0.1\" />
144+
</ItemGroup>
145+
<ItemGroup>
146+
<Interpreter Include="bin\windows\python39\venv\">
147+
<Id>venv</Id>
148+
<Version>3.9</Version>
149+
<Description>venv (Python 3.9 (64-bit))</Description>
150+
<InterpreterPath>Scripts\python.exe</InterpreterPath>
151+
<WindowsInterpreterPath>Scripts\pythonw.exe</WindowsInterpreterPath>
152+
<PathEnvironmentVariable>PYTHONPATH</PathEnvironmentVariable>
153+
<Architecture>X64</Architecture>
154+
</Interpreter>
155+
</ItemGroup>
156+
<Import Project="$(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)\Python Tools\Microsoft.PythonTools.targets" />
157+
</Project>

0 commit comments

Comments
 (0)