import io
import time
import traceback
from PIL import Image

# Import CodeProject.AI SDK
from codeproject_ai_sdk import LogVerbosity, ModuleRunner, JSON

from options import Options
from paddleocr import PaddleOCR

ocr           = None
no_text_found = 'Text Not Found'

def init_detect_ocr(opts: Options) -> None:

    global ocr

    # See notes at the end of this file for options.
    ocr = PaddleOCR(lang                = opts.language,
                    use_gpu             = opts.use_gpu,
                    show_log            = opts.log_verbosity == LogVerbosity.Loud,
                    det_db_unclip_ratio = opts.det_db_unclip_ratio,
                    det_db_box_thresh   = opts.box_detect_threshold,
                    drop_score          = opts.char_detect_threshold,
                    rec_algorithm       = opts.algorithm,
                    cls_model_dir       = opts.cls_model_dir,
                    det_model_dir       = opts.det_model_dir,
                    rec_model_dir       = opts.rec_model_dir)

def read_text(module_runner: ModuleRunner, image: Image.Image, rotate_deg: int = 0) -> JSON:

    outputs = []

    # Rotate the image if needed
    working_image = image
    if rotate_deg != 0:
        working_image = image.rotate(rotate_deg, expand=True, resample=Image.BICUBIC)

    # Possibly run it through a super-resolution module to improve readability
    # working_image = enhance_image(working_image)
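    #
    # A possible enhance_image helper is sketched below. It is hypothetical (not
    # part of this module or the SDK) and simply upscales with Lanczos resampling
    # as a stand-in for a real super-resolution step:
    #
    #   def enhance_image(img: Image.Image, scale: int = 2) -> Image.Image:
    #       return img.resize((img.width * scale, img.height * scale), Image.LANCZOS)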

    # Read text
    inferenceTimeMs = 0
    try:
        # Convert the image to a bytes array
        with io.BytesIO() as image_buffer:
            working_image.save(image_buffer, format='JPEG')
            img_byte_arr = image_buffer.getvalue()

        start_time = time.perf_counter()
        ocr_response = ocr.ocr(img_byte_arr, cls=True)
        inferenceTimeMs = int((time.perf_counter() - start_time) * 1000)

        # Note that ocr_response[0][0][0][0] could be a float with value 0 ('false'), or in
        # some other universe maybe it's a string. To be really careful we would have a test
        # like:
        #   if hasattr(ocr_response[0][0][0][0], '__len__') and \
        #      not isinstance(ocr_response[0][0][0][0], str)
        if not ocr_response or not ocr_response[0] or not ocr_response[0][0] or not ocr_response[0][0][0]:
            return { "success": False, "error": "No OCR response received", "inferenceMs": inferenceTimeMs }

        # Seems that different versions of paddle return different structures, OR paddle
        # returns different structures depending on its mood. We're expecting:
        #   ocr_response = array of a single set of detections
        #     -> detections = array of detection
        #       -> detection = array of [bounding box, classification]
        #         -> bounding box = array of [x,y] points, classification = [label, confidence]
        # so ocr_response[0][0][0][0][0] = first 'x' of the bounding boxes, which is a float.
        # However, the first "array of a single set of detections" isn't always there, so first
        # check whether ocr_response[0][0][0][0] is a float.
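        #
        # For illustration only (the numbers and text below are made up, not real
        # output), the fully nested form would look like:
        #   [ [ [ [[10.0, 20.0], [110.0, 20.0], [110.0, 45.0], [10.0, 45.0]],
        #         ('ABC123', 0.98) ] ] ]
        # while the un-nested form starts one level down, at the list of detections.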

        detections = ocr_response if isinstance(ocr_response[0][0][0][0], float) else ocr_response[0]

        for detection in detections:
            bounding_box   = detection[0]   # [ top left, top right, bottom right, bottom left ], each an [x,y]
            classification = detection[1]

            label      = classification[0]
            confidence = classification[1]

            if label and confidence:

                # Obviously some optimisation can be done here, but is it worth
                # it? The trivial optimisation is to assume the order of the
                # points, but that's dangerous.

                output = {
                    "confidence": confidence,
                    "label":      label,
                    "x_min": int(min(point[0] for point in bounding_box)),   # int(bounding_box[0][0]),
                    "y_min": int(min(point[1] for point in bounding_box)),   # int(bounding_box[0][1]),
                    "x_max": int(max(point[0] for point in bounding_box)),   # int(bounding_box[3][0]),
                    "y_max": int(max(point[1] for point in bounding_box)),   # int(bounding_box[3][1]),
                }
                outputs.append(output)

        # The operation was successfully completed. There just wasn't any text:
        # if not outputs:
        #     return { "success": False, "predictions": None, "inferenceMs": inferenceTimeMs }

        return { "success": True, "predictions": outputs, "inferenceMs": inferenceTimeMs }

    except Exception as ex:
        module_runner.report_error(ex, __file__)

        message = "".join(traceback.TracebackException.from_exception(ex).format())
        return { "success": False, "error": message, "inferenceMs": inferenceTimeMs }

109+ """
110+ Options for the PaddleOCR object:
111+
112+ Parameter Default Description
113+ -------------------------------------------------------------------------------
114+ use_gpu TRUE use GPU or not
115+ gpu_mem 8000M GPU memory size used for initialization
116+ image_dir The images path or folder path for predicting
117+ when used by the command line
118+ det_algorithm DB Type of detection algorithm selected (DB = Differentiable Binarization)
119+ det_model_dir None the text detection inference model folder. There
120+ are two ways to transfer parameters, 1. None: Automatically download
121+ the built-in model to ~/.paddleocr/det;
122+ 2. The path of the inference model converted by yourself, the model
123+ and params files must be included in the model path
124+ det_max_side_len 960 The maximum size of the long side of the image. When the long side
125+ exceeds this value, the long side will be resized to this size, and
126+ the short side will be scaled proportionally
127+ det_db_thresh 0.3 Binarization threshold value of DB output map
128+ det_db_box_thresh 0.5 The threshold value of the DB output box. Boxes score lower than
129+ this value will be discarded
130+ det_db_unclip_ratio 2 The expanded ratio of DB output box
131+ det_east_score_thresh 0.8 Binarization threshold value of EAST output map
132+ det_east_cover_thresh 0.1 The threshold value of the EAST output box. Boxes score lower than
133+ this value will be discarded
134+ det_east_nms_thresh 0.2 The NMS threshold value of EAST model output box
135+ rec_algorithm CRNN Type of recognition algorithm selected
136+ rec_model_dir None the text recognition inference model folder. There are two ways to
137+ transfer parameters, 1. None: Automatically download the built-in
138+ model to ~/.paddleocr/rec; 2. The path of the inference model
139+ converted by yourself, the model and params files must be included
140+ in the model path
141+ rec_image_shape "3,32,320" image shape of recognition algorithm
142+ rec_char_type ch Character type of recognition algorithm, Chinese (ch) or English (en)
143+ rec_batch_num 30 When performing recognition, the batchsize of forward images
144+ max_text_length 25 The maximum text length that the recognition algorithm can recognize
145+ rec_char_dict_path ./ppocr/utils/ppocr_keys_v1.txt the alphabet path which needs to be modified to
146+ your own path when rec_model_Name use mode 2
147+ use_space_char TRUE Whether to recognize spaces
148+ use_angle_cls FALSE Whether to load classification model
149+ cls_model_dir None the classification inference model folder. There are two ways to
150+ transfer parameters, 1. None: Automatically download the built-in model
151+ to ~/.paddleocr/cls; 2. The path of the inference model converted
152+ by yourself, the model and params files must be included in the
153+ model path
154+ cls_image_shape "3,48,192" image shape of classification algorithm
155+ label_list ['0','180'] label list of classification algorithm
156+ cls_batch_num 30 When performing classification, the batchsize of forward images
157+ enable_mkldnn FALSE Whether to enable mkldnn
158+ use_zero_copy_run FALSE Whether to forward by zero_copy_run
159+ lang ch The support language, Only Chinese (ch), English (en), French (french),
160+ German (german), Korean (korean), Japanese (japan) are supported
161+ det TRUE Enable detection when ppocr.ocr func exec
162+ rec TRUE Enable recognition when ppocr.ocr func exec
163+ cls FALSE Enable classification when ppocr.ocr func exec. This parameter only
164+ exists in code usage mode
165+ """