chineseocr
diff --git a/‎README.md
+7-10 b/‎README.md
+7-10
diff --git a/‎app.py
-2 b/‎app.py
-2
diff --git a/‎config.py
+1-1 b/‎config.py
+1-1
diff --git a/‎dnn/main.py
+6-2 b/‎dnn/main.py
+6-2
diff --git a/‎dnn/ocr.py
+51-13 b/‎dnn/ocr.py
+51-13
diff --git a/‎models/ocr/chinese-5k/ocr.cfg
+4 b/‎models/ocr/chinese-5k/ocr.cfg
+4
diff --git a/‎models/ocr/chinese/ocr.cfg
+4 b/‎models/ocr/chinese/ocr.cfg
+4
diff --git a/‎models/ocr/english/ocr.cfg
+4 b/‎models/ocr/english/ocr.cfg
+4
diff --git a/‎models/ocr/japanese/ocr.cfg
+4 b/‎models/ocr/japanese/ocr.cfg
+4
diff --git a/‎static/js/helps.js
+7-4 b/‎static/js/helps.js
+7-4
diff --git a/‎templates/text.html
-3 b/‎templates/text.html
-3
diff --git a/‎test/bank.png
-1.14 MB b/‎test/bank.png
-1.14 MB
diff --git a/‎test/demo-line.png
336 KB b/‎test/demo-line.png
336 KB
diff --git a/‎test/demo-rotate.png
831 KB b/‎test/demo-rotate.png
831 KB
diff --git a/‎test/dinge.png
-1.38 MB b/‎test/dinge.png
-1.38 MB
diff --git a/‎test/img-demo.png
-765 KB b/‎test/img-demo.png
-765 KB
diff --git a/‎test/japanese-demo.png
161 KB b/‎test/japanese-demo.png
161 KB
diff --git a/‎test/japnese.png
19.5 KB b/‎test/japnese.png
19.5 KB
diff --git a/‎test/ocr.jpeg
-1.63 KB b/‎test/ocr.jpeg
-1.63 KB
diff --git a/‎test/ocr.png
-268 KB b/‎test/ocr.png
-268 KB
diff --git a/‎test/sh.png
-957 KB b/‎test/sh.png
-957 KB
diff --git a/‎test/song.png
-1.02 MB b/‎test/song.png
-1.02 MB
diff --git a/‎test/text.png
-763 KB b/‎test/text.png
-763 KB
@@ -1,5 +1,5 @@
 ## 本项目基于darknet(https://github.com/pjreddie/darknet.git)框架实现CTPN版本自然场景文字检测 与CNN+CTCOCR文字识别
-## python版本（3.6）
+## 支持系统:mac/ubuntu python=3.6  
 ##  实现功能    
 - [x]  文字检测；  
 - [x]  文字识别；  
@@ -16,11 +16,12 @@
 
 ## 编译对GPU的支持  
 ``` Bash
-## cpu 
-cd darknet && cp  Makefile-cpu Makefile && make
 ## GPU
 cd darknet && cp  Makefile-GPU Makefile && make
 ```
+##  CPU优化
+参考opencv版本编译 : https://github.com/chineseocr/opencv-for-darknet.git   
+
 
 ## web服务启动(支持文件上传及URL图像)
 ``` Bash
@@ -34,13 +35,9 @@ http://127.0.0.1:8080/text
 
 ## 识别结果展示
 
-<img width="500" height="300" src="https://github.com/chineseocr/darknet-ocr/blob/master/test/img-demo.png"/>  
-<img width="500" height="300" src="https://github.com/chineseocr/darknet-ocr/blob/master/test/text.png"/>   
-<img width="500" height="300" src="https://github.com/chineseocr/darknet-ocr/blob/master/test/song.png"/>   
-<img width="500" height="300" src="https://github.com/chineseocr/darknet-ocr/blob/master/test/dinge.png"/>   
-<img width="500" height="300" src="https://github.com/chineseocr/darknet-ocr/blob/master/test/ocr.png"/>   
-<img width="500" height="300" src="https://github.com/chineseocr/darknet-ocr/blob/master/test/sh.png"/>  
-<img width="500" height="300" src="https://github.com/chineseocr/darknet-ocr/blob/master/test/bank.png"/>  
+<img width="500" height="300" src="https://github.com/chineseocr/darknet-ocr/blob/master/test/demo-line.png"/>  
+<img width="500" height="300" src="https://github.com/chineseocr/darknet-ocr/blob/master/test/demo-rotate.png"/>   
+<img width="500" height="300" src="https://github.com/chineseocr/darknet-ocr/blob/master/test/japanese-demo.png"/>   
 
 ## 参考
 1. darknet https://github.com/pjreddie/darknet.git               
 
@@ -6,8 +6,6 @@
 web.config.debug  = False
 import uuid
 import json
-import os
-import time
 import cv2
 import numpy as np
 from helper.image import read_url_img,base64_to_PIL,get_now
 
@@ -11,5 +11,5 @@
 TEXT_LINE_SCORE=0.7##text line prob
 scale = 900##可动态修改 no care text.cfg height,width
 maxScale = 1800
-GPU=True ## gpu for darknet  or cpu for opencv.dnn 
+GPU=False ## gpu for darknet  or cpu for opencv.dnn 
 anchors = '16,11, 16,16, 16,23, 16,33, 16,48, 16,68, 16,97, 16,139, 16,198, 16,283'
@@ -4,9 +4,10 @@
 main
 @author: chineseocr
 @mail: [email protected]
+adjust ocr, output char prob 
 """
 
-from dnn.ocr import predict_darknet as ocrModel
+from dnn.ocr import predict  as ocrModel
 from dnn.text import detect_lines as textModel
 from PIL import Image
 import numpy as np
@@ -65,7 +66,10 @@ def text_ocr(img,scale,maxScale,TEXT_LINE_SCORE):
         if scores[i]>TEXT_LINE_SCORE:
             tmpImg = rotate_cut_img(im,box,leftAdjust=0.01,rightAdjust=0.01)
             text = ocrModel(tmpImg)
-            result.append({'text':text,'box':[ int(x) for x in box],'prob':round(float(scores[i]),2)})
+            if text['text']!='':
+                text['box'] = [ int(x) for x in box]
+                text['textprob']=round(float(scores[i]),2)
+                result.append(text)
     result = sorted(result,key=lambda x:sum(x['box'][1::2]))
     return result
 
 
@@ -1,3 +1,12 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+ocr 
+@author: chineseocr
+@mail: [email protected]
+## add opencv dnn for relu and stride 
+## add ocr prob for every char
+"""
 import cv2
 import os
 import time
@@ -8,20 +17,34 @@
 def read_characters():
     p= ocrPath.replace('.weights','.json')
     if os.path.exists(p):
-        with open(p)  as f:
+        with open(p,encoding='utf-8')  as f:
             characters = json.loads(f.read())
         return characters
     else:
         return ''
 
 
 charactersPred = ' '+read_characters()+'｜ '
-if 1:
+if GPU:
     from dnn.darknet import  load_net,predict_image,array_to_image
     ocrNet = load_net(ocrPath.replace('.weights','.cfg').encode(),ocrPath.encode(), 0)
 else:
     ocrNet = cv2.dnn.readNetFromDarknet(ocrPath.replace('.weights','.cfg'),ocrPath)
-
+    
+    
+def predict(image):
+   if GPU:
+       return predict_darknet(image)
+   else:
+       return predict_cpu(image)
+   
+def softmax(res):
+    resMax = res.max(axis=1).reshape((-1,1))
+    res    = res-resMax
+    res    = np.exp(res)
+    expSum = res.sum(axis=1).reshape((-1,1))
+    return res/expSum
+        
 def predict_cpu(image):
        """
        cnn ctc model  
@@ -30,13 +53,19 @@ def predict_cpu(image):
        scale = image.size[1]*1.0 / 32
        w = image.size[0] / scale
        w = int(w)
+       if w<8:
+           return {'chars':[],'text':'','prob':0}
        image   = image.resize((w,32),Image.BILINEAR)
        image = (np.array(image.convert('L'))/255.0-0.5)/0.5
        image = np.array([[image]])
        ocrNet.setInput(image)
-       y_pred = ocrNet.forward(ocrNet.getUnconnectedOutLayersNames())
-       y_pred = y_pred[0][0,:,-1,:]
-       out    = decode(y_pred)##
+       y_pred = ocrNet.forward()
+       out = y_pred[0][:,0,:]
+       
+       out = out.transpose((1,0))
+       out = softmax(out)
+       out = decode(out)##
+       
        return out
 
 def predict_darknet(image):
@@ -47,7 +76,7 @@ def predict_darknet(image):
     image = (np.array(image.convert('L'))/255.0-0.5)/0.5
     h,w = image.shape
     if w<8:
-        return ''
+        return {'chars':[],'text':'','prob':0}
     tmp = np.zeros((h,w,1))
     tmp[:,:,0] = image
 
@@ -58,23 +87,32 @@ def predict_darknet(image):
     out = [ res[i] for i in range(outW*nchars)] 
     out = np.array(out).reshape((nchars,outW))
     out = out.transpose((1,0))
+    out = softmax(out)
+    
     return decode(out)
 
-   
+
 def decode(pred):
         t = pred.argmax(axis=1)
+        prob  = [ pred[ind,pb] for ind,pb in enumerate(t)]
+   
         length = len(t)
-        char_list = []
+        charList = []
+        probList = []
         n = len(charactersPred)
         for i in range(length):
            if t[i] not in [n-1,n-1] and (not (i > 0 and t[i - 1] == t[i])):
-                        char_list.append(charactersPred[t[i]])
-        return ''.join(char_list)
+                        charList.append(charactersPred[t[i]])
+                        probList.append(prob[i])
+        res = {'text':''.join(charList),
+               "prob":round(float(min(probList)),2) if len(probList)>0 else 0,
+               "chars":[{'char':char,'prob':round(float(p),2)}for char ,p in zip(charList,probList)]}
+        return res
 
 
 
 if __name__=='__main__':
     t =time.time()
-    img=Image.open('./test/test.png')
-    res = predict_darknet(img)
+    img=Image.open('./test/dd.jpg')
+    res = predict(img)
     print(time.time()-t,res)
@@ -61,6 +61,8 @@ activation=leaky
 [maxpool]
 size=2
 stride=2,1
+strideW=1
+strideH=2
 padding=0
 
 #conv5
@@ -85,6 +87,8 @@ activation=leaky
 [maxpool]
 size=2
 stride=2,1
+strideW=1
+strideH=2
 padding=0
 
 #conv7
 
@@ -61,6 +61,8 @@ activation=relu
 [maxpool]
 size=2
 stride=2,1
+strideW=1
+strideH=2
 padding=0
 
 #conv5
@@ -85,6 +87,8 @@ activation=relu
 [maxpool]
 size=2
 stride=2,1
+strideW=1
+strideH=2
 padding=0
 
 #conv7
 
@@ -58,6 +58,8 @@ activation=leaky
 [maxpool]
 size=2
 stride=2,1
+strideW=1
+strideH=2
 padding=0
 
 #conv5
@@ -80,6 +82,8 @@ activation=leaky
 [maxpool]
 size=2
 stride=2,1
+strideW=1
+strideH=2
 padding=0
 
 #conv7
 
@@ -57,6 +57,8 @@ activation=leaky
 [maxpool]
 size=2
 stride=2,1
+strideW=1
+strideH=2
 padding=0
 
 #conv5
@@ -79,6 +81,8 @@ activation=leaky
 [maxpool]
 size=2
 stride=2,1
+strideW=1
+strideH=2
 padding=0
 
 #conv7
 
@@ -1,5 +1,4 @@
 function getValue(value,that){
-        //勾选需要识别的票据类型
         var divStyle = document.getElementsByClassName("billname");
         for(var i=0;i<divStyle.length;i++){
             divStyle[i].style.background = 'white';
@@ -155,14 +154,18 @@ function createTable(result){
         imgBoxes=[];
         //var jsObject = [{"name":10,"value":20},{"name":10,"value":20}];
 
-        var tableString ="<table id='billmodeltable' class='gridtable'><tr><th>序号</th><th>值</th></tr>"
+        var tableString ="<table id='billmodeltable' class='gridtable'><tr><th>序号</th><th>值</th><th>文本检测置信度</th><th>OCR置信度</th></tr>"
 
         for(var i=0;i<jsObject.length;i++){
-            tableString+="<tr><td><p>"+i+"</p></td><td><p contenteditable='true'>"+jsObject[i]["text"]+"</p></td></tr>";
+            var index = "<td>"+i+"</td>";
+            var text     =  "<td><p contenteditable='true'>"+ jsObject[i]["text"]+"</p></td>";
+            var textProb =  "<td>"+ jsObject[i]["textprob"]+ "</td>";
+            var ocrProb  =  "<td>"+ jsObject[i]["prob"]+ "</td>" ;
+            tableString += "<tr>"+index+text+textProb+ocrProb;
+            
             imgBoxes.push(jsObject[i]["box"]);
         }
         tableString+="</table>";
-        //jQuery("#mytable").append(p);
         jQuery("#mytable").append(tableString);
     }
 
 
@@ -7,9 +7,6 @@
     imgJson={"width":$post['width'],
              "height":$post['height'],
              "uuid":"$post['uuid']",
-             "billModel":"",
-             "iscut":false,//是否多票据识别
-             "isclass":false,//是否自动进行票据分类
              'istext':true,//文字检测
             }
Original file line number	Diff line number	Diff line change
`@@ -7,9 +7,6 @@`
`7`	`7`	`imgJson={"width":$post['width'],`
`8`	`8`	`"height":$post['height'],`
`9`	`9`	`"uuid":"$post['uuid']",`
`10`		`- "billModel":"",`
`11`		`- "iscut":false,//是否多票据识别`
`12`		`- "isclass":false,//是否自动进行票据分类`
`13`	`10`	`'istext':true,//文字检测`
`14`	`11`	`}`
`15`	`12`