Skip to content

Commit e8b076b

Browse files
committed
add opencv dnn for ocr
1 parent d340c3d commit e8b076b

23 files changed

+88
-35
lines changed

README.md

+7-10
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
## 本项目基于darknet(https://github.com/pjreddie/darknet.git)框架实现CTPN版本自然场景文字检测与CNN+CTC OCR文字识别
2-
## python版本(3.6
2+
## 支持系统:mac/ubuntu python=3.6
33
## 实现功能
44
- [x] 文字检测;
55
- [x] 文字识别;
@@ -16,11 +16,12 @@
1616

1717
## 编译对GPU的支持
1818
``` Bash
19-
## cpu
20-
cd darknet && cp Makefile-cpu Makefile && make
2119
## GPU
2220
cd darknet && cp Makefile-GPU Makefile && make
2321
```
22+
## CPU优化
23+
参考opencv版本编译 : https://github.com/chineseocr/opencv-for-darknet.git
24+
2425

2526
## web服务启动(支持文件上传及URL图像)
2627
``` Bash
@@ -34,13 +35,9 @@ http://127.0.0.1:8080/text
3435

3536
## 识别结果展示
3637

37-
<img width="500" height="300" src="https://github.com/chineseocr/darknet-ocr/blob/master/test/img-demo.png"/>
38-
<img width="500" height="300" src="https://github.com/chineseocr/darknet-ocr/blob/master/test/text.png"/>
39-
<img width="500" height="300" src="https://github.com/chineseocr/darknet-ocr/blob/master/test/song.png"/>
40-
<img width="500" height="300" src="https://github.com/chineseocr/darknet-ocr/blob/master/test/dinge.png"/>
41-
<img width="500" height="300" src="https://github.com/chineseocr/darknet-ocr/blob/master/test/ocr.png"/>
42-
<img width="500" height="300" src="https://github.com/chineseocr/darknet-ocr/blob/master/test/sh.png"/>
43-
<img width="500" height="300" src="https://github.com/chineseocr/darknet-ocr/blob/master/test/bank.png"/>
38+
<img width="500" height="300" src="https://github.com/chineseocr/darknet-ocr/blob/master/test/demo-line.png"/>
39+
<img width="500" height="300" src="https://github.com/chineseocr/darknet-ocr/blob/master/test/demo-rotate.png"/>
40+
<img width="500" height="300" src="https://github.com/chineseocr/darknet-ocr/blob/master/test/japanese-demo.png"/>
4441

4542
## 参考
4643
1. darknet https://github.com/pjreddie/darknet.git

app.py

-2
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@
66
web.config.debug = False
77
import uuid
88
import json
9-
import os
10-
import time
119
import cv2
1210
import numpy as np
1311
from helper.image import read_url_img,base64_to_PIL,get_now

config.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,5 +11,5 @@
1111
TEXT_LINE_SCORE=0.7##text line prob
1212
scale = 900##可动态修改, independent of the height,width set in text.cfg
1313
maxScale = 1800
14-
GPU=True ## gpu for darknet or cpu for opencv.dnn
14+
GPU=False ## gpu for darknet or cpu for opencv.dnn
1515
anchors = '16,11, 16,16, 16,23, 16,33, 16,48, 16,68, 16,97, 16,139, 16,198, 16,283'

dnn/main.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,10 @@
44
main
55
@author: chineseocr
66
7+
adjust ocr, output char prob
78
"""
89

9-
from dnn.ocr import predict_darknet as ocrModel
10+
from dnn.ocr import predict as ocrModel
1011
from dnn.text import detect_lines as textModel
1112
from PIL import Image
1213
import numpy as np
@@ -65,7 +66,10 @@ def text_ocr(img,scale,maxScale,TEXT_LINE_SCORE):
6566
if scores[i]>TEXT_LINE_SCORE:
6667
tmpImg = rotate_cut_img(im,box,leftAdjust=0.01,rightAdjust=0.01)
6768
text = ocrModel(tmpImg)
68-
result.append({'text':text,'box':[ int(x) for x in box],'prob':round(float(scores[i]),2)})
69+
if text['text']!='':
70+
text['box'] = [ int(x) for x in box]
71+
text['textprob']=round(float(scores[i]),2)
72+
result.append(text)
6973
result = sorted(result,key=lambda x:sum(x['box'][1::2]))
7074
return result
7175

dnn/ocr.py

+51-13
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,12 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
"""
4+
ocr
5+
@author: chineseocr
6+
7+
## add opencv dnn for relu and stride
8+
## add ocr prob for every char
9+
"""
110
import cv2
211
import os
312
import time
@@ -8,20 +17,34 @@
817
def read_characters():
918
p= ocrPath.replace('.weights','.json')
1019
if os.path.exists(p):
11-
with open(p) as f:
20+
with open(p,encoding='utf-8') as f:
1221
characters = json.loads(f.read())
1322
return characters
1423
else:
1524
return ''
1625

1726

1827
charactersPred = ' '+read_characters()+'| '
19-
if 1:
28+
if GPU:
2029
from dnn.darknet import load_net,predict_image,array_to_image
2130
ocrNet = load_net(ocrPath.replace('.weights','.cfg').encode(),ocrPath.encode(), 0)
2231
else:
2332
ocrNet = cv2.dnn.readNetFromDarknet(ocrPath.replace('.weights','.cfg'),ocrPath)
24-
33+
34+
35+
def predict(image):
36+
if GPU:
37+
return predict_darknet(image)
38+
else:
39+
return predict_cpu(image)
40+
41+
def softmax(res):
42+
resMax = res.max(axis=1).reshape((-1,1))
43+
res = res-resMax
44+
res = np.exp(res)
45+
expSum = res.sum(axis=1).reshape((-1,1))
46+
return res/expSum
47+
2548
def predict_cpu(image):
2649
"""
2750
cnn ctc model
@@ -30,13 +53,19 @@ def predict_cpu(image):
3053
scale = image.size[1]*1.0 / 32
3154
w = image.size[0] / scale
3255
w = int(w)
56+
if w<8:
57+
return {'chars':[],'text':'','prob':0}
3358
image = image.resize((w,32),Image.BILINEAR)
3459
image = (np.array(image.convert('L'))/255.0-0.5)/0.5
3560
image = np.array([[image]])
3661
ocrNet.setInput(image)
37-
y_pred = ocrNet.forward(ocrNet.getUnconnectedOutLayersNames())
38-
y_pred = y_pred[0][0,:,-1,:]
39-
out = decode(y_pred)##
62+
y_pred = ocrNet.forward()
63+
out = y_pred[0][:,0,:]
64+
65+
out = out.transpose((1,0))
66+
out = softmax(out)
67+
out = decode(out)##
68+
4069
return out
4170

4271
def predict_darknet(image):
@@ -47,7 +76,7 @@ def predict_darknet(image):
4776
image = (np.array(image.convert('L'))/255.0-0.5)/0.5
4877
h,w = image.shape
4978
if w<8:
50-
return ''
79+
return {'chars':[],'text':'','prob':0}
5180
tmp = np.zeros((h,w,1))
5281
tmp[:,:,0] = image
5382

@@ -58,23 +87,32 @@ def predict_darknet(image):
5887
out = [ res[i] for i in range(outW*nchars)]
5988
out = np.array(out).reshape((nchars,outW))
6089
out = out.transpose((1,0))
90+
out = softmax(out)
91+
6192
return decode(out)
6293

63-
94+
6495
def decode(pred):
6596
t = pred.argmax(axis=1)
97+
prob = [ pred[ind,pb] for ind,pb in enumerate(t)]
98+
6699
length = len(t)
67-
char_list = []
100+
charList = []
101+
probList = []
68102
n = len(charactersPred)
69103
for i in range(length):
70104
if t[i] not in [n-1,n-1] and (not (i > 0 and t[i - 1] == t[i])):
71-
char_list.append(charactersPred[t[i]])
72-
return ''.join(char_list)
105+
charList.append(charactersPred[t[i]])
106+
probList.append(prob[i])
107+
res = {'text':''.join(charList),
108+
"prob":round(float(min(probList)),2) if len(probList)>0 else 0,
109+
"chars":[{'char':char,'prob':round(float(p),2)}for char ,p in zip(charList,probList)]}
110+
return res
73111

74112

75113

76114
if __name__=='__main__':
77115
t =time.time()
78-
img=Image.open('./test/test.png')
79-
res = predict_darknet(img)
116+
img=Image.open('./test/dd.jpg')
117+
res = predict(img)
80118
print(time.time()-t,res)

models/ocr/chinese-5k/ocr.cfg

+4
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ activation=leaky
6161
[maxpool]
6262
size=2
6363
stride=2,1
64+
strideW=1
65+
strideH=2
6466
padding=0
6567

6668
#conv5
@@ -85,6 +87,8 @@ activation=leaky
8587
[maxpool]
8688
size=2
8789
stride=2,1
90+
strideW=1
91+
strideH=2
8892
padding=0
8993

9094
#conv7

models/ocr/chinese/ocr.cfg

+4
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ activation=relu
6161
[maxpool]
6262
size=2
6363
stride=2,1
64+
strideW=1
65+
strideH=2
6466
padding=0
6567

6668
#conv5
@@ -85,6 +87,8 @@ activation=relu
8587
[maxpool]
8688
size=2
8789
stride=2,1
90+
strideW=1
91+
strideH=2
8892
padding=0
8993

9094
#conv7

models/ocr/english/ocr.cfg

+4
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ activation=leaky
5858
[maxpool]
5959
size=2
6060
stride=2,1
61+
strideW=1
62+
strideH=2
6163
padding=0
6264

6365
#conv5
@@ -80,6 +82,8 @@ activation=leaky
8082
[maxpool]
8183
size=2
8284
stride=2,1
85+
strideW=1
86+
strideH=2
8387
padding=0
8488

8589
#conv7

models/ocr/japanese/ocr.cfg

+4
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ activation=leaky
5757
[maxpool]
5858
size=2
5959
stride=2,1
60+
strideW=1
61+
strideH=2
6062
padding=0
6163

6264
#conv5
@@ -79,6 +81,8 @@ activation=leaky
7981
[maxpool]
8082
size=2
8183
stride=2,1
84+
strideW=1
85+
strideH=2
8286
padding=0
8387

8488
#conv7

static/js/helps.js

+7-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
function getValue(value,that){
2-
//勾选需要识别的票据类型
32
var divStyle = document.getElementsByClassName("billname");
43
for(var i=0;i<divStyle.length;i++){
54
divStyle[i].style.background = 'white';
@@ -155,14 +154,18 @@ function createTable(result){
155154
imgBoxes=[];
156155
//var jsObject = [{"name":10,"value":20},{"name":10,"value":20}];
157156

158-
var tableString ="<table id='billmodeltable' class='gridtable'><tr><th>序号</th><th>值</th></tr>"
157+
var tableString ="<table id='billmodeltable' class='gridtable'><tr><th>序号</th><th>值</th><th>文本检测置信度</th><th>OCR置信度</th></tr>"
159158

160159
for(var i=0;i<jsObject.length;i++){
161-
tableString+="<tr><td><p>"+i+"</p></td><td><p contenteditable='true'>"+jsObject[i]["text"]+"</p></td></tr>";
160+
var index = "<td>"+i+"</td>";
161+
var text = "<td><p contenteditable='true'>"+ jsObject[i]["text"]+"</p></td>";
162+
var textProb = "<td>"+ jsObject[i]["textprob"]+ "</td>";
163+
var ocrProb = "<td>"+ jsObject[i]["prob"]+ "</td>" ;
164+
tableString += "<tr>"+index+text+textProb+ocrProb;
165+
162166
imgBoxes.push(jsObject[i]["box"]);
163167
}
164168
tableString+="</table>";
165-
//jQuery("#mytable").append(p);
166169
jQuery("#mytable").append(tableString);
167170
}
168171

templates/text.html

-3
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,6 @@
77
imgJson={"width":$post['width'],
88
"height":$post['height'],
99
"uuid":"$post['uuid']",
10-
"billModel":"",
11-
"iscut":false,//是否多票据识别
12-
"isclass":false,//是否自动进行票据分类
1310
'istext':true,//文字检测
1411
}
1512

test/bank.png

-1.14 MB
Binary file not shown.

test/demo-line.png

336 KB
Loading

test/demo-rotate.png

831 KB
Loading

test/dinge.png

-1.38 MB
Binary file not shown.

test/img-demo.png

-765 KB
Binary file not shown.

test/japanese-demo.png

161 KB
Loading

test/japnese.png

19.5 KB
Loading

test/ocr.jpeg

-1.63 KB
Binary file not shown.

test/ocr.png

-268 KB
Binary file not shown.

test/sh.png

-957 KB
Binary file not shown.

test/song.png

-1.02 MB
Binary file not shown.

test/text.png

-763 KB
Binary file not shown.

0 commit comments

Comments
 (0)