52 changes: 42 additions & 10 deletions predict.py
@@ -5,19 +5,51 @@
3. To get the coordinates of a box, go into the detect_image function and read the four values top, left, bottom, right.
4. To crop out a detected target, use those top, left, bottom, right values to slice the original image as an array.
'''
import cv2
from PIL import Image
import numpy as np

from yolo import YOLO

from IPython import embed

yolo = YOLO()

while True:
img = input('Input image filename:')
try:
image = Image.open(img)
except:
print('Open Error! Try again!')
continue
else:
r_image = yolo.detect_image(image)
r_image.show()
image = Image.open('./img/view2.jpg')# returns a PIL Image object
uncroped_image = cv2.imread("./img/view2.jpg")# the same image as a BGR NumPy array, used for slicing

r_image,boxes = yolo.detect_image(image)# detect_image now also returns the box coordinates

# Crop each detected target out of the original image
box = boxes  # unused

for i in range(boxes.shape[0]):
# top, left, bottom, right = boxes[i]
# (the unpacking above is equivalent to the four assignments below)
top = boxes[i][0]
left = boxes[i][1]
bottom = boxes[i][2]
right = boxes[i][3]

# expand each box by a 5-pixel margin before clamping to the image bounds
top = top - 5
left = left - 5
bottom = bottom + 5
right = right + 5

# top-left corner coordinates
top = int(max(0, np.floor(top + 0.5).astype('int32')))

left = int(max(0, np.floor(left + 0.5).astype('int32')))
# bottom-right corner coordinates
bottom = int(min(np.shape(image)[0], np.floor(bottom + 0.5).astype('int32')))
right = int(min(np.shape(image)[1], np.floor(right + 0.5).astype('int32')))

# embed()

# Note: an earlier attempt failed here because the cropping method it used took a
# height and a width measured from the image origin, which is not what we want;
# slice the array with the corner coordinates instead.
croped_region = uncroped_image[top:bottom,left:right]# rows (height) first, then columns (width)
# save the cropped target to disk
cv2.imwrite("./output/croped_view2_img_"+str(i)+".jpg",croped_region)

# embed()
r_image.show()
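# Optional sketch (not part of the original script): the same crops can be taken from
# the PIL image with Image.crop, which expects a (left, upper, right, lower) tuple.
# `image` and `boxes` are reused from above; the output file name is hypothetical.
for i in range(boxes.shape[0]):
    top, left, bottom, right = [int(round(float(v))) for v in boxes[i]]
    pil_crop = image.crop((max(0, left), max(0, top), right, bottom))
    pil_crop.save("./output/croped_view2_pil_" + str(i) + ".jpg")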
55 changes: 40 additions & 15 deletions yolo.py
@@ -16,6 +16,7 @@
from utils.utils import (DecodeBox, bbox_iou, letterbox_image,
non_max_suppression, yolo_correct_boxes)

from IPython import embed

#--------------------------------------------#
# 使用自己训练好的模型预测需要修改2个参数
@@ -28,10 +29,10 @@ class YOLO(object):
"model_path" : 'model_data/yolo4_weights.pth',
"anchors_path" : 'model_data/yolo_anchors.txt',
"classes_path" : 'model_data/coco_classes.txt',
"model_image_size" : (416, 416, 3),
"confidence" : 0.5,
"model_image_size" : (416, 416, 3),#这里的model_image_size是什么,不会跟图像size产生冲突吗,为什么不可以改????
"confidence" : 0.3,
"iou" : 0.3,
"cuda" : True
"cuda" : False
}

@classmethod
@@ -96,7 +97,7 @@ def generate(self):
#---------------------------------------------------#
# Build the decoding tools for the three feature layers
#---------------------------------------------------#
self.yolo_decodes = []
self.yolo_decodes = []# list that will hold the three decoders, one per feature layer
for i in range(3):
self.yolo_decodes.append(DecodeBox(self.anchors[i], len(self.class_names), (self.model_image_size[1], self.model_image_size[0])))
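# each DecodeBox converts the raw offsets of one feature layer back into box coordinates using that layer's anchors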

@@ -114,39 +115,52 @@ def generate(self):
# Detect objects in an image
#---------------------------------------------------#
def detect_image(self, image):
# embed()
image_shape = np.array(np.shape(image)[0:2])
num_class = len(self.class_names)# 80 classes for the COCO model
# embed()

#---------------------------------------------------------#
# Add gray bars to the image so it can be resized without distortion
# Add gray bars (letterbox padding) to the image so it can be resized without distortion
#---------------------------------------------------------#
# letterbox_image copies the image and returns a new, padded image
crop_img = np.array(letterbox_image(image, (self.model_image_size[1],self.model_image_size[0])))
photo = np.array(crop_img,dtype = np.float32) / 255.0
photo = np.transpose(photo, (2, 0, 1))
photo = np.array(crop_img,dtype = np.float32) / 255.0# normalize pixel values to [0, 1]
photo = np.transpose(photo, (2, 0, 1))# transpose from the [H, W, C] layout of Image.open to the [C, H, W] layout PyTorch expects
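# after these two steps photo is a float32 array of shape (3, 416, 416) with values in [0, 1]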
#---------------------------------------------------------#
# add the batch_size dimension
#---------------------------------------------------------#
images = [photo]
images = [photo]# wrap photo in a list, giving a batch of size 1

with torch.no_grad():
images = torch.from_numpy(np.asarray(images))
with torch.no_grad():# disable gradient tracking to cut memory use during inference
images = torch.from_numpy(np.asarray(images))# create a tensor from the numpy array; images now has shape [1, 3, 416, 416]
if self.cuda:
images = images.cuda()

#---------------------------------------------------------#
# Feed the image into the network for prediction!
#---------------------------------------------------------#
# embed()
# processing starts here
# feature extraction
# outputs is a tuple of length 3; the tensor shapes are [1, 255, 13, 13] for the first feature layer, [1, 255, 26, 26] for the second, and [1, 255, 52, 52] for the third
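# 255 channels = 3 anchors per scale x (4 box offsets + 1 objectness score + 80 class scores)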
outputs = self.net(images)
# embed()
output_list = []
for i in range(3):
output_list.append(self.yolo_decodes[i](outputs[i]))
for i in range(3):# three feature layers
# each feature layer has its own decoder
output_list.append(self.yolo_decodes[i](outputs[i]))# a good place to set a breakpoint and inspect the decoded output

#---------------------------------------------------------#
# Stack the predicted boxes, then apply non-maximum suppression
#---------------------------------------------------------#
output = torch.cat(output_list, 1)
# torch.cat concatenates the three decoded outputs along the anchor dimension (dim=1)
output = torch.cat(output_list, 1)# another useful place for a breakpoint
# output is the predictions tensor with shape [batch_size, num_anchors, 85] (85 = 4 box coords + 1 objectness + 80 class scores)
batch_detections = non_max_suppression(output, len(self.class_names),
conf_thres=self.confidence,
nms_thres=self.iou)
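# batch_detections appears to hold one row per kept box with columns
# (x1, y1, x2, y2, objectness, class confidence, class index), which matches the
# [:,4], [:,5], [:,-1] and [:,:4] indexing used below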
# embed()

#---------------------------------------------------------#
# If no object is detected, return the original image
@@ -159,24 +173,33 @@ def detect_image(self, image):
#---------------------------------------------------------#
# Filter the predicted boxes by score
#---------------------------------------------------------#
# coordinates = []# (unused) list for the bbox coordinates

top_index = batch_detections[:,4] * batch_detections[:,5] > self.confidence
top_conf = batch_detections[top_index,4]*batch_detections[top_index,5]
top_label = np.array(batch_detections[top_index,-1],np.int32)
top_bboxes = np.array(batch_detections[top_index,:4])
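# top_bboxes are still in the 416x416 letterboxed coordinate system here; they are corrected below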

# extract the corner coordinates
top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(top_bboxes[:,0],-1),np.expand_dims(top_bboxes[:,1],-1),np.expand_dims(top_bboxes[:,2],-1),np.expand_dims(top_bboxes[:,3],-1)

# coordinates.append((top_xmin,top_xmax,top_ymin,top_ymax))# treat the four coordinate arrays as one unit

#-----------------------------------------------------------------#
# Before the image is fed to the network, letterbox_image adds gray bars around it,
# so the resulting top_bboxes are relative to the padded (letterboxed) image.
# They must be corrected to remove the gray-bar offset.
#-----------------------------------------------------------------#

# boxes holds the coordinates of each detected target
boxes = yolo_correct_boxes(top_ymin,top_xmin,top_ymax,top_xmax,np.array([self.model_image_size[0],self.model_image_size[1]]),image_shape)
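# yolo_correct_boxes maps the letterboxed coordinates back onto the original image; the resulting
# boxes are unpacked later as (top, left, bottom, right), the same order predict.py uses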

font = ImageFont.truetype(font='model_data/simhei.ttf',size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32'))

thickness = max((np.shape(image)[0] + np.shape(image)[1]) // self.model_image_size[0], 1)

for i, c in enumerate(top_label):
# embed()
predicted_class = self.class_names[c]
score = top_conf[i]

@@ -186,8 +209,10 @@ def detect_image(self, image):
bottom = bottom + 5
right = right + 5

# top-left corner coordinates
top = max(0, np.floor(top + 0.5).astype('int32'))
left = max(0, np.floor(left + 0.5).astype('int32'))
# bottom-right corner coordinates
bottom = min(np.shape(image)[0], np.floor(bottom + 0.5).astype('int32'))
right = min(np.shape(image)[1], np.floor(right + 0.5).astype('int32'))

@@ -207,10 +232,10 @@ def detect_image(self, image):
draw.rectangle(
[left + i, top + i, right - i, bottom - i],
outline=self.colors[self.class_names.index(predicted_class)])
draw.rectangle(
draw.rectangle(# draw the filled background for the label text
[tuple(text_origin), tuple(text_origin + label_size)],
fill=self.colors[self.class_names.index(predicted_class)])
draw.text(text_origin, str(label,'UTF-8'), fill=(0, 0, 0), font=font)
del draw
return image
return image,boxes# also return the box coordinates